@juspay/neurolink 8.2.0 → 8.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119)
  1. package/CHANGELOG.md +12 -0
  2. package/README.md +13 -3
  3. package/dist/adapters/providerImageAdapter.d.ts +1 -1
  4. package/dist/adapters/providerImageAdapter.js +62 -0
  5. package/dist/agent/directTools.d.ts +0 -72
  6. package/dist/agent/directTools.js +3 -74
  7. package/dist/cli/commands/config.d.ts +18 -18
  8. package/dist/cli/factories/commandFactory.js +1 -0
  9. package/dist/cli/loop/conversationSelector.js +4 -0
  10. package/dist/cli/loop/session.js +27 -15
  11. package/dist/constants/enums.d.ts +1 -0
  12. package/dist/constants/enums.js +3 -1
  13. package/dist/constants/tokens.d.ts +3 -0
  14. package/dist/constants/tokens.js +3 -0
  15. package/dist/core/baseProvider.d.ts +56 -53
  16. package/dist/core/baseProvider.js +107 -1095
  17. package/dist/core/constants.d.ts +3 -0
  18. package/dist/core/constants.js +6 -3
  19. package/dist/core/modelConfiguration.js +10 -0
  20. package/dist/core/modules/GenerationHandler.d.ts +63 -0
  21. package/dist/core/modules/GenerationHandler.js +230 -0
  22. package/dist/core/modules/MessageBuilder.d.ts +39 -0
  23. package/dist/core/modules/MessageBuilder.js +179 -0
  24. package/dist/core/modules/StreamHandler.d.ts +52 -0
  25. package/dist/core/modules/StreamHandler.js +103 -0
  26. package/dist/core/modules/TelemetryHandler.d.ts +64 -0
  27. package/dist/core/modules/TelemetryHandler.js +170 -0
  28. package/dist/core/modules/ToolsManager.d.ts +98 -0
  29. package/dist/core/modules/ToolsManager.js +521 -0
  30. package/dist/core/modules/Utilities.d.ts +88 -0
  31. package/dist/core/modules/Utilities.js +329 -0
  32. package/dist/factories/providerRegistry.js +1 -1
  33. package/dist/lib/adapters/providerImageAdapter.d.ts +1 -1
  34. package/dist/lib/adapters/providerImageAdapter.js +62 -0
  35. package/dist/lib/agent/directTools.d.ts +0 -72
  36. package/dist/lib/agent/directTools.js +3 -74
  37. package/dist/lib/constants/enums.d.ts +1 -0
  38. package/dist/lib/constants/enums.js +3 -1
  39. package/dist/lib/constants/tokens.d.ts +3 -0
  40. package/dist/lib/constants/tokens.js +3 -0
  41. package/dist/lib/core/baseProvider.d.ts +56 -53
  42. package/dist/lib/core/baseProvider.js +107 -1095
  43. package/dist/lib/core/constants.d.ts +3 -0
  44. package/dist/lib/core/constants.js +6 -3
  45. package/dist/lib/core/modelConfiguration.js +10 -0
  46. package/dist/lib/core/modules/GenerationHandler.d.ts +63 -0
  47. package/dist/lib/core/modules/GenerationHandler.js +231 -0
  48. package/dist/lib/core/modules/MessageBuilder.d.ts +39 -0
  49. package/dist/lib/core/modules/MessageBuilder.js +180 -0
  50. package/dist/lib/core/modules/StreamHandler.d.ts +52 -0
  51. package/dist/lib/core/modules/StreamHandler.js +104 -0
  52. package/dist/lib/core/modules/TelemetryHandler.d.ts +64 -0
  53. package/dist/lib/core/modules/TelemetryHandler.js +171 -0
  54. package/dist/lib/core/modules/ToolsManager.d.ts +98 -0
  55. package/dist/lib/core/modules/ToolsManager.js +522 -0
  56. package/dist/lib/core/modules/Utilities.d.ts +88 -0
  57. package/dist/lib/core/modules/Utilities.js +330 -0
  58. package/dist/lib/factories/providerRegistry.js +1 -1
  59. package/dist/lib/mcp/servers/agent/directToolsServer.js +0 -1
  60. package/dist/lib/models/modelRegistry.js +44 -0
  61. package/dist/lib/neurolink.js +35 -3
  62. package/dist/lib/providers/amazonBedrock.js +59 -10
  63. package/dist/lib/providers/anthropic.js +2 -30
  64. package/dist/lib/providers/azureOpenai.js +2 -24
  65. package/dist/lib/providers/googleAiStudio.js +2 -24
  66. package/dist/lib/providers/googleVertex.js +2 -45
  67. package/dist/lib/providers/huggingFace.js +3 -31
  68. package/dist/lib/providers/litellm.d.ts +1 -1
  69. package/dist/lib/providers/litellm.js +110 -44
  70. package/dist/lib/providers/mistral.js +5 -32
  71. package/dist/lib/providers/ollama.d.ts +1 -0
  72. package/dist/lib/providers/ollama.js +476 -129
  73. package/dist/lib/providers/openAI.js +2 -28
  74. package/dist/lib/providers/openaiCompatible.js +3 -31
  75. package/dist/lib/types/content.d.ts +16 -113
  76. package/dist/lib/types/content.js +16 -2
  77. package/dist/lib/types/conversation.d.ts +3 -17
  78. package/dist/lib/types/generateTypes.d.ts +2 -2
  79. package/dist/lib/types/index.d.ts +2 -0
  80. package/dist/lib/types/index.js +2 -0
  81. package/dist/lib/types/multimodal.d.ts +282 -0
  82. package/dist/lib/types/multimodal.js +101 -0
  83. package/dist/lib/types/streamTypes.d.ts +2 -2
  84. package/dist/lib/utils/imageProcessor.d.ts +1 -1
  85. package/dist/lib/utils/messageBuilder.js +25 -2
  86. package/dist/lib/utils/multimodalOptionsBuilder.d.ts +1 -1
  87. package/dist/lib/utils/pdfProcessor.d.ts +9 -0
  88. package/dist/lib/utils/pdfProcessor.js +67 -9
  89. package/dist/mcp/servers/agent/directToolsServer.js +0 -1
  90. package/dist/models/modelRegistry.js +44 -0
  91. package/dist/neurolink.js +35 -3
  92. package/dist/providers/amazonBedrock.js +59 -10
  93. package/dist/providers/anthropic.js +2 -30
  94. package/dist/providers/azureOpenai.js +2 -24
  95. package/dist/providers/googleAiStudio.js +2 -24
  96. package/dist/providers/googleVertex.js +2 -45
  97. package/dist/providers/huggingFace.js +3 -31
  98. package/dist/providers/litellm.d.ts +1 -1
  99. package/dist/providers/litellm.js +110 -44
  100. package/dist/providers/mistral.js +5 -32
  101. package/dist/providers/ollama.d.ts +1 -0
  102. package/dist/providers/ollama.js +476 -129
  103. package/dist/providers/openAI.js +2 -28
  104. package/dist/providers/openaiCompatible.js +3 -31
  105. package/dist/types/content.d.ts +16 -113
  106. package/dist/types/content.js +16 -2
  107. package/dist/types/conversation.d.ts +3 -17
  108. package/dist/types/generateTypes.d.ts +2 -2
  109. package/dist/types/index.d.ts +2 -0
  110. package/dist/types/index.js +2 -0
  111. package/dist/types/multimodal.d.ts +282 -0
  112. package/dist/types/multimodal.js +100 -0
  113. package/dist/types/streamTypes.d.ts +2 -2
  114. package/dist/utils/imageProcessor.d.ts +1 -1
  115. package/dist/utils/messageBuilder.js +25 -2
  116. package/dist/utils/multimodalOptionsBuilder.d.ts +1 -1
  117. package/dist/utils/pdfProcessor.d.ts +9 -0
  118. package/dist/utils/pdfProcessor.js +67 -9
  119. package/package.json +5 -2
@@ -8,8 +8,6 @@ import { AuthenticationError, InvalidModelError, NetworkError, ProviderError, Ra
  import { DEFAULT_MAX_STEPS } from "../core/constants.js";
  import { validateApiKey, createOpenAIConfig, getProviderModel, } from "../utils/providerConfig.js";
  import { streamAnalyticsCollector } from "../core/streamAnalytics.js";
- import { buildMessagesArray, buildMultimodalMessagesArray, convertToCoreMessages, } from "../utils/messageBuilder.js";
- import { buildMultimodalOptions } from "../utils/multimodalOptionsBuilder.js";
  import { createProxyFetch } from "../proxy/proxyFetch.js";
  import { isZodSchema } from "../utils/schemaConversion.js";
  // Configuration helpers - now using consolidated utility
@@ -246,32 +244,8 @@ export class OpenAIProvider extends BaseProvider {
  filteredOutTools: Object.keys(allTools).filter((name) => !tools[name]),
  });
  // Build message array from options with multimodal support
- const hasMultimodalInput = !!(options.input?.images?.length ||
- options.input?.content?.length ||
- options.input?.files?.length ||
- options.input?.csvFiles?.length ||
- options.input?.pdfFiles?.length);
- let messages;
- if (hasMultimodalInput) {
- logger.debug(`OpenAI: Detected multimodal input, using multimodal message builder`, {
- hasImages: !!options.input?.images?.length,
- imageCount: options.input?.images?.length || 0,
- hasContent: !!options.input?.content?.length,
- contentCount: options.input?.content?.length || 0,
- hasFiles: !!options.input?.files?.length,
- fileCount: options.input?.files?.length || 0,
- hasCSVFiles: !!options.input?.csvFiles?.length,
- csvFileCount: options.input?.csvFiles?.length || 0,
- });
- const multimodalOptions = buildMultimodalOptions(options, this.providerName, this.modelName);
- const mm = await buildMultimodalMessagesArray(multimodalOptions, this.providerName, this.modelName);
- // Convert multimodal messages to Vercel AI SDK format (CoreMessage[])
- messages = convertToCoreMessages(mm);
- }
- else {
- logger.debug(`OpenAI: Text-only input, using standard message builder`);
- messages = await buildMessagesArray(options);
- }
+ // Using protected helper from BaseProvider to eliminate code duplication
+ const messages = await this.buildMessagesForStream(options);
  // Debug the actual request being sent to OpenAI
  logger.debug(`OpenAI: streamText request parameters:`, {
  modelName: this.modelName,
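
Both the OpenAI and the OpenAI-compatible providers now delegate message construction to a shared BaseProvider helper. The helper's implementation is not shown in this diff (baseProvider.js shrinks as its logic moves into the new modules/ handlers), but a minimal sketch of the branch it consolidates, reconstructed from the inline code removed above, could look roughly like this; the builder callbacks are stand-ins, not the package's real internals:

```typescript
// Sketch only: approximates the detection/dispatch logic each provider
// previously inlined and now obtains via BaseProvider.buildMessagesForStream().
// CoreMessage comes from the Vercel AI SDK ("ai"); the two builder callbacks
// are placeholders for the package's internal messageBuilder utilities.
import type { CoreMessage } from "ai";

type StreamInputLike = {
  text: string;
  images?: Array<Buffer | string>;
  content?: unknown[];
  files?: Array<Buffer | string>;
  csvFiles?: Array<Buffer | string>;
  pdfFiles?: Array<Buffer | string>;
};

async function buildMessagesForStreamSketch(
  options: { input?: StreamInputLike },
  buildMultimodalMessages: (o: { input?: StreamInputLike }) => Promise<CoreMessage[]>,
  buildTextMessages: (o: { input?: StreamInputLike }) => Promise<CoreMessage[]>,
): Promise<CoreMessage[]> {
  const input = options.input;
  // Same multimodal check the providers used to repeat: any images, raw
  // content parts, generic files, CSVs, or PDFs trigger the multimodal path.
  const hasMultimodalInput = !!(
    input?.images?.length ||
    input?.content?.length ||
    input?.files?.length ||
    input?.csvFiles?.length ||
    input?.pdfFiles?.length
  );
  return hasMultimodalInput
    ? buildMultimodalMessages(options) // converted to Vercel AI SDK CoreMessage[]
    : buildTextMessages(options); // plain text-only path
}
```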
@@ -7,8 +7,6 @@ import { createTimeoutController, TimeoutError } from "../utils/timeout.js";
  import { streamAnalyticsCollector } from "../core/streamAnalytics.js";
  import { createProxyFetch } from "../proxy/proxyFetch.js";
  import { DEFAULT_MAX_STEPS } from "../core/constants.js";
- import { buildMessagesArray, buildMultimodalMessagesArray, convertToCoreMessages, } from "../utils/messageBuilder.js";
- import { buildMultimodalOptions } from "../utils/multimodalOptionsBuilder.js";
  // Constants
  const FALLBACK_OPENAI_COMPATIBLE_MODEL = "gpt-3.5-turbo";
  // Configuration helpers
@@ -161,35 +159,9 @@ export class OpenAICompatibleProvider extends BaseProvider {
  const timeout = this.getTimeout(options);
  const timeoutController = createTimeoutController(timeout, this.providerName, "stream");
  try {
- // Check for multimodal input (images, PDFs, CSVs, files)
- const hasMultimodalInput = !!(options.input?.images?.length ||
- options.input?.content?.length ||
- options.input?.files?.length ||
- options.input?.csvFiles?.length ||
- options.input?.pdfFiles?.length);
- let messages;
- if (hasMultimodalInput) {
- logger.debug(`OpenAI Compatible: Detected multimodal input, using multimodal message builder`, {
- hasImages: !!options.input?.images?.length,
- imageCount: options.input?.images?.length || 0,
- hasContent: !!options.input?.content?.length,
- contentCount: options.input?.content?.length || 0,
- hasFiles: !!options.input?.files?.length,
- fileCount: options.input?.files?.length || 0,
- hasCSVFiles: !!options.input?.csvFiles?.length,
- csvFileCount: options.input?.csvFiles?.length || 0,
- hasPDFFiles: !!options.input?.pdfFiles?.length,
- pdfFileCount: options.input?.pdfFiles?.length || 0,
- });
- const multimodalOptions = buildMultimodalOptions(options, this.providerName, this.modelName);
- const mm = await buildMultimodalMessagesArray(multimodalOptions, this.providerName, this.modelName);
- // Convert multimodal messages to Vercel AI SDK format (CoreMessage[])
- messages = convertToCoreMessages(mm);
- }
- else {
- logger.debug(`OpenAI Compatible: Text-only input, using standard message builder`);
- messages = await buildMessagesArray(options);
- }
+ // Build message array from options with multimodal support
+ // Using protected helper from BaseProvider to eliminate code duplication
+ const messages = await this.buildMessagesForStream(options);
  const model = await this.getAISDKModelWithMiddleware(options); // This is where network connection happens!
  const result = streamText({
  model,
@@ -1,115 +1,18 @@
  /**
  * Content type definitions for multimodal support
- * Supports text and image content with provider-specific formatting
- */
- /**
- * Text content type for multimodal messages
- */
- export type TextContent = {
- type: "text";
- text: string;
- };
- /**
- * Image content type for multimodal messages
- */
- export type ImageContent = {
- type: "image";
- data: Buffer | string;
- mediaType?: "image/jpeg" | "image/png" | "image/gif" | "image/webp" | "image/bmp" | "image/tiff";
- metadata?: {
- description?: string;
- quality?: "low" | "high" | "auto";
- dimensions?: {
- width: number;
- height: number;
- };
- filename?: string;
- };
- };
- /**
- * CSV content type for multimodal messages
- */
- export type CSVContent = {
- type: "csv";
- data: Buffer | string;
- metadata?: {
- filename?: string;
- maxRows?: number;
- formatStyle?: "raw" | "markdown" | "json";
- description?: string;
- };
- };
- /**
- * PDF document content type for multimodal messages
- */
- export type PDFContent = {
- type: "pdf";
- data: Buffer | string;
- metadata?: {
- filename?: string;
- pages?: number;
- version?: string;
- description?: string;
- };
- };
- /**
- * Union type for all content types
- */
- export type Content = TextContent | ImageContent | CSVContent | PDFContent;
- /**
- * Vision capability information for providers
- */
- export type VisionCapability = {
- provider: string;
- supportedModels: string[];
- maxImageSize?: number;
- supportedFormats: string[];
- maxImagesPerRequest?: number;
- };
- /**
- * Provider-specific image format requirements
- */
- export type ProviderImageFormat = {
- provider: string;
- format: "data_uri" | "base64" | "inline_data" | "source";
- requiresPrefix?: boolean;
- mimeTypeField?: string;
- dataField?: string;
- };
- /**
- * Image processing result
- */
- export type ProcessedImage = {
- data: string;
- mediaType: string;
- size: number;
- format: "data_uri" | "base64" | "inline_data" | "source";
- };
- /**
- * Multimodal message structure for provider adapters
- */
- export type MultimodalMessage = {
- role: "user" | "assistant" | "system";
- content: Content[];
- };
- /**
- * Multimodal input type for options that may contain images or content arrays
- */
- export type MultimodalInput = {
- text: string;
- images?: Array<Buffer | string>;
- content?: Array<TextContent | ImageContent>;
- csvFiles?: Array<Buffer | string>;
- pdfFiles?: Array<Buffer | string>;
- files?: Array<Buffer | string>;
- };
- /**
- * Provider-specific multimodal payload
- */
- export type ProviderMultimodalPayload = {
- provider: string;
- model: string;
- messages?: MultimodalMessage[];
- contents?: unknown[];
- [key: string]: unknown;
- };
+ *
+ * @deprecated This file has been reorganized. All multimodal types are now in './multimodal.js'
+ * These re-exports are maintained for backward compatibility.
+ * Please import from './multimodal.js' in new code.
+ *
+ * Migration guide:
+ * ```typescript
+ * // Old (still works)
+ * import type { MultimodalInput } from './types/content.js';
+ *
+ * // New (preferred)
+ * import type { MultimodalInput } from './types/multimodal.js';
+ * ```
+ */
+ export type { TextContent, ImageContent, CSVContent, PDFContent, AudioContent, VideoContent, Content, MultimodalInput, MultimodalMessage, VisionCapability, ProviderImageFormat, ProcessedImage, ProviderMultimodalPayload, } from "./multimodal.js";
+ export { isTextContent, isImageContent, isCSVContent, isPDFContent, isAudioContent, isVideoContent, isMultimodalInput, } from "./multimodal.js";
@@ -1,6 +1,20 @@
  /**
  * Content type definitions for multimodal support
- * Supports text and image content with provider-specific formatting
+ *
+ * @deprecated This file has been reorganized. All multimodal types are now in './multimodal.js'
+ * These re-exports are maintained for backward compatibility.
+ * Please import from './multimodal.js' in new code.
+ *
+ * Migration guide:
+ * ```typescript
+ * // Old (still works)
+ * import type { MultimodalInput } from './types/content.js';
+ *
+ * // New (preferred)
+ * import type { MultimodalInput } from './types/multimodal.js';
+ * ```
  */
- export {};
+ // Runtime function re-exports for type guards
+ // These MUST be regular exports (not "export type") because they are actual functions
+ export { isTextContent, isImageContent, isCSVContent, isPDFContent, isAudioContent, isVideoContent, isMultimodalInput, } from "./multimodal.js";
  //# sourceMappingURL=content.js.map
@@ -92,24 +92,10 @@ export type ChatMessage = {
  };
  };
  /**
- * Content format for multimodal messages (used internally)
+ * Multimodal message types - Re-exported from multimodal.ts
+ * @deprecated Import from './multimodal.js' instead for better organization
  */
- export type MessageContent = {
- type: string;
- text?: string;
- image?: string;
- mimeType?: string;
- [key: string]: unknown;
- };
- /**
- * Extended chat message for multimodal support (internal use)
- */
- export type MultimodalChatMessage = {
- /** Role of the message sender */
- role: "user" | "assistant" | "system";
- /** Content of the message - can be text or multimodal content array */
- content: string | MessageContent[];
- };
+ export type { MessageContent, MultimodalChatMessage } from "./multimodal.js";
  /**
  * Events emitted by conversation memory system
  */
@@ -6,7 +6,7 @@ import type { EvaluationData } from "./evaluation.js";
  import type { ChatMessage, ConversationMemoryConfig } from "./conversation.js";
  import type { MiddlewareFactoryOptions } from "./middlewareTypes.js";
  import type { JsonValue } from "./common.js";
- import type { TextContent, ImageContent } from "./content.js";
+ import type { Content } from "./content.js";
  /**
  * Generate function options type - Primary method for content generation
  * Supports multimodal content while maintaining backward compatibility
@@ -18,7 +18,7 @@ export type GenerateOptions = {
  csvFiles?: Array<Buffer | string>;
  pdfFiles?: Array<Buffer | string>;
  files?: Array<Buffer | string>;
- content?: Array<TextContent | ImageContent>;
+ content?: Content[];
  };
  output?: {
  format?: "text" | "structured" | "json";
@@ -2,6 +2,8 @@
  * Centralized type exports for NeuroLink
  */
  export * from "./common.js";
+ export { AIProviderName } from "../constants/enums.js";
+ export type { ZodUnknownSchema, ValidationSchema, OptionalValidationSchema, StandardRecord, OptionalStandardRecord, } from "./typeAliases.js";
  export * from "./tools.js";
  export * from "./providers.js";
  export * from "./cli.js";
@@ -3,6 +3,8 @@
  */
  // Common utility types
  export * from "./common.js";
+ // Constants and enums
+ export { AIProviderName } from "../constants/enums.js";
  // Tool system types
  export * from "./tools.js";
  // Provider types
@@ -0,0 +1,282 @@
+ /**
+ * Multimodal Content Types for NeuroLink
+ *
+ * Central registry for all multimodal input/output types.
+ * This file consolidates types from content.ts and conversation.ts
+ * to provide a single source of truth for multimodal functionality.
+ *
+ * @module types/multimodal
+ *
+ * @example Basic Multimodal Input
+ * ```typescript
+ * import type { MultimodalInput } from './types/multimodal.js';
+ *
+ * const input: MultimodalInput = {
+ * text: "What's in this image?",
+ * images: [imageBuffer, "https://example.com/image.jpg"],
+ * pdfFiles: [pdfBuffer]
+ * };
+ * ```
+ *
+ * @example Audio/Video Input (Future)
+ * ```typescript
+ * const avInput: MultimodalInput = {
+ * text: "Transcribe this audio and analyze this video",
+ * audioFiles: [audioBuffer],
+ * videoFiles: ["path/to/video.mp4"]
+ * };
+ * ```
+ *
+ * @example Advanced Content Array
+ * ```typescript
+ * const advanced: MultimodalInput = {
+ * text: "irrelevant", // ignored when content[] is provided
+ * content: [
+ * { type: "text", text: "Analyze these items:" },
+ * { type: "image", data: imageBuffer, mediaType: "image/jpeg" },
+ * { type: "pdf", data: pdfBuffer, metadata: { filename: "report.pdf" } }
+ * ]
+ * };
+ * ```
+ */
+ /**
+ * Text content type for multimodal messages
+ */
+ export type TextContent = {
+ type: "text";
+ text: string;
+ };
+ /**
+ * Image content type for multimodal messages
+ */
+ export type ImageContent = {
+ type: "image";
+ data: Buffer | string;
+ mediaType?: "image/jpeg" | "image/png" | "image/gif" | "image/webp" | "image/bmp" | "image/tiff";
+ metadata?: {
+ description?: string;
+ quality?: "low" | "high" | "auto";
+ dimensions?: {
+ width: number;
+ height: number;
+ };
+ filename?: string;
+ };
+ };
+ /**
+ * CSV content type for multimodal messages
+ */
+ export type CSVContent = {
+ type: "csv";
+ data: Buffer | string;
+ metadata?: {
+ filename?: string;
+ maxRows?: number;
+ formatStyle?: "raw" | "markdown" | "json";
+ description?: string;
+ };
+ };
+ /**
+ * PDF document content type for multimodal messages
+ */
+ export type PDFContent = {
+ type: "pdf";
+ data: Buffer | string;
+ metadata?: {
+ filename?: string;
+ pages?: number;
+ version?: string;
+ description?: string;
+ };
+ };
+ /**
+ * Audio content type for multimodal messages
+ *
+ * NOTE: This is for FILE-BASED audio input (not streaming).
+ * For streaming audio (live transcription), use AudioInputSpec from streamTypes.ts
+ *
+ * @example
+ * ```typescript
+ * const audioContent: AudioContent = {
+ * type: "audio",
+ * data: audioBuffer,
+ * mediaType: "audio/mpeg",
+ * metadata: {
+ * filename: "recording.mp3",
+ * duration: 120.5,
+ * transcription: "Hello world"
+ * }
+ * };
+ * ```
+ */
+ export type AudioContent = {
+ type: "audio";
+ data: Buffer | string;
+ mediaType?: "audio/mpeg" | "audio/wav" | "audio/ogg" | "audio/webm" | "audio/aac" | "audio/flac" | "audio/mp4";
+ metadata?: {
+ filename?: string;
+ duration?: number;
+ sampleRate?: number;
+ channels?: number;
+ transcription?: string;
+ language?: string;
+ };
+ };
+ /**
+ * Video content type for multimodal messages
+ *
+ * NOTE: This is for FILE-BASED video input.
+ * For streaming video, this type may be extended in future.
+ *
+ * @example
+ * ```typescript
+ * const videoContent: VideoContent = {
+ * type: "video",
+ * data: videoBuffer,
+ * mediaType: "video/mp4",
+ * metadata: {
+ * filename: "demo.mp4",
+ * duration: 300,
+ * dimensions: { width: 1920, height: 1080 }
+ * }
+ * };
+ * ```
+ */
+ export type VideoContent = {
+ type: "video";
+ data: Buffer | string;
+ mediaType?: "video/mp4" | "video/webm" | "video/ogg" | "video/quicktime" | "video/x-msvideo" | "video/x-matroska";
+ metadata?: {
+ filename?: string;
+ duration?: number;
+ dimensions?: {
+ width: number;
+ height: number;
+ };
+ frameRate?: number;
+ codec?: string;
+ extractedFrames?: string[];
+ transcription?: string;
+ };
+ };
+ /**
+ * Union type for all content types
+ * Covers text, images, documents, and multimedia
+ */
+ export type Content = TextContent | ImageContent | CSVContent | PDFContent | AudioContent | VideoContent;
+ /**
+ * Multimodal input type for options that may contain images or content arrays
+ * This is the primary interface for users to provide multimodal content
+ */
+ export type MultimodalInput = {
+ text: string;
+ images?: Array<Buffer | string>;
+ content?: Content[];
+ csvFiles?: Array<Buffer | string>;
+ pdfFiles?: Array<Buffer | string>;
+ files?: Array<Buffer | string>;
+ /** Audio files for file-based audio processing (future) */
+ audioFiles?: Array<Buffer | string>;
+ /** Video files for file-based video processing (future) */
+ videoFiles?: Array<Buffer | string>;
+ };
+ /**
+ * Content format for multimodal messages (used internally)
+ * Compatible with Vercel AI SDK message format
+ */
+ export type MessageContent = {
+ type: string;
+ text?: string;
+ image?: string;
+ mimeType?: string;
+ [key: string]: unknown;
+ };
+ /**
+ * Extended chat message for multimodal support (internal use)
+ * Used during message processing and transformation
+ */
+ export type MultimodalChatMessage = {
+ /** Role of the message sender */
+ role: "user" | "assistant" | "system";
+ /** Content of the message - can be text or multimodal content array */
+ content: string | MessageContent[];
+ };
+ /**
+ * Multimodal message structure for provider adapters
+ */
+ export type MultimodalMessage = {
+ role: "user" | "assistant" | "system";
+ content: Content[];
+ };
+ /**
+ * Vision capability information for providers
+ */
+ export type VisionCapability = {
+ provider: string;
+ supportedModels: string[];
+ maxImageSize?: number;
+ supportedFormats: string[];
+ maxImagesPerRequest?: number;
+ };
+ /**
+ * Provider-specific image format requirements
+ */
+ export type ProviderImageFormat = {
+ provider: string;
+ format: "data_uri" | "base64" | "inline_data" | "source";
+ requiresPrefix?: boolean;
+ mimeTypeField?: string;
+ dataField?: string;
+ };
+ /**
+ * Image processing result
+ */
+ export type ProcessedImage = {
+ data: string;
+ mediaType: string;
+ size: number;
+ format: "data_uri" | "base64" | "inline_data" | "source";
+ };
+ /**
+ * Provider-specific multimodal payload
+ */
+ export type ProviderMultimodalPayload = {
+ provider: string;
+ model: string;
+ messages?: MultimodalMessage[];
+ contents?: unknown[];
+ [key: string]: unknown;
+ };
+ /**
+ * Type guard to check if content is TextContent
+ */
+ export declare function isTextContent(content: Content): content is TextContent;
+ /**
+ * Type guard to check if content is ImageContent
+ */
+ export declare function isImageContent(content: Content): content is ImageContent;
+ /**
+ * Type guard to check if content is CSVContent
+ */
+ export declare function isCSVContent(content: Content): content is CSVContent;
+ /**
+ * Type guard to check if content is PDFContent
+ */
+ export declare function isPDFContent(content: Content): content is PDFContent;
+ /**
+ * Type guard to check if content is AudioContent
+ */
+ export declare function isAudioContent(content: Content): content is AudioContent;
+ /**
+ * Type guard to check if content is VideoContent
+ */
+ export declare function isVideoContent(content: Content): content is VideoContent;
+ /**
+ * Type guard to check if input contains multimodal content
+ * Now includes audio and video detection
+ */
+ export declare function isMultimodalInput(input: unknown): input is MultimodalInput;
+ /**
+ * Type guard to check if message content is multimodal (array)
+ */
+ export declare function isMultimodalMessageContent(content: string | MessageContent[]): content is MessageContent[];
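
The declared type guards give consumers a runtime way to narrow the Content union, for example when rendering or logging a multimodal message. A small sketch using the guards above (same dist-internal import path caveat as earlier; adjust to your project's resolution):

```typescript
// Sketch: narrowing Content parts with the new runtime type guards.
// Guards and types are those declared in multimodal.d.ts above; the relative
// import path is the package-internal one and may differ in consuming code.
import type { Content } from "./types/multimodal.js";
import { isTextContent, isImageContent, isPDFContent } from "./types/multimodal.js";

function summarizeParts(parts: Content[]): string[] {
  return parts.map((part) => {
    if (isTextContent(part)) {
      return `text: ${part.text.slice(0, 40)}`;
    }
    if (isImageContent(part)) {
      return `image (${part.mediaType ?? "unknown media type"})`;
    }
    if (isPDFContent(part)) {
      return `pdf: ${part.metadata?.filename ?? "unnamed"}`;
    }
    return `other: ${part.type}`; // csv, audio, video parts fall through here
  });
}
```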