@juspay/neurolink 7.34.0 → 7.36.0
This diff shows the changes between publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- package/CHANGELOG.md +12 -0
- package/README.md +64 -7
- package/dist/adapters/providerImageAdapter.d.ts +56 -0
- package/dist/adapters/providerImageAdapter.js +257 -0
- package/dist/cli/commands/config.d.ts +20 -20
- package/dist/cli/commands/setup-anthropic.d.ts +16 -0
- package/dist/cli/commands/setup-anthropic.js +414 -0
- package/dist/cli/commands/setup-azure.d.ts +17 -0
- package/dist/cli/commands/setup-azure.js +415 -0
- package/dist/cli/commands/setup-bedrock.d.ts +13 -0
- package/dist/cli/commands/setup-bedrock.js +487 -0
- package/dist/cli/commands/setup-gcp.d.ts +18 -0
- package/dist/cli/commands/setup-gcp.js +569 -0
- package/dist/cli/commands/setup-google-ai.d.ts +16 -0
- package/dist/cli/commands/setup-google-ai.js +369 -0
- package/dist/cli/commands/setup-huggingface.d.ts +8 -0
- package/dist/cli/commands/setup-huggingface.js +200 -0
- package/dist/cli/commands/setup-mistral.d.ts +8 -0
- package/dist/cli/commands/setup-mistral.js +233 -0
- package/dist/cli/commands/setup-openai.d.ts +16 -0
- package/dist/cli/commands/setup-openai.js +402 -0
- package/dist/cli/commands/setup.d.ts +19 -0
- package/dist/cli/commands/setup.js +539 -0
- package/dist/cli/factories/commandFactory.d.ts +5 -0
- package/dist/cli/factories/commandFactory.js +67 -3
- package/dist/cli/factories/setupCommandFactory.d.ts +18 -0
- package/dist/cli/factories/setupCommandFactory.js +137 -0
- package/dist/cli/parser.js +4 -1
- package/dist/cli/utils/envManager.d.ts +3 -2
- package/dist/cli/utils/envManager.js +18 -4
- package/dist/core/baseProvider.js +99 -45
- package/dist/core/types.d.ts +3 -0
- package/dist/lib/adapters/providerImageAdapter.d.ts +56 -0
- package/dist/lib/adapters/providerImageAdapter.js +257 -0
- package/dist/lib/core/baseProvider.js +99 -45
- package/dist/lib/core/types.d.ts +3 -0
- package/dist/lib/neurolink.js +8 -3
- package/dist/lib/types/content.d.ts +78 -0
- package/dist/lib/types/content.js +5 -0
- package/dist/lib/types/conversation.d.ts +19 -0
- package/dist/lib/types/generateTypes.d.ts +4 -1
- package/dist/lib/types/streamTypes.d.ts +6 -3
- package/dist/lib/utils/imageProcessor.d.ts +84 -0
- package/dist/lib/utils/imageProcessor.js +362 -0
- package/dist/lib/utils/messageBuilder.d.ts +8 -1
- package/dist/lib/utils/messageBuilder.js +279 -0
- package/dist/neurolink.js +8 -3
- package/dist/types/content.d.ts +78 -0
- package/dist/types/content.js +5 -0
- package/dist/types/conversation.d.ts +19 -0
- package/dist/types/generateTypes.d.ts +4 -1
- package/dist/types/streamTypes.d.ts +6 -3
- package/dist/utils/imageProcessor.d.ts +84 -0
- package/dist/utils/imageProcessor.js +362 -0
- package/dist/utils/messageBuilder.d.ts +8 -1
- package/dist/utils/messageBuilder.js +279 -0
- package/package.json +1 -1
@@ -1,8 +1,13 @@
 /**
  * Message Builder Utility
  * Centralized logic for building message arrays from TextGenerationOptions
+ * Enhanced with multimodal support for images
  */
 import { CONVERSATION_INSTRUCTIONS } from "../config/conversationMemory.js";
+import { ProviderImageAdapter, MultimodalLogger, } from "../adapters/providerImageAdapter.js";
+import { logger } from "./logger.js";
+import { request } from "undici";
+import { readFileSync, existsSync } from "fs";
 /**
  * Build a properly formatted message array for AI providers
  * Combines system prompt, conversation history, and current user prompt
@@ -46,3 +51,277 @@ export function buildMessagesArray(options) {
     }
     return messages;
 }
+/**
+ * Build multimodal message array with image support
+ * Detects when images are present and routes through provider adapter
+ */
+export async function buildMultimodalMessagesArray(options, provider, model) {
+    // Check if this is a multimodal request
+    const hasImages = (options.input.images && options.input.images.length > 0) ||
+        (options.input.content &&
+            options.input.content.some((c) => c.type === "image"));
+    // If no images, use standard message building and convert to MultimodalChatMessage[]
+    if (!hasImages) {
+        const standardMessages = buildMessagesArray(options);
+        return standardMessages.map((msg) => ({ ...msg, content: msg.content }));
+    }
+    // Validate provider supports vision
+    if (!ProviderImageAdapter.supportsVision(provider, model)) {
+        throw new Error(`Provider ${provider} with model ${model} does not support vision processing. ` +
+            `Supported providers: ${ProviderImageAdapter.getVisionProviders().join(", ")}`);
+    }
+    const messages = [];
+    // Build enhanced system prompt
+    let systemPrompt = options.systemPrompt?.trim() || "";
+    // Add conversation-aware instructions when history exists
+    const hasConversationHistory = options.conversationHistory && options.conversationHistory.length > 0;
+    if (hasConversationHistory) {
+        systemPrompt = `${systemPrompt.trim()}${CONVERSATION_INSTRUCTIONS}`;
+    }
+    // Add system message if we have one
+    if (systemPrompt.trim()) {
+        messages.push({
+            role: "system",
+            content: systemPrompt.trim(),
+        });
+    }
+    // Add conversation history if available
+    if (hasConversationHistory && options.conversationHistory) {
+        // Convert conversation history to MultimodalChatMessage format
+        options.conversationHistory.forEach((msg) => {
+            messages.push({
+                role: msg.role,
+                content: msg.content,
+            });
+        });
+    }
+    // Handle multimodal content
+    try {
+        let userContent;
+        if (options.input.content && options.input.content.length > 0) {
+            // Advanced content format - convert to provider-specific format
+            userContent = await convertContentToProviderFormat(options.input.content, provider, model);
+        }
+        else if (options.input.images && options.input.images.length > 0) {
+            // Simple images format - convert to provider-specific format
+            userContent = await convertSimpleImagesToProviderFormat(options.input.text, options.input.images, provider, model);
+        }
+        else {
+            // Text-only fallback
+            userContent = options.input.text;
+        }
+        // 🔧 CRITICAL FIX: Handle multimodal content properly for Vercel AI SDK
+        if (typeof userContent === "string") {
+            // Simple text content - use standard MultimodalChatMessage format
+            messages.push({
+                role: "user",
+                content: userContent,
+            });
+        }
+        else {
+            // 🔧 MULTIMODAL CONTENT: Wrap the content array in a proper message object
+            // The Vercel AI SDK expects messages with multimodal content arrays
+            messages.push({
+                role: "user",
+                content: userContent,
+            });
+        }
+        return messages;
+    }
+    catch (error) {
+        MultimodalLogger.logError("MULTIMODAL_BUILD", error, {
+            provider,
+            model,
+            hasImages,
+            imageCount: options.input.images?.length || 0,
+        });
+        throw error;
+    }
+}
+/**
+ * Convert advanced content format to provider-specific format
+ */
+async function convertContentToProviderFormat(content, provider, _model) {
+    const textContent = content.find((c) => c.type === "text");
+    const imageContent = content.filter((c) => c.type === "image");
+    if (!textContent) {
+        throw new Error("Multimodal content must include at least one text element");
+    }
+    if (imageContent.length === 0) {
+        return textContent.text;
+    }
+    // Extract images as Buffer | string array
+    const images = imageContent.map((img) => img.data);
+    return await convertSimpleImagesToProviderFormat(textContent.text, images, provider, _model);
+}
+/**
+ * Check if a string is an internet URL
+ */
+function isInternetUrl(input) {
+    return input.startsWith("http://") || input.startsWith("https://");
+}
+/**
+ * Download image from URL and convert to base64 data URI
+ */
+async function downloadImageFromUrl(url) {
+    try {
+        const response = await request(url, {
+            method: "GET",
+            headersTimeout: 10000, // 10 second timeout for headers
+            bodyTimeout: 30000, // 30 second timeout for body
+            maxRedirections: 5,
+        });
+        if (response.statusCode !== 200) {
+            throw new Error(`HTTP ${response.statusCode}: Failed to download image from ${url}`);
+        }
+        // Get content type from headers
+        const contentType = response.headers["content-type"] || "image/jpeg";
+        // Validate it's an image
+        if (!contentType.startsWith("image/")) {
+            throw new Error(`URL does not point to an image. Content-Type: ${contentType}`);
+        }
+        // Read the response body
+        const chunks = [];
+        for await (const chunk of response.body) {
+            chunks.push(chunk);
+        }
+        const buffer = Buffer.concat(chunks);
+        // Check file size (limit to 10MB)
+        const maxSize = 10 * 1024 * 1024; // 10MB
+        if (buffer.length > maxSize) {
+            throw new Error(`Image too large: ${buffer.length} bytes (max: ${maxSize} bytes)`);
+        }
+        // Convert to base64 data URI
+        const base64 = buffer.toString("base64");
+        const dataUri = `data:${contentType};base64,${base64}`;
+        return dataUri;
+    }
+    catch (error) {
+        MultimodalLogger.logError("URL_DOWNLOAD_FAILED", error, { url });
+        throw new Error(`Failed to download image from ${url}: ${error instanceof Error ? error.message : String(error)}`);
+    }
+}
+/**
+ * Convert simple images format to Vercel AI SDK format with smart auto-detection
+ * - URLs: Downloaded and converted to base64 for Vercel AI SDK compatibility
+ * - Local files: Converted to base64 for Vercel AI SDK compatibility
+ * - Buffers/Data URIs: Processed normally
+ */
+async function convertSimpleImagesToProviderFormat(text, images, provider, _model) {
+    // For Vercel AI SDK, we need to return the content in the standard format
+    // The Vercel AI SDK will handle provider-specific formatting internally
+    // Smart auto-detection: separate URLs from actual image data
+    const urlImages = [];
+    const actualImages = [];
+    images.forEach((image, _index) => {
+        if (typeof image === "string" && isInternetUrl(image)) {
+            // Internet URL - will be downloaded and converted to base64
+            urlImages.push(image);
+        }
+        else {
+            // Actual image data (file path, Buffer, data URI) - process for Vercel AI SDK
+            actualImages.push(image);
+        }
+    });
+    // Download URL images and add to actual images
+    for (const url of urlImages) {
+        try {
+            const downloadedDataUri = await downloadImageFromUrl(url);
+            actualImages.push(downloadedDataUri);
+        }
+        catch (error) {
+            MultimodalLogger.logError("URL_DOWNLOAD_FAILED_SKIPPING", error, { url });
+            // Continue processing other images even if one URL fails
+            logger.warn(`Failed to download image from ${url}, skipping: ${error instanceof Error ? error.message : String(error)}`);
+        }
+    }
+    const content = [{ type: "text", text }];
+    // Process all images (including downloaded URLs) for Vercel AI SDK
+    actualImages.forEach((image, index) => {
+        try {
+            // Vercel AI SDK expects { type: 'image', image: Buffer | string, mimeType?: string }
+            // For Vertex AI, we need to include mimeType
+            let imageData;
+            let mimeType = "image/jpeg"; // Default mime type
+            if (typeof image === "string") {
+                if (image.startsWith("data:")) {
+                    // Data URI (including downloaded URLs) - extract mime type and use directly
+                    const match = image.match(/^data:([^;]+);base64,(.+)$/);
+                    if (match) {
+                        mimeType = match[1];
+                        imageData = image; // Keep as data URI for Vercel AI SDK
+                    }
+                    else {
+                        imageData = image;
+                    }
+                }
+                else if (isInternetUrl(image)) {
+                    // This should not happen as URLs are processed separately above
+                    // But handle it gracefully just in case
+                    throw new Error(`Unprocessed URL found in actualImages: ${image}`);
+                }
+                else {
+                    // File path string - convert to base64 data URI
+                    try {
+                        if (existsSync(image)) {
+                            const buffer = readFileSync(image);
+                            const base64 = buffer.toString("base64");
+                            // Detect mime type from file extension
+                            const ext = image.toLowerCase().split(".").pop();
+                            switch (ext) {
+                                case "png":
+                                    mimeType = "image/png";
+                                    break;
+                                case "gif":
+                                    mimeType = "image/gif";
+                                    break;
+                                case "webp":
+                                    mimeType = "image/webp";
+                                    break;
+                                case "bmp":
+                                    mimeType = "image/bmp";
+                                    break;
+                                case "tiff":
+                                case "tif":
+                                    mimeType = "image/tiff";
+                                    break;
+                                default:
+                                    mimeType = "image/jpeg";
+                                    break;
+                            }
+                            imageData = `data:${mimeType};base64,${base64}`;
+                        }
+                        else {
+                            throw new Error(`Image file not found: ${image}`);
+                        }
+                    }
+                    catch (error) {
+                        MultimodalLogger.logError("FILE_PATH_CONVERSION", error, {
+                            index,
+                            filePath: image,
+                        });
+                        throw new Error(`Failed to convert file path to base64: ${image}. ${error}`);
+                    }
+                }
+            }
+            else {
+                // Buffer - convert to base64 data URI
+                const base64 = image.toString("base64");
+                imageData = `data:${mimeType};base64,${base64}`;
+            }
+            content.push({
+                type: "image",
+                image: imageData,
+                mimeType: mimeType, // Add mimeType for Vertex AI compatibility
+            });
+        }
+        catch (error) {
+            MultimodalLogger.logError("ADD_IMAGE_TO_CONTENT", error, {
+                index,
+                provider,
+            });
+            throw error;
+        }
+    });
+    return content;
+}
package/dist/neurolink.js CHANGED
@@ -854,7 +854,7 @@ export class NeuroLink {
             // Continue with warning rather than throwing - graceful degradation
         }
     }
-    // Convert to TextGenerationOptions
+    // 🔧 CRITICAL FIX: Convert to TextGenerationOptions while preserving the input object for multimodal support
     const baseOptions = {
         prompt: options.input.text,
         provider: options.provider,
@@ -868,6 +868,7 @@ export class NeuroLink {
         context: options.context,
         evaluationDomain: options.evaluationDomain,
         toolUsageContext: options.toolUsageContext,
+        input: options.input, // This includes text, images, and content arrays
     };
     // Apply factory enhancement using centralized utilities
     const textOptions = enhanceTextGenerationOptions(baseOptions, factoryResult);
@@ -1664,7 +1665,9 @@ export class NeuroLink {
         const processedStream = (async function* (self) {
             try {
                 for await (const chunk of mcpStream) {
-                    if (chunk &&
+                    if (chunk &&
+                        "content" in chunk &&
+                        typeof chunk.content === "string") {
                         accumulatedContent += chunk.content;
                         // Emit chunk event for compatibility
                         self.emitter.emit("response:chunk", chunk.content);
@@ -1941,7 +1944,9 @@ export class NeuroLink {
         const fallbackProcessedStream = (async function* (self) {
             try {
                 for await (const chunk of fallbackStreamResult.stream) {
-                    if (chunk &&
+                    if (chunk &&
+                        "content" in chunk &&
+                        typeof chunk.content === "string") {
                         fallbackAccumulatedContent += chunk.content;
                         // Emit chunk event
                         self.emitter.emit("response:chunk", chunk.content);
@@ -0,0 +1,78 @@
+/**
+ * Content type definitions for multimodal support
+ * Supports text and image content with provider-specific formatting
+ */
+/**
+ * Text content type for multimodal messages
+ */
+export interface TextContent {
+    type: "text";
+    text: string;
+}
+/**
+ * Image content type for multimodal messages
+ */
+export interface ImageContent {
+    type: "image";
+    data: Buffer | string;
+    mediaType?: "image/jpeg" | "image/png" | "image/gif" | "image/webp" | "image/bmp" | "image/tiff";
+    metadata?: {
+        description?: string;
+        quality?: "low" | "high" | "auto";
+        dimensions?: {
+            width: number;
+            height: number;
+        };
+        filename?: string;
+    };
+}
+/**
+ * Union type for all content types
+ */
+export type Content = TextContent | ImageContent;
+/**
+ * Vision capability information for providers
+ */
+export interface VisionCapability {
+    provider: string;
+    supportedModels: string[];
+    maxImageSize?: number;
+    supportedFormats: string[];
+    maxImagesPerRequest?: number;
+}
+/**
+ * Provider-specific image format requirements
+ */
+export interface ProviderImageFormat {
+    provider: string;
+    format: "data_uri" | "base64" | "inline_data" | "source";
+    requiresPrefix?: boolean;
+    mimeTypeField?: string;
+    dataField?: string;
+}
+/**
+ * Image processing result
+ */
+export interface ProcessedImage {
+    data: string;
+    mediaType: string;
+    size: number;
+    format: "data_uri" | "base64" | "inline_data" | "source";
+}
+/**
+ * Multimodal message structure for provider adapters
+ */
+export interface MultimodalMessage {
+    role: "user" | "assistant" | "system";
+    content: Content[];
+}
+/**
+ * Provider-specific multimodal payload
+ */
+export interface ProviderMultimodalPayload {
+    provider: string;
+    model: string;
+    messages?: MultimodalMessage[];
+    contents?: unknown[];
+    [key: string]: unknown;
+}
@@ -66,6 +66,25 @@ export interface ChatMessage {
     /** Content of the message */
     content: string;
 }
+/**
+ * Content format for multimodal messages (used internally)
+ */
+export interface MessageContent {
+    type: string;
+    text?: string;
+    image?: string;
+    mimeType?: string;
+    [key: string]: unknown;
+}
+/**
+ * Extended chat message for multimodal support (internal use)
+ */
+export interface MultimodalChatMessage {
+    /** Role of the message sender */
+    role: "user" | "assistant" | "system";
+    /** Content of the message - can be text or multimodal content array */
+    content: string | MessageContent[];
+}
 /**
  * Events emitted by conversation memory system
  */
@@ -6,13 +6,16 @@ import type { EvaluationData } from "./evaluation.js";
 import type { ChatMessage, ConversationMemoryConfig } from "./conversation.js";
 import type { MiddlewareFactoryOptions } from "./middlewareTypes.js";
 import type { JsonValue } from "./common.js";
+import type { TextContent, ImageContent } from "./content.js";
 /**
  * Generate function options type - Primary method for content generation
- *
+ * Supports multimodal content while maintaining backward compatibility
  */
 export type GenerateOptions = {
     input: {
         text: string;
+        images?: Array<Buffer | string>;
+        content?: Array<TextContent | ImageContent>;
     };
     output?: {
         format?: "text" | "structured" | "json";
@@ -5,6 +5,7 @@ import type { AnalyticsData, TokenUsage } from "./analytics.js";
 import type { EvaluationData } from "./evaluation.js";
 import type { UnknownRecord, JsonValue } from "./common.js";
 import type { ChatMessage } from "./conversation.js";
+import type { TextContent, ImageContent } from "./content.js";
 import type { MiddlewareFactoryOptions } from "./middlewareTypes.js";
 /**
  * Progress tracking and metadata for streaming operations
@@ -118,10 +119,12 @@ export interface AudioChunk {
     channels: number;
     encoding: PCMEncoding;
 }
-export type StreamOptions = {
+export interface StreamOptions {
     input: {
-        text
+        text: string;
         audio?: AudioInputSpec;
+        images?: Array<Buffer | string>;
+        content?: Array<TextContent | ImageContent>;
     };
     output?: {
         format?: "text" | "structured" | "json";
@@ -166,7 +169,7 @@ export type StreamOptions = {
     };
     conversationMessages?: ChatMessage[];
     middleware?: MiddlewareFactoryOptions;
-};
+}
 /**
  * Stream function result type - Primary output format for streaming
  * Future-ready for multi-modal outputs while maintaining text focus
@@ -0,0 +1,84 @@
+/**
+ * Image processing utilities for multimodal support
+ * Handles format conversion for different AI providers
+ */
+import type { ProcessedImage } from "../types/content.js";
+/**
+ * Image processor class for handling provider-specific image formatting
+ */
+export declare class ImageProcessor {
+    /**
+     * Process image for OpenAI (requires data URI format)
+     */
+    static processImageForOpenAI(image: Buffer | string): string;
+    /**
+     * Process image for Google AI (requires base64 without data URI prefix)
+     */
+    static processImageForGoogle(image: Buffer | string): {
+        mimeType: string;
+        data: string;
+    };
+    /**
+     * Process image for Anthropic (requires base64 without data URI prefix)
+     */
+    static processImageForAnthropic(image: Buffer | string): {
+        mediaType: string;
+        data: string;
+    };
+    /**
+     * Process image for Vertex AI (model-specific routing)
+     */
+    static processImageForVertex(image: Buffer | string, model: string): {
+        mimeType?: string;
+        mediaType?: string;
+        data: string;
+    };
+    /**
+     * Detect image type from filename or data
+     */
+    static detectImageType(input: string | Buffer): string;
+    /**
+     * Validate image size (default 10MB limit)
+     */
+    static validateImageSize(data: Buffer | string, maxSize?: number): boolean;
+    /**
+     * Validate image format
+     */
+    static validateImageFormat(mediaType: string): boolean;
+    /**
+     * Get image dimensions from Buffer (basic implementation)
+     */
+    static getImageDimensions(buffer: Buffer): {
+        width: number;
+        height: number;
+    } | null;
+    /**
+     * Convert image to ProcessedImage format
+     */
+    static processImage(image: Buffer | string, provider: string, model?: string): ProcessedImage;
+}
+/**
+ * Utility functions for image handling
+ */
+export declare const imageUtils: {
+    /**
+     * Check if a string is a valid data URI
+     */
+    isDataUri: (str: string) => boolean;
+    /**
+     * Check if a string is a valid URL
+     */
+    isUrl: (str: string) => boolean;
+    /**
+     * Check if a string is base64 encoded
+     */
+    isBase64: (str: string) => boolean;
+    /**
+     * Extract file extension from filename or URL
+     */
+    getFileExtension: (filename: string) => string | null;
+    /**
+     * Convert file size to human readable format
+     */
+    formatFileSize: (bytes: number) => string;
+};