@juspay/neurolink 7.35.0 → 7.37.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/adapters/providerImageAdapter.d.ts +56 -0
- package/dist/adapters/providerImageAdapter.js +257 -0
- package/dist/cli/commands/config.d.ts +20 -20
- package/dist/cli/factories/commandFactory.d.ts +1 -0
- package/dist/cli/factories/commandFactory.js +26 -3
- package/dist/config/taskClassificationConfig.d.ts +51 -0
- package/dist/config/taskClassificationConfig.js +148 -0
- package/dist/core/baseProvider.js +99 -45
- package/dist/core/types.d.ts +3 -0
- package/dist/lib/adapters/providerImageAdapter.d.ts +56 -0
- package/dist/lib/adapters/providerImageAdapter.js +257 -0
- package/dist/lib/config/taskClassificationConfig.d.ts +51 -0
- package/dist/lib/config/taskClassificationConfig.js +148 -0
- package/dist/lib/core/baseProvider.js +99 -45
- package/dist/lib/core/types.d.ts +3 -0
- package/dist/lib/neurolink.d.ts +20 -0
- package/dist/lib/neurolink.js +276 -8
- package/dist/lib/types/content.d.ts +78 -0
- package/dist/lib/types/content.js +5 -0
- package/dist/lib/types/conversation.d.ts +19 -0
- package/dist/lib/types/generateTypes.d.ts +4 -1
- package/dist/lib/types/index.d.ts +2 -0
- package/dist/lib/types/index.js +2 -0
- package/dist/lib/types/streamTypes.d.ts +6 -3
- package/dist/lib/types/taskClassificationTypes.d.ts +52 -0
- package/dist/lib/types/taskClassificationTypes.js +5 -0
- package/dist/lib/utils/imageProcessor.d.ts +84 -0
- package/dist/lib/utils/imageProcessor.js +362 -0
- package/dist/lib/utils/messageBuilder.d.ts +8 -1
- package/dist/lib/utils/messageBuilder.js +279 -0
- package/dist/lib/utils/modelRouter.d.ts +107 -0
- package/dist/lib/utils/modelRouter.js +292 -0
- package/dist/lib/utils/promptRedaction.d.ts +29 -0
- package/dist/lib/utils/promptRedaction.js +62 -0
- package/dist/lib/utils/taskClassificationUtils.d.ts +55 -0
- package/dist/lib/utils/taskClassificationUtils.js +149 -0
- package/dist/lib/utils/taskClassifier.d.ts +23 -0
- package/dist/lib/utils/taskClassifier.js +94 -0
- package/dist/neurolink.d.ts +20 -0
- package/dist/neurolink.js +276 -8
- package/dist/types/content.d.ts +78 -0
- package/dist/types/content.js +5 -0
- package/dist/types/conversation.d.ts +19 -0
- package/dist/types/generateTypes.d.ts +4 -1
- package/dist/types/index.d.ts +2 -0
- package/dist/types/index.js +2 -0
- package/dist/types/streamTypes.d.ts +6 -3
- package/dist/types/taskClassificationTypes.d.ts +52 -0
- package/dist/types/taskClassificationTypes.js +5 -0
- package/dist/utils/imageProcessor.d.ts +84 -0
- package/dist/utils/imageProcessor.js +362 -0
- package/dist/utils/messageBuilder.d.ts +8 -1
- package/dist/utils/messageBuilder.js +279 -0
- package/dist/utils/modelRouter.d.ts +107 -0
- package/dist/utils/modelRouter.js +292 -0
- package/dist/utils/promptRedaction.d.ts +29 -0
- package/dist/utils/promptRedaction.js +62 -0
- package/dist/utils/taskClassificationUtils.d.ts +55 -0
- package/dist/utils/taskClassificationUtils.js +149 -0
- package/dist/utils/taskClassifier.d.ts +23 -0
- package/dist/utils/taskClassifier.js +94 -0
- package/package.json +1 -1
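
The hunks below are the largest additions in this release: the multimodal extensions to utils/messageBuilder.js, followed by the new utils/modelRouter.d.ts and utils/modelRouter.js (the file list above shows identical line counts under both dist/ and dist/lib/). For orientation, a minimal usage sketch of the new message builder, assuming the option shapes implied by the compiled code below; the authoritative typings live in the content.d.ts and generateTypes.d.ts files listed above, which this diff does not expand, and the import path and model name here are illustrative:

// Sketch only, not the package's documented API: shapes inferred from messageBuilder.js below.
import { buildMultimodalMessagesArray } from "@juspay/neurolink/dist/lib/utils/messageBuilder.js"; // illustrative path

const messages = await buildMultimodalMessagesArray(
  {
    systemPrompt: "You are a helpful assistant.",
    input: {
      text: "Describe this diagram.",
      // File paths, Buffers, data URIs, and http(s) URLs are all accepted;
      // URLs are downloaded and re-encoded as base64 data URIs (10 MB limit).
      images: ["./diagram.png", "https://example.com/photo.jpg"],
    },
  },
  "vertex",           // provider must pass ProviderImageAdapter.supportsVision()
  "gemini-2.5-flash", // model name is illustrative
);
// messages is a system/user array whose user entry carries
// [{ type: "text", ... }, { type: "image", image, mimeType }, ...] content.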
@@ -1,8 +1,13 @@
 /**
  * Message Builder Utility
  * Centralized logic for building message arrays from TextGenerationOptions
+ * Enhanced with multimodal support for images
  */
 import { CONVERSATION_INSTRUCTIONS } from "../config/conversationMemory.js";
+import { ProviderImageAdapter, MultimodalLogger, } from "../adapters/providerImageAdapter.js";
+import { logger } from "./logger.js";
+import { request } from "undici";
+import { readFileSync, existsSync } from "fs";
 /**
  * Build a properly formatted message array for AI providers
  * Combines system prompt, conversation history, and current user prompt
@@ -46,3 +51,277 @@ export function buildMessagesArray(options) {
     }
     return messages;
 }
+/**
+ * Build multimodal message array with image support
+ * Detects when images are present and routes through provider adapter
+ */
+export async function buildMultimodalMessagesArray(options, provider, model) {
+    // Check if this is a multimodal request
+    const hasImages = (options.input.images && options.input.images.length > 0) ||
+        (options.input.content &&
+            options.input.content.some((c) => c.type === "image"));
+    // If no images, use standard message building and convert to MultimodalChatMessage[]
+    if (!hasImages) {
+        const standardMessages = buildMessagesArray(options);
+        return standardMessages.map((msg) => ({ ...msg, content: msg.content }));
+    }
+    // Validate provider supports vision
+    if (!ProviderImageAdapter.supportsVision(provider, model)) {
+        throw new Error(`Provider ${provider} with model ${model} does not support vision processing. ` +
+            `Supported providers: ${ProviderImageAdapter.getVisionProviders().join(", ")}`);
+    }
+    const messages = [];
+    // Build enhanced system prompt
+    let systemPrompt = options.systemPrompt?.trim() || "";
+    // Add conversation-aware instructions when history exists
+    const hasConversationHistory = options.conversationHistory && options.conversationHistory.length > 0;
+    if (hasConversationHistory) {
+        systemPrompt = `${systemPrompt.trim()}${CONVERSATION_INSTRUCTIONS}`;
+    }
+    // Add system message if we have one
+    if (systemPrompt.trim()) {
+        messages.push({
+            role: "system",
+            content: systemPrompt.trim(),
+        });
+    }
+    // Add conversation history if available
+    if (hasConversationHistory && options.conversationHistory) {
+        // Convert conversation history to MultimodalChatMessage format
+        options.conversationHistory.forEach((msg) => {
+            messages.push({
+                role: msg.role,
+                content: msg.content,
+            });
+        });
+    }
+    // Handle multimodal content
+    try {
+        let userContent;
+        if (options.input.content && options.input.content.length > 0) {
+            // Advanced content format - convert to provider-specific format
+            userContent = await convertContentToProviderFormat(options.input.content, provider, model);
+        }
+        else if (options.input.images && options.input.images.length > 0) {
+            // Simple images format - convert to provider-specific format
+            userContent = await convertSimpleImagesToProviderFormat(options.input.text, options.input.images, provider, model);
+        }
+        else {
+            // Text-only fallback
+            userContent = options.input.text;
+        }
+        // 🔧 CRITICAL FIX: Handle multimodal content properly for Vercel AI SDK
+        if (typeof userContent === "string") {
+            // Simple text content - use standard MultimodalChatMessage format
+            messages.push({
+                role: "user",
+                content: userContent,
+            });
+        }
+        else {
+            // 🔧 MULTIMODAL CONTENT: Wrap the content array in a proper message object
+            // The Vercel AI SDK expects messages with multimodal content arrays
+            messages.push({
+                role: "user",
+                content: userContent,
+            });
+        }
+        return messages;
+    }
+    catch (error) {
+        MultimodalLogger.logError("MULTIMODAL_BUILD", error, {
+            provider,
+            model,
+            hasImages,
+            imageCount: options.input.images?.length || 0,
+        });
+        throw error;
+    }
+}
+/**
+ * Convert advanced content format to provider-specific format
+ */
+async function convertContentToProviderFormat(content, provider, _model) {
+    const textContent = content.find((c) => c.type === "text");
+    const imageContent = content.filter((c) => c.type === "image");
+    if (!textContent) {
+        throw new Error("Multimodal content must include at least one text element");
+    }
+    if (imageContent.length === 0) {
+        return textContent.text;
+    }
+    // Extract images as Buffer | string array
+    const images = imageContent.map((img) => img.data);
+    return await convertSimpleImagesToProviderFormat(textContent.text, images, provider, _model);
+}
+/**
+ * Check if a string is an internet URL
+ */
+function isInternetUrl(input) {
+    return input.startsWith("http://") || input.startsWith("https://");
+}
+/**
+ * Download image from URL and convert to base64 data URI
+ */
+async function downloadImageFromUrl(url) {
+    try {
+        const response = await request(url, {
+            method: "GET",
+            headersTimeout: 10000, // 10 second timeout for headers
+            bodyTimeout: 30000, // 30 second timeout for body
+            maxRedirections: 5,
+        });
+        if (response.statusCode !== 200) {
+            throw new Error(`HTTP ${response.statusCode}: Failed to download image from ${url}`);
+        }
+        // Get content type from headers
+        const contentType = response.headers["content-type"] || "image/jpeg";
+        // Validate it's an image
+        if (!contentType.startsWith("image/")) {
+            throw new Error(`URL does not point to an image. Content-Type: ${contentType}`);
+        }
+        // Read the response body
+        const chunks = [];
+        for await (const chunk of response.body) {
+            chunks.push(chunk);
+        }
+        const buffer = Buffer.concat(chunks);
+        // Check file size (limit to 10MB)
+        const maxSize = 10 * 1024 * 1024; // 10MB
+        if (buffer.length > maxSize) {
+            throw new Error(`Image too large: ${buffer.length} bytes (max: ${maxSize} bytes)`);
+        }
+        // Convert to base64 data URI
+        const base64 = buffer.toString("base64");
+        const dataUri = `data:${contentType};base64,${base64}`;
+        return dataUri;
+    }
+    catch (error) {
+        MultimodalLogger.logError("URL_DOWNLOAD_FAILED", error, { url });
+        throw new Error(`Failed to download image from ${url}: ${error instanceof Error ? error.message : String(error)}`);
+    }
+}
+/**
+ * Convert simple images format to Vercel AI SDK format with smart auto-detection
+ * - URLs: Downloaded and converted to base64 for Vercel AI SDK compatibility
+ * - Local files: Converted to base64 for Vercel AI SDK compatibility
+ * - Buffers/Data URIs: Processed normally
+ */
+async function convertSimpleImagesToProviderFormat(text, images, provider, _model) {
+    // For Vercel AI SDK, we need to return the content in the standard format
+    // The Vercel AI SDK will handle provider-specific formatting internally
+    // Smart auto-detection: separate URLs from actual image data
+    const urlImages = [];
+    const actualImages = [];
+    images.forEach((image, _index) => {
+        if (typeof image === "string" && isInternetUrl(image)) {
+            // Internet URL - will be downloaded and converted to base64
+            urlImages.push(image);
+        }
+        else {
+            // Actual image data (file path, Buffer, data URI) - process for Vercel AI SDK
+            actualImages.push(image);
+        }
+    });
+    // Download URL images and add to actual images
+    for (const url of urlImages) {
+        try {
+            const downloadedDataUri = await downloadImageFromUrl(url);
+            actualImages.push(downloadedDataUri);
+        }
+        catch (error) {
+            MultimodalLogger.logError("URL_DOWNLOAD_FAILED_SKIPPING", error, { url });
+            // Continue processing other images even if one URL fails
+            logger.warn(`Failed to download image from ${url}, skipping: ${error instanceof Error ? error.message : String(error)}`);
+        }
+    }
+    const content = [{ type: "text", text }];
+    // Process all images (including downloaded URLs) for Vercel AI SDK
+    actualImages.forEach((image, index) => {
+        try {
+            // Vercel AI SDK expects { type: 'image', image: Buffer | string, mimeType?: string }
+            // For Vertex AI, we need to include mimeType
+            let imageData;
+            let mimeType = "image/jpeg"; // Default mime type
+            if (typeof image === "string") {
+                if (image.startsWith("data:")) {
+                    // Data URI (including downloaded URLs) - extract mime type and use directly
+                    const match = image.match(/^data:([^;]+);base64,(.+)$/);
+                    if (match) {
+                        mimeType = match[1];
+                        imageData = image; // Keep as data URI for Vercel AI SDK
+                    }
+                    else {
+                        imageData = image;
+                    }
+                }
+                else if (isInternetUrl(image)) {
+                    // This should not happen as URLs are processed separately above
+                    // But handle it gracefully just in case
+                    throw new Error(`Unprocessed URL found in actualImages: ${image}`);
+                }
+                else {
+                    // File path string - convert to base64 data URI
+                    try {
+                        if (existsSync(image)) {
+                            const buffer = readFileSync(image);
+                            const base64 = buffer.toString("base64");
+                            // Detect mime type from file extension
+                            const ext = image.toLowerCase().split(".").pop();
+                            switch (ext) {
+                                case "png":
+                                    mimeType = "image/png";
+                                    break;
+                                case "gif":
+                                    mimeType = "image/gif";
+                                    break;
+                                case "webp":
+                                    mimeType = "image/webp";
+                                    break;
+                                case "bmp":
+                                    mimeType = "image/bmp";
+                                    break;
+                                case "tiff":
+                                case "tif":
+                                    mimeType = "image/tiff";
+                                    break;
+                                default:
+                                    mimeType = "image/jpeg";
+                                    break;
+                            }
+                            imageData = `data:${mimeType};base64,${base64}`;
+                        }
+                        else {
+                            throw new Error(`Image file not found: ${image}`);
+                        }
+                    }
+                    catch (error) {
+                        MultimodalLogger.logError("FILE_PATH_CONVERSION", error, {
+                            index,
+                            filePath: image,
+                        });
+                        throw new Error(`Failed to convert file path to base64: ${image}. ${error}`);
+                    }
+                }
+            }
+            else {
+                // Buffer - convert to base64 data URI
+                const base64 = image.toString("base64");
+                imageData = `data:${mimeType};base64,${base64}`;
+            }
+            content.push({
+                type: "image",
+                image: imageData,
+                mimeType: mimeType, // Add mimeType for Vertex AI compatibility
+            });
+        }
+        catch (error) {
+            MultimodalLogger.logError("ADD_IMAGE_TO_CONTENT", error, {
+                index,
+                provider,
+            });
+            throw error;
+        }
+    });
+    return content;
+}
@@ -0,0 +1,107 @@
+/**
+ * Model Router for NeuroLink Orchestration
+ * Routes tasks to optimal models based on classification and requirements
+ */
+import type { TaskType } from "../types/taskClassificationTypes.js";
+export interface ModelRoute {
+    provider: string;
+    model: string;
+    reasoning: string;
+    confidence: number;
+}
+export interface ModelRoutingOptions {
+    /** Override the task classification */
+    forceTaskType?: TaskType;
+    /** Require specific performance characteristics */
+    requireFast?: boolean;
+    /** Require specific capability (reasoning, creativity, etc.) */
+    requireCapability?: string;
+    /** Fallback strategy if primary choice fails */
+    fallbackStrategy?: "fast" | "reasoning" | "auto";
+}
+/**
+ * Model configurations for different task types and providers
+ */
+declare const MODEL_CONFIGS: {
+    readonly fast: {
+        readonly primary: {
+            readonly provider: "vertex";
+            readonly model: "gemini-2.5-flash";
+            readonly capabilities: readonly ["speed", "general", "code", "basic-reasoning"];
+            readonly avgResponseTime: 800;
+            readonly costPerToken: 0.0001;
+            readonly reasoning: "Optimized for speed and efficiency via Vertex AI";
+        };
+        readonly fallback: {
+            readonly provider: "vertex";
+            readonly model: "gemini-2.5-pro";
+            readonly capabilities: readonly ["speed", "general", "basic-reasoning"];
+            readonly avgResponseTime: 1200;
+            readonly costPerToken: 0.0002;
+            readonly reasoning: "Vertex AI Gemini Pro fallback";
+        };
+    };
+    readonly reasoning: {
+        readonly primary: {
+            readonly provider: "vertex";
+            readonly model: "claude-sonnet-4@20250514";
+            readonly capabilities: readonly ["reasoning", "analysis", "complex-logic", "code", "creativity"];
+            readonly avgResponseTime: 3000;
+            readonly costPerToken: 0.003;
+            readonly reasoning: "Advanced reasoning and analysis via Claude Sonnet 4 on Vertex AI";
+        };
+        readonly fallback: {
+            readonly provider: "vertex";
+            readonly model: "claude-opus-4@20250514";
+            readonly capabilities: readonly ["reasoning", "analysis", "complex-logic", "code", "creativity", "agentic"];
+            readonly avgResponseTime: 4000;
+            readonly costPerToken: 0.005;
+            readonly reasoning: "Claude Opus 4 fallback on Vertex AI for most complex tasks";
+        };
+    };
+};
+/**
+ * Model Router
+ * Intelligently routes tasks to optimal models based on classification
+ */
+export declare class ModelRouter {
+    /**
+     * Route a prompt to the optimal model configuration
+     */
+    static route(prompt: string, options?: ModelRoutingOptions): ModelRoute;
+    /**
+     * Get fallback route if primary route fails
+     */
+    static getFallbackRoute(prompt: string, primaryRoute: ModelRoute, options?: ModelRoutingOptions): ModelRoute;
+    /**
+     * Determine task type from a model route
+     */
+    private static getTaskTypeFromRoute;
+    /**
+     * Get all available model configurations
+     */
+    static getAvailableModels(): typeof MODEL_CONFIGS;
+    /**
+     * Validate model availability for a given route
+     */
+    static validateRoute(route: ModelRoute): Promise<boolean>;
+    /**
+     * Get routing statistics for multiple prompts
+     */
+    static getRoutingStats(prompts: string[]): {
+        total: number;
+        fastRoutes: number;
+        reasoningRoutes: number;
+        averageConfidence: number;
+        providerDistribution: Record<string, number>;
+    };
+    /**
+     * Estimate cost and performance for a route
+     */
+    static getRouteEstimates(route: ModelRoute, estimatedTokens?: number): {
+        estimatedCost: number;
+        estimatedResponseTime: number;
+        capabilities: string[];
+    };
+}
+export {};
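
A usage sketch against the ModelRouter surface declared above; the import path is illustrative, and the chosen models are only the expected defaults since the outcome depends on BinaryTaskClassifier:

// Sketch only: exercises the static API from modelRouter.d.ts above.
import { ModelRouter } from "@juspay/neurolink/dist/lib/utils/modelRouter.js"; // illustrative path

// No options: BinaryTaskClassifier decides the tier (a short prompt typically routes "fast").
const route = ModelRouter.route("Summarize this paragraph in one line.");
// e.g. { provider: "vertex", model: "gemini-2.5-flash", reasoning: "...", confidence: 0.3–0.95 }

// Force the reasoning tier and ask for an "auto" fallback, which flips to the other tier's fallback model.
const hardPrompt = "Analyze the trade-offs between these two database schemas.";
const primary = ModelRouter.route(hardPrompt, {
  forceTaskType: "reasoning",
  fallbackStrategy: "auto",
});
const fallback = ModelRouter.getFallbackRoute(hardPrompt, primary, { fallbackStrategy: "auto" });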
@@ -0,0 +1,292 @@
+/**
+ * Model Router for NeuroLink Orchestration
+ * Routes tasks to optimal models based on classification and requirements
+ */
+import { logger } from "./logger.js";
+import { BinaryTaskClassifier } from "./taskClassifier.js";
+import { redactForRouting } from "./promptRedaction.js";
+/**
+ * Routing configuration constants
+ */
+const ROUTING_CONFIG = {
+    minRouteConfidence: 0.3,
+    maxRouteConfidence: 0.95,
+    confidenceBoost: 0.1,
+};
+/**
+ * Model configurations for different task types and providers
+ */
+const MODEL_CONFIGS = {
+    fast: {
+        primary: {
+            provider: "vertex",
+            model: "gemini-2.5-flash",
+            capabilities: ["speed", "general", "code", "basic-reasoning"],
+            avgResponseTime: 800, // ms
+            costPerToken: 0.0001,
+            reasoning: "Optimized for speed and efficiency via Vertex AI",
+        },
+        fallback: {
+            provider: "vertex",
+            model: "gemini-2.5-pro",
+            capabilities: ["speed", "general", "basic-reasoning"],
+            avgResponseTime: 1200,
+            costPerToken: 0.0002,
+            reasoning: "Vertex AI Gemini Pro fallback",
+        },
+    },
+    reasoning: {
+        primary: {
+            provider: "vertex",
+            model: "claude-sonnet-4@20250514",
+            capabilities: [
+                "reasoning",
+                "analysis",
+                "complex-logic",
+                "code",
+                "creativity",
+            ],
+            avgResponseTime: 3000, // ms
+            costPerToken: 0.003,
+            reasoning: "Advanced reasoning and analysis via Claude Sonnet 4 on Vertex AI",
+        },
+        fallback: {
+            provider: "vertex",
+            model: "claude-opus-4@20250514",
+            capabilities: [
+                "reasoning",
+                "analysis",
+                "complex-logic",
+                "code",
+                "creativity",
+                "agentic",
+            ],
+            avgResponseTime: 4000,
+            costPerToken: 0.005,
+            reasoning: "Claude Opus 4 fallback on Vertex AI for most complex tasks",
+        },
+    },
+};
+/**
+ * Model Router
+ * Intelligently routes tasks to optimal models based on classification
+ */
+export class ModelRouter {
+    /**
+     * Route a prompt to the optimal model configuration
+     */
+    static route(prompt, options = {}) {
+        const startTime = Date.now();
+        // 1. Classify the task if not overridden
+        let classification;
+        if (options.forceTaskType) {
+            classification = {
+                type: options.forceTaskType,
+                confidence: ROUTING_CONFIG.maxRouteConfidence, // Use maxRouteConfidence instead of 1.0
+                reasoning: "forced task type",
+            };
+        }
+        else {
+            classification = BinaryTaskClassifier.classify(prompt);
+        }
+        // 2. Apply special requirements
+        let taskType = classification.type;
+        const reasons = [classification.reasoning];
+        if (options.requireFast) {
+            taskType = "fast";
+            reasons.push("speed required");
+        }
+        if (options.requireCapability) {
+            // Check if the capability suggests a specific task type
+            const capability = options.requireCapability.toLowerCase();
+            if (["analysis", "reasoning", "complex", "research"].some((c) => capability.includes(c))) {
+                taskType = "reasoning";
+                reasons.push(`capability: ${capability}`);
+            }
+            else if (["speed", "quick", "fast", "simple"].some((c) => capability.includes(c))) {
+                taskType = "fast";
+                reasons.push(`capability: ${capability}`);
+            }
+        }
+        // 3. Select model configuration
+        const config = MODEL_CONFIGS[taskType];
+        const selectedConfig = config.primary;
+        // 4. Calculate confidence based on multiple factors
+        let confidence = classification.confidence;
+        // Adjust confidence based on prompt characteristics
+        if (taskType === "fast" && prompt.length < 30) {
+            confidence = Math.min(ROUTING_CONFIG.maxRouteConfidence, confidence + ROUTING_CONFIG.confidenceBoost);
+            reasons.push("very short prompt");
+        }
+        if (taskType === "reasoning" && prompt.length > 150) {
+            confidence = Math.min(ROUTING_CONFIG.maxRouteConfidence, confidence + ROUTING_CONFIG.confidenceBoost);
+            reasons.push("detailed prompt");
+        }
+        // Ensure final confidence is within configured bounds
+        confidence = Math.max(ROUTING_CONFIG.minRouteConfidence, Math.min(ROUTING_CONFIG.maxRouteConfidence, confidence));
+        // 5. Create route result
+        const route = {
+            provider: selectedConfig.provider,
+            model: selectedConfig.model,
+            reasoning: reasons.join(", "),
+            confidence,
+        };
+        const routingTime = Date.now() - startTime;
+        logger.debug("Model routing decision", {
+            prompt: redactForRouting(prompt),
+            taskType,
+            route: {
+                provider: route.provider,
+                model: route.model,
+                confidence: route.confidence.toFixed(2),
+            },
+            reasoning: route.reasoning,
+            routingTime: `${routingTime}ms`,
+            options: Object.keys(options).length > 0 ? options : undefined,
+        });
+        return route;
+    }
+    /**
+     * Get fallback route if primary route fails
+     */
+    static getFallbackRoute(prompt, primaryRoute, options = {}) {
+        // Determine fallback strategy
+        let fallbackType;
+        if (options.fallbackStrategy) {
+            if (options.fallbackStrategy === "auto") {
+                // Use opposite of primary for fallback
+                const primaryType = this.getTaskTypeFromRoute(primaryRoute);
+                fallbackType = primaryType === "fast" ? "reasoning" : "fast";
+            }
+            else {
+                fallbackType = options.fallbackStrategy;
+            }
+        }
+        else {
+            // Default: use fallback model of same type
+            fallbackType = this.getTaskTypeFromRoute(primaryRoute);
+        }
+        const config = MODEL_CONFIGS[fallbackType];
+        const fallbackConfig = config.fallback;
+        const route = {
+            provider: fallbackConfig.provider,
+            model: fallbackConfig.model,
+            reasoning: `fallback from ${primaryRoute.provider}/${primaryRoute.model}`,
+            confidence: Math.max(ROUTING_CONFIG.minRouteConfidence, Math.min(ROUTING_CONFIG.maxRouteConfidence, primaryRoute.confidence - 0.2)),
+        };
+        logger.debug("Fallback route selected", {
+            originalRoute: `${primaryRoute.provider}/${primaryRoute.model}`,
+            fallbackRoute: `${route.provider}/${route.model}`,
+            fallbackType,
+            strategy: options.fallbackStrategy || "default",
+        });
+        return route;
+    }
+    /**
+     * Determine task type from a model route
+     */
+    static getTaskTypeFromRoute(route) {
+        // Check which config matches this route
+        for (const [taskType, config] of Object.entries(MODEL_CONFIGS)) {
+            if (config.primary.provider === route.provider &&
+                config.primary.model === route.model) {
+                return taskType;
+            }
+            if (config.fallback.provider === route.provider &&
+                config.fallback.model === route.model) {
+                return taskType;
+            }
+        }
+        // Default fallback based on model name patterns
+        if (route.model.includes("flash") || route.model.includes("mini")) {
+            return "fast";
+        }
+        return "reasoning";
+    }
+    /**
+     * Get all available model configurations
+     */
+    static getAvailableModels() {
+        return MODEL_CONFIGS;
+    }
+    /**
+     * Validate model availability for a given route
+     */
+    static async validateRoute(route) {
+        try {
+            // This would typically check provider availability
+            // For now, just validate the configuration exists
+            const configs = Object.values(MODEL_CONFIGS).flatMap((config) => [
+                config.primary,
+                config.fallback,
+            ]);
+            return configs.some((config) => config.provider === route.provider && config.model === route.model);
+        }
+        catch (error) {
+            logger.error("Route validation failed", {
+                route,
+                error: error instanceof Error ? error.message : String(error),
+            });
+            return false;
+        }
+    }
+    /**
+     * Get routing statistics for multiple prompts
+     */
+    static getRoutingStats(prompts) {
+        const routes = prompts.map((prompt) => this.route(prompt));
+        // Handle empty prompts array to avoid divide-by-zero
+        if (routes.length === 0) {
+            const stats = {
+                total: 0,
+                fastRoutes: 0,
+                reasoningRoutes: 0,
+                averageConfidence: 0,
+                providerDistribution: {},
+            };
+            logger.debug("Routing statistics", stats);
+            return stats;
+        }
+        const stats = {
+            total: routes.length,
+            fastRoutes: routes.filter((r) => {
+                const taskType = this.getTaskTypeFromRoute(r);
+                return taskType === "fast";
+            }).length,
+            reasoningRoutes: routes.filter((r) => {
+                const taskType = this.getTaskTypeFromRoute(r);
+                return taskType === "reasoning";
+            }).length,
+            averageConfidence: routes.reduce((sum, r) => sum + r.confidence, 0) / routes.length,
+            providerDistribution: routes.reduce((dist, r) => {
+                dist[r.provider] = (dist[r.provider] || 0) + 1;
+                return dist;
+            }, {}),
+        };
+        logger.debug("Routing statistics", stats);
+        return stats;
+    }
+    /**
+     * Estimate cost and performance for a route
+     */
+    static getRouteEstimates(route, estimatedTokens = 500) {
+        // Find the config for this route
+        const allConfigs = Object.values(MODEL_CONFIGS).flatMap((config) => [
+            config.primary,
+            config.fallback,
+        ]);
+        const config = allConfigs.find((c) => c.provider === route.provider && c.model === route.model);
+        if (!config) {
+            return {
+                estimatedCost: 0,
+                estimatedResponseTime: 2000,
+                capabilities: [],
+            };
+        }
+        return {
+            estimatedCost: config.costPerToken * estimatedTokens,
+            estimatedResponseTime: config.avgResponseTime,
+            capabilities: [...config.capabilities],
+        };
+    }
+}
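
Since getRouteEstimates simply multiplies the configured costPerToken by the token estimate and echoes the configured latency and capabilities, a quick worked example, continuing the illustrative sketch above (values taken from the MODEL_CONFIGS table; the prompt and expected model are assumptions):

// Sketch only: numbers follow from the MODEL_CONFIGS table in modelRouter.js above.
const quickRoute = ModelRouter.route("What is 2 + 2?"); // typically vertex / gemini-2.5-flash
const estimates = ModelRouter.getRouteEstimates(quickRoute, 500);
// For gemini-2.5-flash: estimatedCost = 0.0001 * 500 = 0.05,
// estimatedResponseTime = 800 (ms),
// capabilities = ["speed", "general", "code", "basic-reasoning"].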