@juspay/neurolink 7.45.0 → 7.47.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. package/CHANGELOG.md +12 -0
  2. package/dist/adapters/providerImageAdapter.js +12 -0
  3. package/dist/cli/commands/config.d.ts +2 -2
  4. package/dist/core/constants.js +1 -1
  5. package/dist/evaluation/contextBuilder.d.ts +48 -0
  6. package/dist/evaluation/contextBuilder.js +134 -0
  7. package/dist/evaluation/index.d.ts +36 -0
  8. package/dist/evaluation/index.js +61 -0
  9. package/dist/evaluation/prompts.d.ts +22 -0
  10. package/dist/evaluation/prompts.js +73 -0
  11. package/dist/evaluation/ragasEvaluator.d.ts +28 -0
  12. package/dist/evaluation/ragasEvaluator.js +90 -0
  13. package/dist/evaluation/retryManager.d.ts +40 -0
  14. package/dist/evaluation/retryManager.js +78 -0
  15. package/dist/evaluation/scoring.d.ts +16 -0
  16. package/dist/evaluation/scoring.js +35 -0
  17. package/dist/lib/adapters/providerImageAdapter.js +12 -0
  18. package/dist/lib/core/constants.js +1 -1
  19. package/dist/lib/evaluation/contextBuilder.d.ts +48 -0
  20. package/dist/lib/evaluation/contextBuilder.js +134 -0
  21. package/dist/lib/evaluation/index.d.ts +36 -0
  22. package/dist/lib/evaluation/index.js +61 -0
  23. package/dist/lib/evaluation/prompts.d.ts +22 -0
  24. package/dist/lib/evaluation/prompts.js +73 -0
  25. package/dist/lib/evaluation/ragasEvaluator.d.ts +28 -0
  26. package/dist/lib/evaluation/ragasEvaluator.js +90 -0
  27. package/dist/lib/evaluation/retryManager.d.ts +40 -0
  28. package/dist/lib/evaluation/retryManager.js +78 -0
  29. package/dist/lib/evaluation/scoring.d.ts +16 -0
  30. package/dist/lib/evaluation/scoring.js +35 -0
  31. package/dist/lib/middleware/builtin/autoEvaluation.d.ts +14 -0
  32. package/dist/lib/middleware/builtin/autoEvaluation.js +181 -0
  33. package/dist/lib/middleware/factory.js +6 -0
  34. package/dist/lib/providers/azureOpenai.js +36 -3
  35. package/dist/lib/providers/googleAiStudio.js +37 -3
  36. package/dist/lib/providers/googleVertex.js +37 -3
  37. package/dist/lib/types/evaluation.d.ts +2 -0
  38. package/dist/lib/types/evaluationTypes.d.ts +142 -0
  39. package/dist/lib/types/evaluationTypes.js +1 -0
  40. package/dist/lib/types/middlewareTypes.d.ts +28 -2
  41. package/dist/lib/utils/imageProcessor.d.ts +44 -0
  42. package/dist/lib/utils/imageProcessor.js +159 -8
  43. package/dist/lib/utils/messageBuilder.d.ts +4 -6
  44. package/dist/lib/utils/messageBuilder.js +145 -1
  45. package/dist/middleware/builtin/autoEvaluation.d.ts +14 -0
  46. package/dist/middleware/builtin/autoEvaluation.js +181 -0
  47. package/dist/middleware/factory.js +6 -0
  48. package/dist/providers/azureOpenai.js +36 -3
  49. package/dist/providers/googleAiStudio.js +37 -3
  50. package/dist/providers/googleVertex.js +37 -3
  51. package/dist/types/evaluation.d.ts +2 -0
  52. package/dist/types/evaluationTypes.d.ts +142 -0
  53. package/dist/types/evaluationTypes.js +1 -0
  54. package/dist/types/middlewareTypes.d.ts +28 -2
  55. package/dist/utils/imageProcessor.d.ts +44 -0
  56. package/dist/utils/imageProcessor.js +159 -8
  57. package/dist/utils/messageBuilder.d.ts +4 -6
  58. package/dist/utils/messageBuilder.js +145 -1
  59. package/package.json +1 -1
@@ -0,0 +1,181 @@
+ /**
+  * @file Implements the Auto-Evaluation Middleware for ensuring response quality.
+  */
+ import { Evaluator } from "../../evaluation/index.js";
+ import { logger } from "../../utils/logger.js";
+ /**
+  * Creates the Auto-Evaluation middleware, which intercepts generation requests
+  * to evaluate the quality of the response. If the response quality is below a
+  * configured threshold, it can trigger retries with feedback.
+  *
+  * @param config - Configuration for the auto-evaluation middleware.
+  * @returns A `NeuroLinkMiddleware` object.
+  */
+ export function createAutoEvaluationMiddleware(config = {}) {
+     const metadata = {
+         id: "autoEvaluation",
+         name: "Auto Evaluation",
+         description: "Automatically evaluates response quality and retries if needed.",
+         priority: 90,
+         defaultEnabled: false, // Should be explicitly enabled
+     };
+     logger.debug("Auto-Evaluation Middleware Config:", config);
+     const middleware = {
+         wrapGenerate: async ({ doGenerate, params }) => {
+             const options = params;
+             const rawResult = await doGenerate();
+             const result = {
+                 ...rawResult,
+                 content: rawResult.text ?? "",
+                 usage: {
+                     input: rawResult.usage.promptTokens,
+                     output: rawResult.usage.completionTokens,
+                     total: rawResult.usage.promptTokens + rawResult.usage.completionTokens,
+                 },
+                 toolCalls: rawResult.toolCalls?.map((tc) => {
+                     let parsedArgs = tc.args;
+                     if (typeof tc.args === "string") {
+                         try {
+                             parsedArgs = JSON.parse(tc.args);
+                         }
+                         catch (e) {
+                             logger.warn(`Failed to parse tool call args for tool ${tc.toolName}:`, e);
+                             parsedArgs = tc.args; // Fallback to original string if parsing fails
+                         }
+                     }
+                     return {
+                         ...tc,
+                         args: parsedArgs,
+                     };
+                 }),
+             };
+             const isBlocking = config.blocking !== false;
+             if (isBlocking) {
+                 const evaluationResult = await performEvaluation(config, options, result);
+                 return {
+                     ...rawResult,
+                     evaluationResult,
+                 };
+             }
+             else {
+                 performEvaluation(config, options, result).catch((err) => {
+                     logger.error("Non-blocking auto-evaluation error:", err);
+                 });
+                 return rawResult;
+             }
+         },
+         wrapStream: async ({ doStream, params }) => {
+             const options = params;
+             const rawResult = await doStream();
+             const [streamForUser, streamForEvaluation] = rawResult.stream.tee();
+             // Non-blocking evaluation for streams
+             consumeAndEvaluateStream(config, options, streamForEvaluation).catch((err) => {
+                 logger.error("Non-blocking stream auto-evaluation error:", err);
+             });
+             return {
+                 ...rawResult,
+                 stream: streamForUser,
+             };
+         },
+     };
+     return {
+         ...middleware,
+         metadata,
+     };
+ }
+ /**
+  * A common function to perform the evaluation logic.
+  * @param config The middleware configuration.
+  * @param options The text generation options.
+  * @param result The generation result.
+  */
+ async function performEvaluation(config, options, result) {
+     const isBlocking = config.blocking !== false;
+     const threshold = config.threshold ??
+         (Number(process.env.NEUROLINK_EVALUATION_THRESHOLD) || 7);
+     try {
+         const evaluator = new Evaluator({
+             threshold,
+             provider: config.provider,
+             promptGenerator: config.promptGenerator,
+             evaluationModel: config.evaluationModel,
+         });
+         const evaluationResult = await evaluator.evaluate(options, result, threshold, config);
+         if (config.onEvaluationComplete) {
+             await config.onEvaluationComplete(evaluationResult);
+         }
+     }
+     catch (error) {
+         logger.error("Error during auto-evaluation:", error);
+         if (isBlocking) {
+             throw error;
+         }
+     }
+ }
+ /**
+  * Consumes a stream to build the full response and then evaluates it.
+  * @param config The middleware configuration.
+  * @param options The generation options.
+  * @param stream The stream to consume.
+  */
+ async function consumeAndEvaluateStream(config, options, stream) {
+     let fullText = "";
+     let usage;
+     const toolCalls = [];
+     const reader = stream.getReader();
+     try {
+         while (true) {
+             const { done, value } = await reader.read();
+             if (done) {
+                 break;
+             }
+             switch (value.type) {
+                 case "text-delta":
+                     fullText += value.textDelta;
+                     break;
+                 case "tool-call":
+                     {
+                         let parsedArgs;
+                         try {
+                             parsedArgs = JSON.parse(value.args);
+                         }
+                         catch (e) {
+                             logger.warn(`Failed to parse tool call args for tool ${value.toolName}:`, e);
+                             // In case of parsing failure, we can't assign a string.
+                             // Let's use an object with the raw string to maintain type safety.
+                             parsedArgs = { raw: value.args };
+                         }
+                         toolCalls.push({
+                             toolCallId: value.toolCallId,
+                             toolName: value.toolName,
+                             args: parsedArgs,
+                         });
+                     }
+                     break;
+                 case "finish":
+                     usage = {
+                         input: value.usage.promptTokens,
+                         output: value.usage.completionTokens,
+                         total: value.usage.promptTokens + value.usage.completionTokens,
+                     };
+                     break;
+             }
+         }
+     }
+     finally {
+         reader.releaseLock();
+     }
+     const result = {
+         content: fullText,
+         usage,
+         toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
+     };
+     // For streams, evaluation is always non-blocking from the user's perspective.
+     if (config.blocking) {
+         logger.warn("Auto-evaluation 'blocking' mode is not supported for streaming responses. Evaluation will proceed non-blockingly.");
+     }
+     // Create a new config object to force non-blocking behavior for the evaluation function
+     const nonBlockingConfig = { ...config, blocking: false };
+     await performEvaluation(nonBlockingConfig, options, result);
+ }
+ export default createAutoEvaluationMiddleware;
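For orientation, a minimal sketch of applying this new middleware directly to a model with wrapLanguageModel from the "ai" package (which factory.js below already imports). The deep import path, the baseModel value, and the judge model name are placeholders and assumptions, not documented entry points:

import { wrapLanguageModel } from "ai";
// Assumed deep import path; the package may expose this creator differently.
import { createAutoEvaluationMiddleware } from "@juspay/neurolink/dist/middleware/builtin/autoEvaluation.js";

// `baseModel` stands in for any existing LanguageModelV1 instance.
declare const baseModel: Parameters<typeof wrapLanguageModel>[0]["model"];

const autoEval = createAutoEvaluationMiddleware({
  threshold: 8,                   // minimum passing score (1-10)
  blocking: true,                 // wait for the evaluation before returning
  evaluationModel: "gpt-4o-mini", // placeholder judge model name
  onEvaluationComplete: async (evaluation) => {
    console.log("Auto-evaluation result:", evaluation);
  },
});

const evaluatedModel = wrapLanguageModel({ model: baseModel, middleware: autoEval });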
@@ -2,6 +2,7 @@ import { wrapLanguageModel } from "ai";
  import { MiddlewareRegistry } from "./registry.js";
  import { createAnalyticsMiddleware } from "./builtin/analytics.js";
  import { createGuardrailsMiddleware } from "./builtin/guardrails.js";
+ import { createAutoEvaluationMiddleware } from "./builtin/autoEvaluation.js";
  import { logger } from "../utils/logger.js";
  /**
   * Middleware factory for creating and applying middleware chains.
@@ -24,6 +25,7 @@ export class MiddlewareFactory {
  const builtInMiddlewareCreators = {
      analytics: createAnalyticsMiddleware,
      guardrails: createGuardrailsMiddleware,
+     autoEvaluation: createAutoEvaluationMiddleware,
  };
  // Register built-in presets
  this.registerPreset({
@@ -54,6 +56,7 @@ export class MiddlewareFactory {
  if (!this.registry.has(middlewareId)) {
      const creator = builtInMiddlewareCreators[middlewareId];
      const config = options.middlewareConfig?.[middlewareId]?.config;
+     logger.debug(`Registering built-in middleware '${middlewareId}'`, config);
      this.registry.register(creator(config));
  }
  }
@@ -92,6 +95,7 @@ export class MiddlewareFactory {
  const middlewareConfig = this.buildMiddlewareConfig(mergedOptions);
  // Re-register middleware with the correct configuration for this call
  for (const [id, config] of Object.entries(middlewareConfig)) {
+     logger.debug(`Configuring middleware '${id}'`, { config });
      if (config.enabled && this.registry.has(id)) {
          const creator = this.getCreator(id);
          if (creator) {
@@ -137,7 +141,9 @@ export class MiddlewareFactory {
  const builtInMiddlewareCreators = {
      analytics: createAnalyticsMiddleware,
      guardrails: createGuardrailsMiddleware,
+     autoEvaluation: createAutoEvaluationMiddleware,
  };
+ logger.debug("Getting creator for middleware ID:", id);
  return builtInMiddlewareCreators[id];
  }
  /**
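Reading the registration code above, the per-call options shape that enables this middleware through the factory would look roughly like the following. This is a hedged sketch: buildMiddlewareConfig is not part of this diff, so only the middlewareConfig/enabled/config keys are taken from the code shown, and the values are illustrative:

// Hypothetical factory options enabling auto-evaluation for a single call.
const factoryOptions = {
  middlewareConfig: {
    autoEvaluation: {
      enabled: true,                    // checked as config.enabled before re-registering
      config: {
        threshold: 8,                   // AutoEvaluationConfig.threshold
        blocking: false,                // evaluate without delaying the response
        evaluationModel: "gpt-4o-mini", // placeholder judge model
      },
    },
  },
};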
@@ -4,7 +4,7 @@ import { BaseProvider } from "../core/baseProvider.js";
  import { APIVersions } from "../types/providers.js";
  import { validateApiKey, createAzureAPIKeyConfig, createAzureEndpointConfig, } from "../utils/providerConfig.js";
  import { logger } from "../utils/logger.js";
- import { buildMessagesArray } from "../utils/messageBuilder.js";
+ import { buildMessagesArray, buildMultimodalMessagesArray, convertToCoreMessages, } from "../utils/messageBuilder.js";
  import { createProxyFetch } from "../proxy/proxyFetch.js";
  import { DEFAULT_MAX_STEPS } from "../core/constants.js";
  export class AzureOpenAIProvider extends BaseProvider {
@@ -109,8 +109,41 @@ export class AzureOpenAIProvider extends BaseProvider {
  })),
  });
  }
- // Build message array from options
- const messages = buildMessagesArray(options);
+ // Build message array from options with multimodal support
+ const hasMultimodalInput = !!(options.input?.images?.length || options.input?.content?.length);
+ let messages;
+ if (hasMultimodalInput) {
+     logger.debug(`Azure OpenAI: Detected multimodal input, using multimodal message builder`, {
+         hasImages: !!options.input?.images?.length,
+         imageCount: options.input?.images?.length || 0,
+         hasContent: !!options.input?.content?.length,
+         contentCount: options.input?.content?.length || 0,
+     });
+     // Create multimodal options for buildMultimodalMessagesArray
+     const multimodalOptions = {
+         input: {
+             text: options.input?.text || "",
+             images: options.input?.images,
+             content: options.input?.content,
+         },
+         systemPrompt: options.systemPrompt,
+         conversationHistory: options.conversationMessages,
+         provider: this.providerName,
+         model: this.modelName,
+         temperature: options.temperature,
+         maxTokens: options.maxTokens,
+         enableAnalytics: options.enableAnalytics,
+         enableEvaluation: options.enableEvaluation,
+         context: options.context,
+     };
+     const mm = await buildMultimodalMessagesArray(multimodalOptions, this.providerName, this.modelName);
+     // Convert multimodal messages to Vercel AI SDK format (CoreMessage[])
+     messages = convertToCoreMessages(mm);
+ }
+ else {
+     logger.debug(`Azure OpenAI: Text-only input, using standard message builder`);
+     messages = buildMessagesArray(options);
+ }
  const model = await this.getAISDKModelWithMiddleware(options);
  const stream = await streamText({
      model,
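For reference, a hedged sketch of the options shape that would take the new multimodal branch above (the same branch appears in the two Google providers below). Only input.text / input.images / input.content and the other option names are taken from the code; the file read and the Buffer-valued image are assumptions about what the image adapter accepts:

import { readFile } from "node:fs/promises";

// Hypothetical stream options; any non-empty images/content array flips hasMultimodalInput.
const imageBytes = await readFile("./chart.png");
const streamOptions = {
  input: {
    text: "Describe the trend shown in this chart.",
    images: [imageBytes],
  },
  temperature: 0.2,
  maxTokens: 512,
};
// With these options the provider builds messages via buildMultimodalMessagesArray
// and converts them with convertToCoreMessages before calling streamText.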
@@ -7,8 +7,9 @@ import { createTimeoutController, TimeoutError } from "../utils/timeout.js";
  import { AuthenticationError, NetworkError, ProviderError, RateLimitError, } from "../types/errors.js";
  import { DEFAULT_MAX_STEPS } from "../core/constants.js";
  import { streamAnalyticsCollector } from "../core/streamAnalytics.js";
- import { buildMessagesArray } from "../utils/messageBuilder.js";
+ import { buildMessagesArray, buildMultimodalMessagesArray, convertToCoreMessages, } from "../utils/messageBuilder.js";
  // Google AI Live API types now imported from ../types/providerSpecific.js
+ // Import proper types for multimodal message handling
  // Create Google GenAI client
  async function createGoogleGenAIClient(apiKey) {
      const mod = await import("@google/genai");
@@ -90,8 +91,41 @@ export class GoogleAIStudioProvider extends BaseProvider {
  // Get tools consistently with generate method
  const shouldUseTools = !options.disableTools && this.supportsTools();
  const tools = shouldUseTools ? await this.getAllTools() : {};
- // Build message array from options
- const messages = buildMessagesArray(options);
+ // Build message array from options with multimodal support
+ const hasMultimodalInput = !!(options.input?.images?.length || options.input?.content?.length);
+ let messages;
+ if (hasMultimodalInput) {
+     logger.debug(`Google AI Studio: Detected multimodal input, using multimodal message builder`, {
+         hasImages: !!options.input?.images?.length,
+         imageCount: options.input?.images?.length || 0,
+         hasContent: !!options.input?.content?.length,
+         contentCount: options.input?.content?.length || 0,
+     });
+     // Create multimodal options for buildMultimodalMessagesArray
+     const multimodalOptions = {
+         input: {
+             text: options.input?.text || "",
+             images: options.input?.images,
+             content: options.input?.content,
+         },
+         systemPrompt: options.systemPrompt,
+         conversationHistory: options.conversationMessages,
+         provider: this.providerName,
+         model: this.modelName,
+         temperature: options.temperature,
+         maxTokens: options.maxTokens,
+         enableAnalytics: options.enableAnalytics,
+         enableEvaluation: options.enableEvaluation,
+         context: options.context,
+     };
+     const mm = await buildMultimodalMessagesArray(multimodalOptions, this.providerName, this.modelName);
+     // Convert multimodal messages to Vercel AI SDK format (CoreMessage[])
+     messages = convertToCoreMessages(mm);
+ }
+ else {
+     logger.debug(`Google AI Studio: Text-only input, using standard message builder`);
+     messages = buildMessagesArray(options);
+ }
  const result = await streamText({
      model,
      messages: messages,
@@ -11,8 +11,9 @@ import fs from "fs";
  import path from "path";
  import os from "os";
  import dns from "dns";
- import { buildMessagesArray } from "../utils/messageBuilder.js";
+ import { buildMessagesArray, buildMultimodalMessagesArray, convertToCoreMessages, } from "../utils/messageBuilder.js";
  import { createProxyFetch } from "../proxy/proxyFetch.js";
+ // Import proper types for multimodal message handling
  // Enhanced Anthropic support with direct imports
  // Using the dual provider architecture from Vercel AI SDK
  const hasAnthropicSupport = () => {
@@ -594,8 +595,41 @@ export class GoogleVertexProvider extends BaseProvider {
  try {
      // Validate stream options
      this.validateStreamOptionsOnly(options);
- // Build message array from options
- const messages = buildMessagesArray(options);
+ // Build message array from options with multimodal support
+ const hasMultimodalInput = !!(options.input?.images?.length || options.input?.content?.length);
+ let messages;
+ if (hasMultimodalInput) {
+     logger.debug(`${functionTag}: Detected multimodal input, using multimodal message builder`, {
+         hasImages: !!options.input?.images?.length,
+         imageCount: options.input?.images?.length || 0,
+         hasContent: !!options.input?.content?.length,
+         contentCount: options.input?.content?.length || 0,
+     });
+     // Create multimodal options for buildMultimodalMessagesArray
+     const multimodalOptions = {
+         input: {
+             text: options.input?.text || "",
+             images: options.input?.images,
+             content: options.input?.content,
+         },
+         systemPrompt: options.systemPrompt,
+         conversationHistory: options.conversationMessages,
+         provider: this.providerName,
+         model: this.modelName,
+         temperature: options.temperature,
+         maxTokens: options.maxTokens,
+         enableAnalytics: options.enableAnalytics,
+         enableEvaluation: options.enableEvaluation,
+         context: options.context,
+     };
+     const mm = await buildMultimodalMessagesArray(multimodalOptions, this.providerName, this.modelName);
+     // Convert multimodal messages to Vercel AI SDK format (CoreMessage[])
+     messages = convertToCoreMessages(mm);
+ }
+ else {
+     logger.debug(`${functionTag}: Text-only input, using standard message builder`);
+     messages = buildMessagesArray(options);
+ }
  const model = await this.getAISDKModelWithMiddleware(options); // This is where network connection happens!
  // Get all available tools (direct + MCP + external) for streaming
  const shouldUseTools = !options.disableTools && this.supportsTools();
@@ -25,6 +25,8 @@ export type EvaluationData = {
      domainAlignment?: number;
      terminologyAccuracy?: number;
      toolEffectiveness?: number;
+     responseContent?: string;
+     queryContent?: string;
      isOffTopic: boolean;
      alertSeverity: AlertSeverity;
      reasoning: string;
@@ -0,0 +1,142 @@
+ import type { LanguageModelV1CallOptions } from "ai";
+ import type { TokenUsage } from "./analytics.js";
+ import type { GenerateResult } from "./generateTypes.js";
+ import type { ToolExecution } from "./tools.js";
+ /**
+  * Represents the analysis of the user's query intent.
+  * This provides a basic understanding of what the user is trying to achieve.
+  */
+ export interface QueryIntentAnalysis {
+     /** The type of query, e.g., asking a question or giving a command. */
+     type: "question" | "command" | "greeting" | "unknown";
+     /** The estimated complexity of the query. */
+     complexity: "low" | "medium" | "high";
+     /** Whether the query likely required the use of tools to be answered correctly. */
+     shouldHaveUsedTools: boolean;
+ }
+ /**
+  * Represents a single turn in an enhanced conversation history,
+  * including tool executions and evaluations for richer context.
+  */
+ export interface EnhancedConversationTurn {
+     /** The role of the speaker, either 'user' or 'assistant'. */
+     role: "user" | "assistant";
+     /** The content of the message. */
+     content: string;
+     /** The timestamp of the message. */
+     timestamp: string;
+     /** Any tools that were executed as part of this turn. */
+     toolExecutions?: ToolExecution[];
+     /** The evaluation result for this turn, if applicable. */
+     evaluation?: EvaluationResult;
+ }
+ /**
+  * Contains all the rich context needed for a thorough, RAGAS-style evaluation.
+  * This object is constructed by the `ContextBuilder` and used by the `RAGASEvaluator`.
+  */
+ export interface EnhancedEvaluationContext {
+     /** The original user query. */
+     userQuery: string;
+     /** An analysis of the user's query intent. */
+     queryAnalysis: QueryIntentAnalysis;
+     /** The AI's response that is being evaluated. */
+     aiResponse: string;
+     /** The AI provider that generated the response. */
+     provider: string;
+     /** The specific model that generated the response. */
+     model: string;
+     /** The parameters used for the generation call. */
+     generationParams: {
+         temperature?: number;
+         maxTokens?: number;
+         systemPrompt?: string;
+     };
+     /** A list of tools that were executed. */
+     toolExecutions: ToolExecution[];
+     /** The history of the conversation leading up to this turn. */
+     conversationHistory: EnhancedConversationTurn[];
+     /** The response time of the AI in milliseconds. */
+     responseTime: number;
+     /** The token usage for the generation. */
+     tokenUsage: TokenUsage;
+     /** The results of any previous evaluation attempts for this response. */
+     previousEvaluations?: EvaluationResult[];
+     /** The current attempt number for this evaluation (1-based). */
+     attemptNumber: number;
+ }
+ /**
+  * Represents the result of a single evaluation attempt, based on RAGAS principles.
+  */
+ export interface EvaluationResult {
+     /** The final, overall score for the response, typically from 1 to 10. */
+     finalScore: number;
+     /** How well the response addresses the user's query. */
+     relevanceScore: number;
+     /** The factual accuracy of the information in the response. */
+     accuracyScore: number;
+     /** How completely the response answers the user's query. */
+     completenessScore: number;
+     /** Whether the final score meets the passing threshold. */
+     isPassing: boolean;
+     /** Constructive response from the judge LLM on how to improve the response. */
+     reasoning: string;
+     /** Specific suggestions for improving the response. */
+     suggestedImprovements: string;
+     /** The raw, unparsed response from the judge LLM. */
+     rawEvaluationResponse: string;
+     /** The model used to perform the evaluation. */
+     evaluationModel: string;
+     /** The time taken for the evaluation in milliseconds. */
+     evaluationTime: number;
+     /** The attempt number for this evaluation. */
+     attemptNumber: number;
+ }
+ /**
+  * Provides detailed information when a response fails quality assurance checks.
+  */
+ export interface QualityErrorDetails {
+     /** The history of all evaluation attempts for this response. */
+     evaluationHistory: EvaluationResult[];
+     /** The final score of the last attempt. */
+     finalScore: number;
+     /** The total number of evaluation attempts made. */
+     attempts: number;
+     /** A summary message of the failure. */
+     message: string;
+ }
+ /**
+  * Configuration for the main `Evaluator` class.
+  */
+ export interface EvaluationConfig {
+     /** The minimum score (1-10) for a response to be considered passing. */
+     threshold?: number;
+     /** The evaluation strategy to use. Currently only 'ragas' is supported. */
+     evaluationStrategy?: "ragas" | "custom";
+     /** The model to use for the LLM-as-judge evaluation. */
+     evaluationModel?: string;
+     /** The maximum number of evaluation attempts before failing. */
+     maxAttempts?: number;
+     /** The provider to use for the evaluation model. */
+     provider?: string;
+     /** A custom evaluator function to override the default behavior. */
+     customEvaluator?: (options: LanguageModelV1CallOptions, result: GenerateResult) => Promise<{
+         evaluationResult: EvaluationResult;
+         evalContext: EnhancedEvaluationContext;
+     }>;
+     /** The score below which a response is considered off-topic. */
+     offTopicThreshold?: number;
+     /** The score below which a failing response is considered a high severity alert. */
+     highSeverityThreshold?: number;
+     /** An optional function to generate custom evaluation prompts. */
+     promptGenerator?: GetPromptFunction;
+ }
+ /**
+  * A function that generates the main body of an evaluation prompt.
+  */
+ export type GetPromptFunction = (context: {
+     userQuery: string;
+     history: string;
+     tools: string;
+     retryInfo: string;
+     aiResponse: string;
+ }) => string;
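Since GetPromptFunction just returns the body of the evaluation prompt as a string, a custom generator can be a small pure function. A sketch, under the assumption that the judge is asked for the JSON fields named in EvaluationResult; the rubric wording and the import path are illustrative, not the package's built-in prompt:

import type { GetPromptFunction } from "@juspay/neurolink/dist/types/evaluationTypes.js"; // assumed path

const customPrompt: GetPromptFunction = ({ userQuery, history, tools, retryInfo, aiResponse }) => `
You are grading an assistant response on a 1-10 scale.
Conversation so far:
${history}
Tools used:
${tools}
${retryInfo}
User query: ${userQuery}
Assistant response: ${aiResponse}
Reply with JSON containing relevanceScore, accuracyScore, completenessScore, reasoning, and suggestedImprovements.
`;
// Passed as `promptGenerator` in EvaluationConfig or AutoEvaluationConfig.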
@@ -0,0 +1 @@
+ export {};
@@ -1,5 +1,7 @@
  import type { LanguageModelV1Middleware } from "ai";
  import type { JsonValue } from "../types/common.js";
+ import type { EvaluationData } from "./evaluation.js";
+ import type { GetPromptFunction } from "./evaluationTypes.js";
  /**
   * Metadata interface for NeuroLink middleware
   * Provides additional information about middleware without affecting execution
@@ -33,7 +35,7 @@ export interface MiddlewareConfig {
      /** Whether the middleware is enabled */
      enabled?: boolean;
      /** Middleware-specific configuration */
-     config?: Record<string, JsonValue>;
+     config?: Record<string, unknown>;
      /** Conditions under which to apply this middleware */
      conditions?: MiddlewareConditions;
  }
@@ -108,7 +110,7 @@ export interface MiddlewareChainStats {
  /**
   * Built-in middleware types
   */
- export type BuiltInMiddlewareType = "analytics" | "guardrails" | "logging" | "caching" | "rateLimit" | "retry" | "timeout";
+ export type BuiltInMiddlewareType = "analytics" | "guardrails" | "logging" | "caching" | "rateLimit" | "retry" | "timeout" | "autoEvaluation";
  /**
   * Middleware preset configurations
   */
@@ -144,3 +146,27 @@ export interface MiddlewareFactoryOptions {
      collectStats?: boolean;
  };
  }
+ /**
+  * Configuration for the Auto-Evaluation Middleware.
+  */
+ export interface AutoEvaluationConfig {
+     /** The minimum score (1-10) for a response to be considered passing. */
+     threshold?: number;
+     /** The maximum number of retry attempts before failing. */
+     maxRetries?: number;
+     /** The model to use for the LLM-as-judge evaluation. */
+     evaluationModel?: string;
+     /**
+      * If true, the middleware will wait for the evaluation to complete before returning.
+      * If the evaluation fails, it will throw an error. Defaults to true.
+      */
+     blocking?: boolean;
+     /** A callback function to be invoked with the evaluation result. */
+     onEvaluationComplete?: (evaluation: EvaluationData) => void | Promise<void>;
+     /** The score below which a response is considered off-topic. */
+     offTopicThreshold?: number;
+     /** The score below which a failing response is considered a high severity alert. */
+     highSeverityThreshold?: number;
+     promptGenerator?: GetPromptFunction;
+     provider?: string;
+ }
@@ -81,4 +81,48 @@ export declare const imageUtils: {
       * Convert file size to human readable format
       */
      formatFileSize: (bytes: number) => string;
+     /**
+      * Convert Buffer to base64 string
+      */
+     bufferToBase64: (buffer: Buffer) => string;
+     /**
+      * Convert base64 string to Buffer
+      */
+     base64ToBuffer: (base64: string) => Buffer;
+     /**
+      * Convert file path to base64 data URI
+      */
+     fileToBase64DataUri: (filePath: string, maxBytes?: number) => Promise<string>;
+     /**
+      * Convert URL to base64 data URI by downloading the image
+      */
+     urlToBase64DataUri: (url: string, { timeoutMs, maxBytes }?: {
+         timeoutMs?: number | undefined;
+         maxBytes?: number | undefined;
+     }) => Promise<string>;
+     /**
+      * Extract base64 data from data URI
+      */
+     extractBase64FromDataUri: (dataUri: string) => string;
+     /**
+      * Extract MIME type from data URI
+      */
+     extractMimeTypeFromDataUri: (dataUri: string) => string;
+     /**
+      * Create data URI from base64 and MIME type
+      */
+     createDataUri: (base64: string, mimeType?: string) => string;
+     /**
+      * Validate base64 string format
+      */
+     isValidBase64: (str: string) => boolean;
+     /**
+      * Get base64 string size in bytes
+      */
+     getBase64Size: (base64: string) => number;
+     /**
+      * Compress base64 image by reducing quality (basic implementation)
+      * Note: This is a placeholder - for production use, consider using sharp or similar
+      */
+     compressBase64: (base64: string, _quality?: number) => string;
  };
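Finally, a short sketch of how the new imageUtils helpers could be combined, based only on the signatures declared above; the import path and the sample file are illustrative:

import { imageUtils } from "@juspay/neurolink/dist/utils/imageProcessor.js"; // assumed path

async function inspectImage(filePath: string): Promise<string> {
  // Read the file into a size-capped data URI (~5 MB here), then pull it apart.
  const dataUri = await imageUtils.fileToBase64DataUri(filePath, 5 * 1024 * 1024);
  const mimeType = imageUtils.extractMimeTypeFromDataUri(dataUri);
  const base64 = imageUtils.extractBase64FromDataUri(dataUri);
  console.log(`${filePath}: ${mimeType}, ${imageUtils.formatFileSize(imageUtils.getBase64Size(base64))}`);
  return dataUri;
}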