@juspay/neurolink 7.44.0 → 7.46.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/cli/commands/config.d.ts +2 -2
- package/dist/cli/loop/optionsSchema.d.ts +1 -1
- package/dist/core/factory.d.ts +3 -1
- package/dist/core/factory.js +5 -3
- package/dist/evaluation/contextBuilder.d.ts +48 -0
- package/dist/evaluation/contextBuilder.js +134 -0
- package/dist/evaluation/index.d.ts +36 -0
- package/dist/evaluation/index.js +61 -0
- package/dist/evaluation/prompts.d.ts +22 -0
- package/dist/evaluation/prompts.js +73 -0
- package/dist/evaluation/ragasEvaluator.d.ts +28 -0
- package/dist/evaluation/ragasEvaluator.js +90 -0
- package/dist/evaluation/retryManager.d.ts +40 -0
- package/dist/evaluation/retryManager.js +78 -0
- package/dist/evaluation/scoring.d.ts +16 -0
- package/dist/evaluation/scoring.js +35 -0
- package/dist/factories/providerFactory.d.ts +3 -3
- package/dist/factories/providerFactory.js +3 -3
- package/dist/factories/providerRegistry.js +6 -6
- package/dist/lib/core/factory.d.ts +3 -1
- package/dist/lib/core/factory.js +5 -3
- package/dist/lib/evaluation/contextBuilder.d.ts +48 -0
- package/dist/lib/evaluation/contextBuilder.js +134 -0
- package/dist/lib/evaluation/index.d.ts +36 -0
- package/dist/lib/evaluation/index.js +61 -0
- package/dist/lib/evaluation/prompts.d.ts +22 -0
- package/dist/lib/evaluation/prompts.js +73 -0
- package/dist/lib/evaluation/ragasEvaluator.d.ts +28 -0
- package/dist/lib/evaluation/ragasEvaluator.js +90 -0
- package/dist/lib/evaluation/retryManager.d.ts +40 -0
- package/dist/lib/evaluation/retryManager.js +78 -0
- package/dist/lib/evaluation/scoring.d.ts +16 -0
- package/dist/lib/evaluation/scoring.js +35 -0
- package/dist/lib/factories/providerFactory.d.ts +3 -3
- package/dist/lib/factories/providerFactory.js +3 -3
- package/dist/lib/factories/providerRegistry.js +6 -6
- package/dist/lib/middleware/builtin/autoEvaluation.d.ts +14 -0
- package/dist/lib/middleware/builtin/autoEvaluation.js +181 -0
- package/dist/lib/middleware/factory.js +6 -0
- package/dist/lib/neurolink.js +7 -3
- package/dist/lib/providers/amazonBedrock.d.ts +2 -1
- package/dist/lib/providers/amazonBedrock.js +6 -4
- package/dist/lib/providers/amazonSagemaker.d.ts +1 -1
- package/dist/lib/providers/amazonSagemaker.js +2 -2
- package/dist/lib/providers/googleVertex.d.ts +1 -1
- package/dist/lib/providers/googleVertex.js +9 -10
- package/dist/lib/providers/sagemaker/config.d.ts +7 -5
- package/dist/lib/providers/sagemaker/config.js +11 -6
- package/dist/lib/types/evaluation.d.ts +2 -0
- package/dist/lib/types/evaluationTypes.d.ts +142 -0
- package/dist/lib/types/evaluationTypes.js +1 -0
- package/dist/lib/types/generateTypes.d.ts +2 -0
- package/dist/lib/types/middlewareTypes.d.ts +28 -2
- package/dist/lib/types/streamTypes.d.ts +1 -0
- package/dist/middleware/builtin/autoEvaluation.d.ts +14 -0
- package/dist/middleware/builtin/autoEvaluation.js +181 -0
- package/dist/middleware/factory.js +6 -0
- package/dist/neurolink.js +7 -3
- package/dist/providers/amazonBedrock.d.ts +2 -1
- package/dist/providers/amazonBedrock.js +6 -4
- package/dist/providers/amazonSagemaker.d.ts +1 -1
- package/dist/providers/amazonSagemaker.js +2 -2
- package/dist/providers/googleVertex.d.ts +1 -1
- package/dist/providers/googleVertex.js +9 -10
- package/dist/providers/sagemaker/config.d.ts +7 -5
- package/dist/providers/sagemaker/config.js +11 -6
- package/dist/types/evaluation.d.ts +2 -0
- package/dist/types/evaluationTypes.d.ts +142 -0
- package/dist/types/evaluationTypes.js +1 -0
- package/dist/types/generateTypes.d.ts +2 -0
- package/dist/types/middlewareTypes.d.ts +28 -2
- package/dist/types/streamTypes.d.ts +1 -0
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,15 @@
|
|
|
1
|
+
## [7.46.0](https://github.com/juspay/neurolink/compare/v7.45.0...v7.46.0) (2025-09-24)
|
|
2
|
+
|
|
3
|
+
### Features
|
|
4
|
+
|
|
5
|
+
- **(auto-evaluation):** added auto evaluation for LLM response ([6f23fae](https://github.com/juspay/neurolink/commit/6f23fae5cacb1c0686257cc7ed547be675b68b23))
|
|
6
|
+
|
|
7
|
+
## [7.45.0](https://github.com/juspay/neurolink/compare/v7.44.0...v7.45.0) (2025-09-24)
|
|
8
|
+
|
|
9
|
+
### Features
|
|
10
|
+
|
|
11
|
+
- **(provider):** Add support to provide region while streaming or generating for few providers ([a0a5bed](https://github.com/juspay/neurolink/commit/a0a5bed2bba4118dde149713708e36d4d29e1aae))
|
|
12
|
+
|
|
1
13
|
## [7.44.0](https://github.com/juspay/neurolink/compare/v7.43.0...v7.44.0) (2025-09-24)
|
|
2
14
|
|
|
3
15
|
### Features
|
|
@@ -256,8 +256,8 @@ declare const ConfigSchema: z.ZodObject<{
|
|
|
256
256
|
maxTokens?: number | undefined;
|
|
257
257
|
defaultEvaluationDomain?: string | undefined;
|
|
258
258
|
}, {
|
|
259
|
-
maxTokens?: number | undefined;
|
|
260
259
|
temperature?: number | undefined;
|
|
260
|
+
maxTokens?: number | undefined;
|
|
261
261
|
outputFormat?: "text" | "json" | "yaml" | undefined;
|
|
262
262
|
enableLogging?: boolean | undefined;
|
|
263
263
|
enableCaching?: boolean | undefined;
|
|
@@ -606,8 +606,8 @@ declare const ConfigSchema: z.ZodObject<{
|
|
|
606
606
|
defaultProvider?: "openai" | "anthropic" | "vertex" | "google-ai" | "auto" | "bedrock" | "azure" | "ollama" | "huggingface" | "mistral" | undefined;
|
|
607
607
|
profiles?: Record<string, any> | undefined;
|
|
608
608
|
preferences?: {
|
|
609
|
-
maxTokens?: number | undefined;
|
|
610
609
|
temperature?: number | undefined;
|
|
610
|
+
maxTokens?: number | undefined;
|
|
611
611
|
outputFormat?: "text" | "json" | "yaml" | undefined;
|
|
612
612
|
enableLogging?: boolean | undefined;
|
|
613
613
|
enableCaching?: boolean | undefined;
|
|
@@ -12,4 +12,4 @@ export interface OptionSchema {
|
|
|
12
12
|
* This object provides metadata for validation and help text in the CLI loop.
|
|
13
13
|
* It is derived from the main TextGenerationOptions interface to ensure consistency.
|
|
14
14
|
*/
|
|
15
|
-
export declare const textGenerationOptionsSchema: Record<keyof Omit<TextGenerationOptions, "prompt" | "input" | "schema" | "tools" | "context" | "conversationHistory" | "conversationMessages" | "conversationMemoryConfig" | "originalPrompt" | "middleware" | "expectedOutcome" | "evaluationCriteria">, OptionSchema>;
|
|
15
|
+
export declare const textGenerationOptionsSchema: Record<keyof Omit<TextGenerationOptions, "prompt" | "input" | "schema" | "tools" | "context" | "conversationHistory" | "conversationMessages" | "conversationMemoryConfig" | "originalPrompt" | "middleware" | "expectedOutcome" | "evaluationCriteria" | "region">, OptionSchema>;
|
package/dist/core/factory.d.ts
CHANGED
|
@@ -19,9 +19,11 @@ export declare class AIProviderFactory {
|
|
|
19
19
|
* @param providerName - Name of the provider ('vertex', 'bedrock', 'openai')
|
|
20
20
|
* @param modelName - Optional model name override
|
|
21
21
|
* @param enableMCP - Optional flag to enable MCP integration (default: true)
|
|
22
|
+
* @param sdk - SDK instance
|
|
23
|
+
* @param region - Optional region override for cloud providers
|
|
22
24
|
* @returns AIProvider instance
|
|
23
25
|
*/
|
|
24
|
-
static createProvider(providerName: string, modelName?: string | null, enableMCP?: boolean, sdk?: UnknownRecord): Promise<AIProvider>;
|
|
26
|
+
static createProvider(providerName: string, modelName?: string | null, enableMCP?: boolean, sdk?: UnknownRecord, region?: string): Promise<AIProvider>;
|
|
25
27
|
/**
|
|
26
28
|
* Create a provider instance with specific provider enum and model
|
|
27
29
|
* @param provider - Provider enum value
|
package/dist/core/factory.js
CHANGED
|
@@ -52,9 +52,11 @@ export class AIProviderFactory {
|
|
|
52
52
|
* @param providerName - Name of the provider ('vertex', 'bedrock', 'openai')
|
|
53
53
|
* @param modelName - Optional model name override
|
|
54
54
|
* @param enableMCP - Optional flag to enable MCP integration (default: true)
|
|
55
|
+
* @param sdk - SDK instance
|
|
56
|
+
* @param region - Optional region override for cloud providers
|
|
55
57
|
* @returns AIProvider instance
|
|
56
58
|
*/
|
|
57
|
-
static async createProvider(providerName, modelName, enableMCP = true, sdk) {
|
|
59
|
+
static async createProvider(providerName, modelName, enableMCP = true, sdk, region) {
|
|
58
60
|
const functionTag = "AIProviderFactory.createProvider";
|
|
59
61
|
// Providers are registered via ProviderFactory.initialize() on first use
|
|
60
62
|
logger.debug(`[${functionTag}] Provider creation started`, {
|
|
@@ -198,8 +200,8 @@ export class AIProviderFactory {
|
|
|
198
200
|
resolvedModelName: resolvedModelName || "not resolved",
|
|
199
201
|
finalModelName: finalModelName || "using provider default",
|
|
200
202
|
});
|
|
201
|
-
// Create provider with enhanced SDK
|
|
202
|
-
const provider = await ProviderFactory.createProvider(normalizedName, finalModelName, sdk);
|
|
203
|
+
// Create provider with enhanced SDK and region support
|
|
204
|
+
const provider = await ProviderFactory.createProvider(normalizedName, finalModelName, sdk, region);
|
|
203
205
|
// Summary logging in format expected by debugging tools
|
|
204
206
|
logger.debug(`[AIProviderFactory] Provider creation completed { providerName: '${normalizedName}', modelName: '${finalModelName}' }`);
|
|
205
207
|
logger.debug(`[AIProviderFactory] Resolved model: ${finalModelName}`);
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/**
 * @file Implements the ContextBuilder class for creating rich evaluation context.
 */
import type { EnhancedEvaluationContext, EvaluationResult } from "../types/evaluationTypes.js";
import type { GenerateResult } from "../types/generateTypes.js";
import type { LanguageModelV1CallOptions } from "ai";
/**
 * Builds the enhanced context required for a RAGAS-style evaluation.
 * This class gathers data from the generation options and results to create a
 * rich snapshot of the interaction, which is then used by the evaluator.
 *
 * Stateful across retries: `recordEvaluation` accumulates prior results so
 * subsequent contexts carry the retry history; call `reset` between
 * independent evaluation sequences.
 */
export declare class ContextBuilder {
    // 1-based attempt counter; incremented by recordEvaluation().
    private attemptNumber;
    // Results of prior attempts, fed into the next context's retry info.
    private previousEvaluations;
    // Flattens string-or-parts message content into a plain string.
    private extractTextFromContent;
    /**
     * Builds the full evaluation context for a single evaluation attempt.
     *
     * @param options The `LanguageModelV1CallOptions` used for the request
     *                (source of the prompt/messages and generation params).
     * @param result The `GenerateResult` from the provider.
     * @returns An `EnhancedEvaluationContext` object ready for evaluation.
     */
    buildContext(options: LanguageModelV1CallOptions, result: GenerateResult): EnhancedEvaluationContext;
    /**
     * Records the result of an evaluation and increments the internal attempt counter.
     * This is used to build up the `previousEvaluations` array for subsequent retries.
     *
     * @param evaluation The `EvaluationResult` from the last attempt.
     */
    recordEvaluation(evaluation: EvaluationResult): void;
    /**
     * Resets the internal state of the context builder. This should be called
     * before starting a new, independent evaluation sequence.
     */
    reset(): void;
    /**
     * Analyzes the user's query to determine intent and complexity
     * using simple length/prefix heuristics.
     * @param query The user's input query.
     * @returns A QueryIntentAnalysis object.
     */
    private analyzeQuery;
    /**
     * Maps the tool execution format from GenerateResult to the canonical ToolExecution type.
     * @param result The result from the generate call.
     * @returns An array of ToolExecution objects.
     */
    private mapToolExecutions;
}
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
/**
 * @file Implements the ContextBuilder class for creating rich evaluation context.
 */
import { logger } from "../utils/logger.js";
/**
 * Builds the enhanced context required for a RAGAS-style evaluation.
 * Gathers data from the generation options and result into a rich snapshot
 * of the interaction for the evaluator. Stateful across retries: recorded
 * evaluations accumulate until reset() is called.
 */
export class ContextBuilder {
    // 1-based attempt counter, bumped each time an evaluation is recorded.
    attemptNumber = 1;
    // History of prior attempts' evaluation results.
    previousEvaluations = [];
    /**
     * Flattens message content into a plain string. Accepts either a raw
     * string or an array of content parts; only "text" parts contribute.
     */
    extractTextFromContent(content) {
        if (typeof content === "string") {
            return content;
        }
        if (!Array.isArray(content)) {
            return "";
        }
        let text = "";
        for (const part of content) {
            if (part.type === "text" && "text" in part) {
                text += part.text;
            }
        }
        return text;
    }
    /**
     * Builds the full evaluation context for a single evaluation attempt.
     *
     * @param options The `LanguageModelV1CallOptions` used for the request.
     * @param result The `GenerateResult` from the provider.
     * @returns An `EnhancedEvaluationContext` object ready for evaluation.
     */
    buildContext(options, result) {
        const messages = options.prompt;
        // Last user message is treated as the query under evaluation.
        let userQuery = "";
        for (let i = messages.length - 1; i >= 0; i--) {
            if (messages[i].role === "user") {
                userQuery = this.extractTextFromContent(messages[i]?.content ?? "");
                break;
            }
        }
        // First system message (if any) provides the system prompt.
        let systemPrompt = "";
        for (const message of messages) {
            if (message.role === "system") {
                systemPrompt = this.extractTextFromContent(message?.content ?? "");
                break;
            }
        }
        // Non-system turns become the conversation history snapshot.
        // NOTE: timestamps are assigned at build time, not message time.
        const conversationHistory = [];
        for (const turn of messages || []) {
            if (turn.role === "system") {
                continue;
            }
            conversationHistory.push({
                role: turn.role,
                content: this.extractTextFromContent(turn.content),
                timestamp: new Date().toISOString(),
            });
        }
        const context = {
            userQuery,
            queryAnalysis: this.analyzeQuery(userQuery),
            aiResponse: result.content,
            provider: result.provider || "unknown",
            model: result.model || "unknown",
            generationParams: {
                temperature: options.temperature,
                maxTokens: options.maxTokens,
                systemPrompt: systemPrompt || undefined,
            },
            toolExecutions: this.mapToolExecutions(result),
            conversationHistory,
            responseTime: result.responseTime || 0,
            tokenUsage: result.usage || { input: 0, output: 0, total: 0 },
            previousEvaluations: this.previousEvaluations,
            attemptNumber: this.attemptNumber,
        };
        logger.debug("Built Evaluation Context:", context);
        return context;
    }
    /**
     * Records the result of an evaluation and increments the internal attempt
     * counter, building up `previousEvaluations` for subsequent retries.
     *
     * @param evaluation The `EvaluationResult` from the last attempt.
     */
    recordEvaluation(evaluation) {
        this.previousEvaluations.push(evaluation);
        this.attemptNumber += 1;
    }
    /**
     * Resets internal state. Call before starting a new, independent
     * evaluation sequence.
     */
    reset() {
        this.previousEvaluations = [];
        this.attemptNumber = 1;
    }
    /**
     * Analyzes the user's query to determine intent and complexity via
     * simple prefix/length heuristics.
     * @param query The user's input query.
     * @returns A QueryIntentAnalysis object.
     */
    analyzeQuery(query) {
        const normalized = query.toLowerCase();
        const looksLikeQuestion = ["what", "how", "why"].some((prefix) => normalized.startsWith(prefix));
        let type;
        if (looksLikeQuestion) {
            type = "question";
        }
        else if (normalized.length < 20) {
            type = "greeting";
        }
        else {
            type = "command";
        }
        let complexity = "low";
        if (query.length > 100) {
            complexity = "high";
        }
        else if (query.length > 40) {
            complexity = "medium";
        }
        return {
            type,
            complexity,
            shouldHaveUsedTools: false, // This would require deeper analysis
        };
    }
    /**
     * Maps the tool execution format from GenerateResult to the canonical
     * ToolExecution type.
     * @param result The result from the generate call.
     * @returns An array of ToolExecution objects.
     */
    mapToolExecutions(result) {
        const executions = result.toolExecutions;
        if (!executions) {
            return [];
        }
        const mapped = [];
        for (const exec of executions) {
            mapped.push({
                toolName: exec.name,
                params: exec.input,
                // Success is assumed here; per-tool failure info is not available.
                result: { success: true, data: exec.output },
                executionTime: 0,
                timestamp: Date.now(),
            });
        }
        return mapped;
    }
}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
/**
 * @file This file exports the main Evaluator class, which serves as the central entry point for the evaluation system.
 */
import type { GenerateResult } from "../types/generateTypes.js";
import type { EvaluationConfig } from "../types/evaluationTypes.js";
import type { LanguageModelV1CallOptions } from "ai";
import type { AutoEvaluationConfig } from "../types/middlewareTypes.js";
import type { EvaluationData } from "../types/evaluation.js";
/**
 * A centralized class for performing response evaluations. It supports different
 * evaluation strategies, with RAGAS-style model-based evaluation as the default.
 * This class orchestrates the context building and evaluation process.
 */
export declare class Evaluator {
    // Builds the rich context snapshot for each evaluation attempt.
    private contextBuilder;
    // Evaluation configuration (strategy, model, threshold, custom evaluator).
    private config;
    // The RAGAS-style "judge LLM" evaluator used by the default strategy.
    private ragasEvaluator;
    constructor(config?: EvaluationConfig);
    /**
     * The main entry point for performing an evaluation. It selects the evaluation
     * strategy based on the configuration and executes it.
     *
     * @param options The original `LanguageModelV1CallOptions` from the user request.
     * @param result The `GenerateResult` from the provider.
     * @param threshold The passing-score threshold applied to the evaluation.
     * @param config Auto-evaluation settings (e.g. off-topic / severity thresholds).
     * @returns A promise that resolves to the `EvaluationData`.
     * @throws If the configured strategy is unsupported, or "custom" is selected
     *         without a custom evaluator function.
     */
    evaluate(options: LanguageModelV1CallOptions, result: GenerateResult, threshold: number, config: AutoEvaluationConfig): Promise<EvaluationData>;
    /**
     * Performs evaluation using the RAGAS-style model-based evaluator.
     *
     * @param options The original `LanguageModelV1CallOptions`.
     * @param result The `GenerateResult` to be evaluated.
     * @returns A promise that resolves to the evaluation result and its context.
     */
    private evaluateWithRAGAS;
}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/**
 * @file This file exports the main Evaluator class, which serves as the central entry point for the evaluation system.
 */
import { ContextBuilder } from "./contextBuilder.js";
import { RAGASEvaluator } from "./ragasEvaluator.js";
import { mapToEvaluationData } from "./scoring.js";
/**
 * Centralized entry point for response evaluations. Supports multiple
 * strategies, defaulting to RAGAS-style model-based evaluation, and
 * orchestrates context building plus the evaluation itself.
 */
export class Evaluator {
    contextBuilder;
    config;
    ragasEvaluator;
    constructor(config = {}) {
        this.config = config;
        this.contextBuilder = new ContextBuilder();
        this.ragasEvaluator = new RAGASEvaluator(this.config.evaluationModel, this.config.provider, this.config.threshold, this.config.promptGenerator);
    }
    /**
     * Runs an evaluation using the strategy selected in the config
     * ("ragas" by default, or "custom" with a user-supplied evaluator).
     *
     * @param options The original call options from the user request.
     * @param result The `GenerateResult` from the provider.
     * @param threshold Passing-score threshold forwarded to scoring.
     * @param config Auto-evaluation settings (off-topic / severity thresholds).
     * @returns A promise resolving to the mapped `EvaluationData`.
     * @throws On an unsupported strategy, or "custom" without an evaluator.
     */
    async evaluate(options, result, threshold, config) {
        const strategy = this.config.evaluationStrategy || "ragas";
        if (strategy === "ragas") {
            const { evaluationResult, evalContext } = await this.evaluateWithRAGAS(options, result);
            return mapToEvaluationData(evalContext, evaluationResult, threshold, config.offTopicThreshold, config.highSeverityThreshold);
        }
        if (strategy === "custom") {
            const customEvaluator = this.config.customEvaluator;
            if (!customEvaluator) {
                throw new Error("Custom evaluator function not provided in config.");
            }
            const { evaluationResult, evalContext } = await customEvaluator(options, result);
            return mapToEvaluationData(evalContext, evaluationResult, threshold, config.offTopicThreshold, config.highSeverityThreshold);
        }
        throw new Error(`Unsupported evaluation strategy: ${strategy} `);
    }
    /**
     * Performs evaluation with the RAGAS-style model-based evaluator:
     * builds the context snapshot, then asks the judge model to score it.
     *
     * @param options The original call options.
     * @param result The `GenerateResult` to be evaluated.
     * @returns The evaluation result together with the context it was built from.
     */
    async evaluateWithRAGAS(options, result) {
        const evalContext = this.contextBuilder.buildContext(options, result);
        const evaluationResult = await this.ragasEvaluator.evaluate(evalContext);
        return { evaluationResult, evalContext };
    }
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import type { EnhancedEvaluationContext, GetPromptFunction } from "../types/evaluationTypes.js";
/**
 * A flexible class for building evaluation prompts. It allows for custom prompt
 * generation logic to be injected while ensuring a consistent output format
 * (the JSON score schema is always appended regardless of the prompt body).
 */
export declare class PromptBuilder {
    /**
     * Builds the final evaluation prompt.
     *
     * @param context The rich context for the evaluation.
     * @param getPrompt An optional function to generate the main body of the prompt.
     *                  If not provided, a default prompt is used.
     * @returns The complete prompt string to be sent to the judge LLM,
     *          always ending with the required JSON output-format section.
     */
    buildEvaluationPrompt(context: EnhancedEvaluationContext, getPrompt?: GetPromptFunction): string;
    /**
     * The default prompt generation logic.
     * @param context The prepared context strings (query, history, tools, retry info, response).
     * @returns The default prompt body.
     */
    private getDefaultPrompt;
}
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
/**
 * A flexible class for building evaluation prompts. Custom prompt-body
 * generation can be injected; the JSON output-format section is always
 * appended so the judge LLM's response stays machine-parseable.
 */
export class PromptBuilder {
    /**
     * Builds the final evaluation prompt.
     *
     * @param context The rich context for the evaluation.
     * @param getPrompt Optional generator for the main prompt body; when
     *                  omitted, the built-in default prompt is used.
     * @returns The complete prompt string for the judge LLM.
     */
    buildEvaluationPrompt(context, getPrompt) {
        // Render the conversation as "role: content" lines.
        const turns = [];
        for (const turn of context.conversationHistory) {
            turns.push(`${turn.role}: ${turn.content}`);
        }
        const history = turns.join("\n");
        // Summarize tool usage.
        let tools = "No tools were used.";
        if (context.toolExecutions.length > 0) {
            tools = `Tools were used: ${context.toolExecutions.map((t) => t.toolName).join(", ")}`;
        }
        // Summarize prior attempts so the judge sees retry feedback.
        const prior = context.previousEvaluations;
        let retryInfo = "This is the first attempt.";
        if (prior && prior.length > 0) {
            retryInfo = `This is attempt #${context.attemptNumber}. Previous reasoning: ${prior
                .map((e) => e.reasoning)
                .join("; ")} Previous suggested Improvements: ${prior.map((e) => e.suggestedImprovements).join("; ")}`;
        }
        const promptContext = {
            userQuery: context.userQuery,
            history,
            tools,
            retryInfo,
            aiResponse: context.aiResponse,
        };
        let mainPrompt;
        if (getPrompt) {
            mainPrompt = getPrompt(promptContext);
        }
        else {
            mainPrompt = this.getDefaultPrompt(promptContext);
        }
        return `
${mainPrompt}

**Output Format (JSON):**
{
"relevanceScore": <1-10>,
"accuracyScore": <1-10>,
"completenessScore": <1-10>,
"finalScore": <1-10>,
"reasoning": "<Your constructive reasoning here>",
"suggestedImprovements": "<How the response can be improved>"
}
`;
    }
    /**
     * The default prompt body: instructs the judge to score relevance,
     * accuracy, and completeness, then give overall feedback.
     * @param context The prepared context strings.
     * @returns The default prompt body.
     */
    getDefaultPrompt(context) {
        return `
You are an expert AI quality evaluator. Your task is to evaluate the AI assistant's response based on the provided context.
Provide a score from 1 to 10 for each of the following criteria: Relevance, Accuracy, and Completeness.
Finally, provide an overall finalScore and constructive feedback for improvement.

**Evaluation Context:**
- User Query: ${context.userQuery}
- Conversation History:
${context.history}
- Tools Executed: ${context.tools}
- Retry Information: ${context.retryInfo}

**AI Assistant's Response to Evaluate:**
${context.aiResponse}
`;
    }
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import type { EnhancedEvaluationContext, EvaluationResult, GetPromptFunction } from "../types/evaluationTypes.js";
/**
 * Implements a RAGAS-style evaluator that uses a "judge" LLM to score the
 * quality of an AI response based on rich, contextual information.
 */
export declare class RAGASEvaluator {
    // Judge model id; falls back to env NEUROLINK_RAGAS_EVALUATION_MODEL, then a default.
    private evaluationModel;
    // Judge provider name; falls back to env NEUROLINK_RAGAS_EVALUATION_PROVIDER, then a default.
    private providerName;
    // Minimum finalScore considered passing; falls back to env NEUROLINK_EVALUATION_THRESHOLD.
    private threshold;
    // Assembles the judge prompt from the evaluation context.
    private promptBuilder;
    // Optional custom generator for the main prompt body.
    private promptGenerator?;
    constructor(evaluationModel?: string, providerName?: string, threshold?: number, promptGenerator?: GetPromptFunction);
    /**
     * Evaluates an AI-generated response using a model-based approach.
     *
     * @param context The rich, contextual information for the evaluation.
     * @returns A promise that resolves to a detailed `EvaluationResult`.
     * @throws If the judge model fails to return a result.
     */
    evaluate(context: EnhancedEvaluationContext): Promise<EvaluationResult>;
    /**
     * Parses the raw JSON string from the judge LLM into a structured `EvaluationResult` object.
     * It includes error handling to gracefully manage malformed JSON
     * (malformed responses yield zeroed scores rather than throwing).
     *
     * @param rawResponse The raw string response from the evaluation model.
     * @returns A structured object containing the evaluation scores and feedback.
     */
    private parseEvaluationResponse;
}
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
import { AIProviderFactory } from "../core/factory.js";
import { PromptBuilder } from "./prompts.js";
import { logger } from "../utils/logger.js";
/**
 * RAGAS-style evaluator: asks a "judge" LLM to score the quality of an AI
 * response using the rich evaluation context, then parses the judge's JSON.
 */
export class RAGASEvaluator {
    evaluationModel;
    providerName;
    threshold;
    promptBuilder;
    promptGenerator;
    /**
     * @param evaluationModel Judge model id; defaults to the
     *        NEUROLINK_RAGAS_EVALUATION_MODEL env var, then "gemini-1.5-flash".
     * @param providerName Judge provider; defaults to the
     *        NEUROLINK_RAGAS_EVALUATION_PROVIDER env var, then "vertex".
     * @param threshold Passing score; defaults to NEUROLINK_EVALUATION_THRESHOLD, then 7.
     * @param promptGenerator Optional custom generator for the prompt body.
     */
    constructor(evaluationModel, providerName, threshold, promptGenerator) {
        const env = process.env;
        this.evaluationModel =
            evaluationModel || env.NEUROLINK_RAGAS_EVALUATION_MODEL || "gemini-1.5-flash";
        this.providerName =
            providerName || env.NEUROLINK_RAGAS_EVALUATION_PROVIDER || "vertex";
        this.threshold =
            threshold || Number(env.NEUROLINK_EVALUATION_THRESHOLD) || 7;
        this.promptGenerator = promptGenerator;
        this.promptBuilder = new PromptBuilder();
    }
    /**
     * Evaluates an AI-generated response via the judge model.
     *
     * @param context The rich, contextual information for the evaluation.
     * @returns A promise resolving to a detailed `EvaluationResult`.
     * @throws If the judge model returns no result.
     */
    async evaluate(context) {
        const startedAt = Date.now();
        const prompt = this.promptBuilder.buildEvaluationPrompt(context, this.promptGenerator);
        const judge = await AIProviderFactory.createProvider(this.providerName, this.evaluationModel);
        const generation = await judge.generate({
            input: { text: prompt },
        });
        if (!generation) {
            throw new Error("Evaluation generation failed to return a result.");
        }
        const rawEvaluationResponse = generation.content;
        const scores = this.parseEvaluationResponse(rawEvaluationResponse);
        return {
            ...scores,
            isPassing: scores.finalScore >= this.threshold, // This will be recalculated, but is needed for the type
            evaluationModel: this.evaluationModel,
            evaluationTime: Date.now() - startedAt,
            attemptNumber: context.attemptNumber,
            rawEvaluationResponse,
        };
    }
    /**
     * Parses the judge LLM's raw JSON (tolerating markdown code fences) into
     * the score/feedback fields. Malformed responses degrade to zeroed scores
     * instead of throwing.
     *
     * @param rawResponse The raw string response from the evaluation model.
     * @returns A structured object containing the evaluation scores and feedback.
     */
    parseEvaluationResponse(rawResponse) {
        try {
            // Strip ```json fences the judge may wrap its answer in.
            const cleanedResponse = rawResponse.replace(/```json\n|```/g, "").trim();
            const parsed = JSON.parse(cleanedResponse);
            logger.debug("Parsed evaluation response for RAGAS Evaluator:", parsed);
            return {
                relevanceScore: Number(parsed.relevanceScore) || 0,
                accuracyScore: Number(parsed.accuracyScore) || 0,
                completenessScore: Number(parsed.completenessScore) || 0,
                finalScore: Number(parsed.finalScore) || 0,
                suggestedImprovements: parsed.suggestedImprovements || "No suggestions provided.",
                reasoning: parsed.reasoning || "No reasoning provided.",
            };
        }
        catch (error) {
            logger.error("Failed to parse evaluation response:", error);
            return {
                relevanceScore: 0,
                accuracyScore: 0,
                completenessScore: 0,
                finalScore: 0,
                reasoning: "Error parsing evaluation response.",
                suggestedImprovements: "Error parsing evaluation response.",
            };
        }
    }
}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
/**
 * @file Implements the RetryManager class for handling evaluation retries.
 */
import type { EvaluationResult } from "../types/evaluationTypes.js";
import type { TextGenerationOptions } from "../types/generateTypes.js";
/**
 * Manages the retry logic for the auto-evaluation middleware. It decides if a
 * retry is warranted based on the evaluation score and prepares the options
 * for the next generation attempt by incorporating feedback into the prompt.
 */
export declare class RetryManager {
    // Upper bound on retry attempts after the initial generation.
    private maxRetries;
    constructor(maxRetries?: number);
    /**
     * Determines if a retry should be attempted based on the evaluation result.
     *
     * @param evaluation The `EvaluationResult` of the last attempt.
     * @returns `true` if the response did not pass and the maximum number of retries has not been reached.
     */
    shouldRetry(evaluation: EvaluationResult): boolean;
    /**
     * Prepares the options for the next generation attempt by creating a new,
     * improved prompt that includes feedback from the failed evaluation.
     * The original options object is not the return value; a new options
     * object with the enhanced prompt is produced.
     *
     * @param originalOptions The original `TextGenerationOptions` from the user request.
     * @param evaluation The `EvaluationResult` of the failed attempt.
     * @returns A new `TextGenerationOptions` object with an improved prompt.
     */
    prepareRetryOptions(originalOptions: TextGenerationOptions, evaluation: EvaluationResult): TextGenerationOptions;
    /**
     * Builds a new prompt for a retry attempt by incorporating feedback from the
     * evaluation. The instructions become progressively more direct with each attempt.
     *
     * @param originalPrompt The user's original prompt.
     * @param feedback The constructive feedback from the evaluation.
     * @param attemptNumber The upcoming attempt number (e.g., 2 for the first retry).
     * @returns A new, enhanced prompt string.
     */
    private buildRetryPrompt;
}
|