@juspay/neurolink 7.44.0 → 7.46.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/cli/commands/config.d.ts +2 -2
- package/dist/cli/loop/optionsSchema.d.ts +1 -1
- package/dist/core/factory.d.ts +3 -1
- package/dist/core/factory.js +5 -3
- package/dist/evaluation/contextBuilder.d.ts +48 -0
- package/dist/evaluation/contextBuilder.js +134 -0
- package/dist/evaluation/index.d.ts +36 -0
- package/dist/evaluation/index.js +61 -0
- package/dist/evaluation/prompts.d.ts +22 -0
- package/dist/evaluation/prompts.js +73 -0
- package/dist/evaluation/ragasEvaluator.d.ts +28 -0
- package/dist/evaluation/ragasEvaluator.js +90 -0
- package/dist/evaluation/retryManager.d.ts +40 -0
- package/dist/evaluation/retryManager.js +78 -0
- package/dist/evaluation/scoring.d.ts +16 -0
- package/dist/evaluation/scoring.js +35 -0
- package/dist/factories/providerFactory.d.ts +3 -3
- package/dist/factories/providerFactory.js +3 -3
- package/dist/factories/providerRegistry.js +6 -6
- package/dist/lib/core/factory.d.ts +3 -1
- package/dist/lib/core/factory.js +5 -3
- package/dist/lib/evaluation/contextBuilder.d.ts +48 -0
- package/dist/lib/evaluation/contextBuilder.js +134 -0
- package/dist/lib/evaluation/index.d.ts +36 -0
- package/dist/lib/evaluation/index.js +61 -0
- package/dist/lib/evaluation/prompts.d.ts +22 -0
- package/dist/lib/evaluation/prompts.js +73 -0
- package/dist/lib/evaluation/ragasEvaluator.d.ts +28 -0
- package/dist/lib/evaluation/ragasEvaluator.js +90 -0
- package/dist/lib/evaluation/retryManager.d.ts +40 -0
- package/dist/lib/evaluation/retryManager.js +78 -0
- package/dist/lib/evaluation/scoring.d.ts +16 -0
- package/dist/lib/evaluation/scoring.js +35 -0
- package/dist/lib/factories/providerFactory.d.ts +3 -3
- package/dist/lib/factories/providerFactory.js +3 -3
- package/dist/lib/factories/providerRegistry.js +6 -6
- package/dist/lib/middleware/builtin/autoEvaluation.d.ts +14 -0
- package/dist/lib/middleware/builtin/autoEvaluation.js +181 -0
- package/dist/lib/middleware/factory.js +6 -0
- package/dist/lib/neurolink.js +7 -3
- package/dist/lib/providers/amazonBedrock.d.ts +2 -1
- package/dist/lib/providers/amazonBedrock.js +6 -4
- package/dist/lib/providers/amazonSagemaker.d.ts +1 -1
- package/dist/lib/providers/amazonSagemaker.js +2 -2
- package/dist/lib/providers/googleVertex.d.ts +1 -1
- package/dist/lib/providers/googleVertex.js +9 -10
- package/dist/lib/providers/sagemaker/config.d.ts +7 -5
- package/dist/lib/providers/sagemaker/config.js +11 -6
- package/dist/lib/types/evaluation.d.ts +2 -0
- package/dist/lib/types/evaluationTypes.d.ts +142 -0
- package/dist/lib/types/evaluationTypes.js +1 -0
- package/dist/lib/types/generateTypes.d.ts +2 -0
- package/dist/lib/types/middlewareTypes.d.ts +28 -2
- package/dist/lib/types/streamTypes.d.ts +1 -0
- package/dist/middleware/builtin/autoEvaluation.d.ts +14 -0
- package/dist/middleware/builtin/autoEvaluation.js +181 -0
- package/dist/middleware/factory.js +6 -0
- package/dist/neurolink.js +7 -3
- package/dist/providers/amazonBedrock.d.ts +2 -1
- package/dist/providers/amazonBedrock.js +6 -4
- package/dist/providers/amazonSagemaker.d.ts +1 -1
- package/dist/providers/amazonSagemaker.js +2 -2
- package/dist/providers/googleVertex.d.ts +1 -1
- package/dist/providers/googleVertex.js +9 -10
- package/dist/providers/sagemaker/config.d.ts +7 -5
- package/dist/providers/sagemaker/config.js +11 -6
- package/dist/types/evaluation.d.ts +2 -0
- package/dist/types/evaluationTypes.d.ts +142 -0
- package/dist/types/evaluationTypes.js +1 -0
- package/dist/types/generateTypes.d.ts +2 -0
- package/dist/types/middlewareTypes.d.ts +28 -2
- package/dist/types/streamTypes.d.ts +1 -0
- package/package.json +1 -1
|
@@ -7,8 +7,10 @@ import { convertZodToJsonSchema } from "../utils/schemaConversion.js";
|
|
|
7
7
|
export class AmazonBedrockProvider extends BaseProvider {
|
|
8
8
|
bedrockClient;
|
|
9
9
|
conversationHistory = [];
|
|
10
|
-
|
|
10
|
+
region;
|
|
11
|
+
constructor(modelName, neurolink, region) {
|
|
11
12
|
super(modelName, "bedrock", neurolink);
|
|
13
|
+
this.region = region || process.env.AWS_REGION || "us-east-1";
|
|
12
14
|
logger.debug("[AmazonBedrockProvider] Starting constructor with extensive logging for debugging");
|
|
13
15
|
// Log environment variables for debugging
|
|
14
16
|
logger.debug(`[AmazonBedrockProvider] Environment check: AWS_REGION=${process.env.AWS_REGION || "undefined"}, AWS_ACCESS_KEY_ID=${process.env.AWS_ACCESS_KEY_ID ? "SET" : "undefined"}, AWS_SECRET_ACCESS_KEY=${process.env.AWS_SECRET_ACCESS_KEY ? "SET" : "undefined"}`);
|
|
@@ -17,14 +19,14 @@ export class AmazonBedrockProvider extends BaseProvider {
|
|
|
17
19
|
// Absolutely no proxy interference - let AWS SDK handle everything natively
|
|
18
20
|
logger.debug("[AmazonBedrockProvider] Creating BedrockRuntimeClient with clean configuration");
|
|
19
21
|
this.bedrockClient = new BedrockRuntimeClient({
|
|
20
|
-
region:
|
|
22
|
+
region: this.region,
|
|
21
23
|
// Clean configuration - AWS SDK will handle credentials via:
|
|
22
24
|
// 1. IAM roles (preferred in production)
|
|
23
25
|
// 2. Environment variables
|
|
24
26
|
// 3. AWS config files
|
|
25
27
|
// 4. Instance metadata
|
|
26
28
|
});
|
|
27
|
-
logger.debug(`[AmazonBedrockProvider] Successfully created BedrockRuntimeClient with model: ${this.modelName}, region: ${
|
|
29
|
+
logger.debug(`[AmazonBedrockProvider] Successfully created BedrockRuntimeClient with model: ${this.modelName}, region: ${this.region}`);
|
|
28
30
|
// Immediate health check to catch credential issues early
|
|
29
31
|
this.performInitialHealthCheck();
|
|
30
32
|
}
|
|
@@ -39,7 +41,7 @@ export class AmazonBedrockProvider extends BaseProvider {
|
|
|
39
41
|
*/
|
|
40
42
|
async performInitialHealthCheck() {
|
|
41
43
|
const bedrockClient = new BedrockClient({
|
|
42
|
-
region:
|
|
44
|
+
region: this.region,
|
|
43
45
|
});
|
|
44
46
|
try {
|
|
45
47
|
logger.debug("[AmazonBedrockProvider] Starting initial health check to validate credentials and connectivity");
|
|
@@ -16,7 +16,7 @@ export declare class AmazonSageMakerProvider extends BaseProvider {
|
|
|
16
16
|
private sagemakerModel;
|
|
17
17
|
private sagemakerConfig;
|
|
18
18
|
private modelConfig;
|
|
19
|
-
constructor(modelName?: string, endpointName?: string);
|
|
19
|
+
constructor(modelName?: string, endpointName?: string, region?: string);
|
|
20
20
|
protected getProviderName(): AIProviderName;
|
|
21
21
|
protected getDefaultModel(): string;
|
|
22
22
|
protected getAISDKModel(): LanguageModelV1;
|
|
@@ -17,11 +17,11 @@ export class AmazonSageMakerProvider extends BaseProvider {
|
|
|
17
17
|
sagemakerModel;
|
|
18
18
|
sagemakerConfig;
|
|
19
19
|
modelConfig;
|
|
20
|
-
constructor(modelName, endpointName) {
|
|
20
|
+
constructor(modelName, endpointName, region) {
|
|
21
21
|
super(modelName, "sagemaker");
|
|
22
22
|
try {
|
|
23
23
|
// Load and validate configuration
|
|
24
|
-
this.sagemakerConfig = getSageMakerConfig();
|
|
24
|
+
this.sagemakerConfig = getSageMakerConfig(region);
|
|
25
25
|
this.modelConfig = getSageMakerModelConfig(endpointName || getDefaultSageMakerEndpoint());
|
|
26
26
|
// Create the proper LanguageModel (v2) implementation
|
|
27
27
|
this.sagemakerModel = new SageMakerLanguageModel(this.modelName, this.sagemakerConfig, this.modelConfig);
|
|
@@ -25,7 +25,7 @@ export declare class GoogleVertexProvider extends BaseProvider {
|
|
|
25
25
|
private static readonly MAX_CACHE_SIZE;
|
|
26
26
|
private static maxTokensCache;
|
|
27
27
|
private static maxTokensCacheTime;
|
|
28
|
-
constructor(modelName?: string, _providerName?: string, sdk?: unknown);
|
|
28
|
+
constructor(modelName?: string, _providerName?: string, sdk?: unknown, region?: string);
|
|
29
29
|
protected getProviderName(): AIProviderName;
|
|
30
30
|
protected getDefaultModel(): string;
|
|
31
31
|
/**
|
|
@@ -46,10 +46,10 @@ const hasGoogleCredentials = () => {
|
|
|
46
46
|
process.env.GOOGLE_AUTH_PRIVATE_KEY));
|
|
47
47
|
};
|
|
48
48
|
// Enhanced Vertex settings creation with authentication fallback and proxy support
|
|
49
|
-
const createVertexSettings = async () => {
|
|
49
|
+
const createVertexSettings = async (region) => {
|
|
50
50
|
const baseSettings = {
|
|
51
51
|
project: getVertexProjectId(),
|
|
52
|
-
location: getVertexLocation(),
|
|
52
|
+
location: region || getVertexLocation(),
|
|
53
53
|
fetch: createProxyFetch(),
|
|
54
54
|
};
|
|
55
55
|
// 🎯 OPTION 2: Create credentials file from environment variables at runtime
|
|
@@ -157,8 +157,7 @@ const createVertexSettings = async () => {
|
|
|
157
157
|
private_key: requiredEnvVars.private_key.replace(/\\n/g, "\n"),
|
|
158
158
|
client_email: requiredEnvVars.client_email,
|
|
159
159
|
client_id: requiredEnvVars.client_id || "",
|
|
160
|
-
auth_uri: requiredEnvVars.auth_uri ||
|
|
161
|
-
"https://accounts.google.com/o/oauth2/auth",
|
|
160
|
+
auth_uri: requiredEnvVars.auth_uri || "https://accounts.google.com/o/oauth2/auth",
|
|
162
161
|
token_uri: requiredEnvVars.token_uri || "https://oauth2.googleapis.com/token",
|
|
163
162
|
auth_provider_x509_cert_url: requiredEnvVars.auth_provider_x509_cert_url ||
|
|
164
163
|
"https://www.googleapis.com/oauth2/v1/certs",
|
|
@@ -199,8 +198,8 @@ const createVertexSettings = async () => {
|
|
|
199
198
|
return baseSettings;
|
|
200
199
|
};
|
|
201
200
|
// Create Anthropic-specific Vertex settings with the same authentication and proxy support
|
|
202
|
-
const createVertexAnthropicSettings = async () => {
|
|
203
|
-
const baseVertexSettings = await createVertexSettings();
|
|
201
|
+
const createVertexAnthropicSettings = async (region) => {
|
|
202
|
+
const baseVertexSettings = await createVertexSettings(region);
|
|
204
203
|
// GoogleVertexAnthropicProviderSettings extends GoogleVertexProviderSettings
|
|
205
204
|
// so we can use the same settings with proper typing
|
|
206
205
|
return {
|
|
@@ -241,7 +240,7 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
241
240
|
// Memory-managed cache for maxTokens handling decisions to optimize streaming performance
|
|
242
241
|
static maxTokensCache = new Map();
|
|
243
242
|
static maxTokensCacheTime = 0;
|
|
244
|
-
constructor(modelName, _providerName, sdk) {
|
|
243
|
+
constructor(modelName, _providerName, sdk, region) {
|
|
245
244
|
super(modelName, "vertex", sdk);
|
|
246
245
|
// Validate Google Cloud credentials - now using consolidated utility
|
|
247
246
|
if (!hasGoogleCredentials()) {
|
|
@@ -249,7 +248,7 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
249
248
|
}
|
|
250
249
|
// Initialize Google Cloud configuration
|
|
251
250
|
this.projectId = getVertexProjectId();
|
|
252
|
-
this.location = getVertexLocation();
|
|
251
|
+
this.location = region || getVertexLocation();
|
|
253
252
|
logger.debug("Google Vertex AI BaseProvider v2 initialized", {
|
|
254
253
|
modelName: this.modelName,
|
|
255
254
|
projectId: this.projectId,
|
|
@@ -360,7 +359,7 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
360
359
|
message: "Starting Vertex settings creation with network configuration analysis",
|
|
361
360
|
});
|
|
362
361
|
try {
|
|
363
|
-
const vertexSettings = await createVertexSettings();
|
|
362
|
+
const vertexSettings = await createVertexSettings(this.location);
|
|
364
363
|
const vertexSettingsEndTime = process.hrtime.bigint();
|
|
365
364
|
const vertexSettingsDurationNs = vertexSettingsEndTime - vertexSettingsStartTime;
|
|
366
365
|
logger.debug(`[GoogleVertexProvider] ✅ LOG_POINT_V009_VERTEX_SETTINGS_SUCCESS`, {
|
|
@@ -920,7 +919,7 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
920
919
|
projectId: projectValidation.projectId,
|
|
921
920
|
region: projectValidation.region,
|
|
922
921
|
});
|
|
923
|
-
const vertexAnthropicSettings = await createVertexAnthropicSettings();
|
|
922
|
+
const vertexAnthropicSettings = await createVertexAnthropicSettings(this.location);
|
|
924
923
|
// 7. Settings Validation
|
|
925
924
|
if (!vertexAnthropicSettings.project ||
|
|
926
925
|
!vertexAnthropicSettings.location) {
|
|
@@ -8,15 +8,17 @@ import type { SageMakerConfig, SageMakerModelConfig } from "./types.js";
|
|
|
8
8
|
/**
|
|
9
9
|
* Load and validate SageMaker configuration from environment variables
|
|
10
10
|
*
|
|
11
|
-
*
|
|
12
|
-
* 1.
|
|
13
|
-
* 2.
|
|
14
|
-
* 3.
|
|
11
|
+
* Region priority:
|
|
12
|
+
* 1. region parameter (highest priority)
|
|
13
|
+
* 2. SAGEMAKER_REGION environment variable
|
|
14
|
+
* 3. AWS_REGION environment variable
|
|
15
|
+
* 4. Default value "us-east-1" (lowest priority)
|
|
15
16
|
*
|
|
17
|
+
* @param region - Optional region parameter override
|
|
16
18
|
* @returns Validated SageMaker configuration
|
|
17
19
|
* @throws {Error} When required configuration is missing or invalid
|
|
18
20
|
*/
|
|
19
|
-
export declare function getSageMakerConfig(): SageMakerConfig;
|
|
21
|
+
export declare function getSageMakerConfig(region?: string): SageMakerConfig;
|
|
20
22
|
/**
|
|
21
23
|
* Load and validate SageMaker model configuration
|
|
22
24
|
*
|
|
@@ -45,21 +45,26 @@ const modelConfigCache = new Map();
|
|
|
45
45
|
/**
|
|
46
46
|
* Load and validate SageMaker configuration from environment variables
|
|
47
47
|
*
|
|
48
|
-
*
|
|
49
|
-
* 1.
|
|
50
|
-
* 2.
|
|
51
|
-
* 3.
|
|
48
|
+
* Region priority:
|
|
49
|
+
* 1. region parameter (highest priority)
|
|
50
|
+
* 2. SAGEMAKER_REGION environment variable
|
|
51
|
+
* 3. AWS_REGION environment variable
|
|
52
|
+
* 4. Default value "us-east-1" (lowest priority)
|
|
52
53
|
*
|
|
54
|
+
* @param region - Optional region parameter override
|
|
53
55
|
* @returns Validated SageMaker configuration
|
|
54
56
|
* @throws {Error} When required configuration is missing or invalid
|
|
55
57
|
*/
|
|
56
|
-
export function getSageMakerConfig() {
|
|
58
|
+
export function getSageMakerConfig(region) {
|
|
57
59
|
// Return cached config if available
|
|
58
60
|
if (configCache) {
|
|
59
61
|
return configCache;
|
|
60
62
|
}
|
|
61
63
|
const config = {
|
|
62
|
-
region:
|
|
64
|
+
region: region ||
|
|
65
|
+
process.env.SAGEMAKER_REGION ||
|
|
66
|
+
process.env.AWS_REGION ||
|
|
67
|
+
"us-east-1",
|
|
63
68
|
accessKeyId: process.env.AWS_ACCESS_KEY_ID || "",
|
|
64
69
|
secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY || "",
|
|
65
70
|
sessionToken: process.env.AWS_SESSION_TOKEN,
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
import type { LanguageModelV1CallOptions } from "ai";
|
|
2
|
+
import type { TokenUsage } from "./analytics.js";
|
|
3
|
+
import type { GenerateResult } from "./generateTypes.js";
|
|
4
|
+
import type { ToolExecution } from "./tools.js";
|
|
5
|
+
/**
|
|
6
|
+
* Represents the analysis of the user's query intent.
|
|
7
|
+
* This provides a basic understanding of what the user is trying to achieve.
|
|
8
|
+
*/
|
|
9
|
+
export interface QueryIntentAnalysis {
|
|
10
|
+
/** The type of query, e.g., asking a question or giving a command. */
|
|
11
|
+
type: "question" | "command" | "greeting" | "unknown";
|
|
12
|
+
/** The estimated complexity of the query. */
|
|
13
|
+
complexity: "low" | "medium" | "high";
|
|
14
|
+
/** Whether the query likely required the use of tools to be answered correctly. */
|
|
15
|
+
shouldHaveUsedTools: boolean;
|
|
16
|
+
}
|
|
17
|
+
/**
|
|
18
|
+
* Represents a single turn in an enhanced conversation history,
|
|
19
|
+
* including tool executions and evaluations for richer context.
|
|
20
|
+
*/
|
|
21
|
+
export interface EnhancedConversationTurn {
|
|
22
|
+
/** The role of the speaker, either 'user' or 'assistant'. */
|
|
23
|
+
role: "user" | "assistant";
|
|
24
|
+
/** The content of the message. */
|
|
25
|
+
content: string;
|
|
26
|
+
/** The timestamp of the message. */
|
|
27
|
+
timestamp: string;
|
|
28
|
+
/** Any tools that were executed as part of this turn. */
|
|
29
|
+
toolExecutions?: ToolExecution[];
|
|
30
|
+
/** The evaluation result for this turn, if applicable. */
|
|
31
|
+
evaluation?: EvaluationResult;
|
|
32
|
+
}
|
|
33
|
+
/**
|
|
34
|
+
* Contains all the rich context needed for a thorough, RAGAS-style evaluation.
|
|
35
|
+
* This object is constructed by the `ContextBuilder` and used by the `RAGASEvaluator`.
|
|
36
|
+
*/
|
|
37
|
+
export interface EnhancedEvaluationContext {
|
|
38
|
+
/** The original user query. */
|
|
39
|
+
userQuery: string;
|
|
40
|
+
/** An analysis of the user's query intent. */
|
|
41
|
+
queryAnalysis: QueryIntentAnalysis;
|
|
42
|
+
/** The AI's response that is being evaluated. */
|
|
43
|
+
aiResponse: string;
|
|
44
|
+
/** The AI provider that generated the response. */
|
|
45
|
+
provider: string;
|
|
46
|
+
/** The specific model that generated the response. */
|
|
47
|
+
model: string;
|
|
48
|
+
/** The parameters used for the generation call. */
|
|
49
|
+
generationParams: {
|
|
50
|
+
temperature?: number;
|
|
51
|
+
maxTokens?: number;
|
|
52
|
+
systemPrompt?: string;
|
|
53
|
+
};
|
|
54
|
+
/** A list of tools that were executed. */
|
|
55
|
+
toolExecutions: ToolExecution[];
|
|
56
|
+
/** The history of the conversation leading up to this turn. */
|
|
57
|
+
conversationHistory: EnhancedConversationTurn[];
|
|
58
|
+
/** The response time of the AI in milliseconds. */
|
|
59
|
+
responseTime: number;
|
|
60
|
+
/** The token usage for the generation. */
|
|
61
|
+
tokenUsage: TokenUsage;
|
|
62
|
+
/** The results of any previous evaluation attempts for this response. */
|
|
63
|
+
previousEvaluations?: EvaluationResult[];
|
|
64
|
+
/** The current attempt number for this evaluation (1-based). */
|
|
65
|
+
attemptNumber: number;
|
|
66
|
+
}
|
|
67
|
+
/**
|
|
68
|
+
* Represents the result of a single evaluation attempt, based on RAGAS principles.
|
|
69
|
+
*/
|
|
70
|
+
export interface EvaluationResult {
|
|
71
|
+
/** The final, overall score for the response, typically from 1 to 10. */
|
|
72
|
+
finalScore: number;
|
|
73
|
+
/** How well the response addresses the user's query. */
|
|
74
|
+
relevanceScore: number;
|
|
75
|
+
/** The factual accuracy of the information in the response. */
|
|
76
|
+
accuracyScore: number;
|
|
77
|
+
/** How completely the response answers the user's query. */
|
|
78
|
+
completenessScore: number;
|
|
79
|
+
/** Whether the final score meets the passing threshold. */
|
|
80
|
+
isPassing: boolean;
|
|
81
|
+
/** Constructive response from the judge LLM on how to improve the response. */
|
|
82
|
+
reasoning: string;
|
|
83
|
+
/** Specific suggestions for improving the response. */
|
|
84
|
+
suggestedImprovements: string;
|
|
85
|
+
/** The raw, unparsed response from the judge LLM. */
|
|
86
|
+
rawEvaluationResponse: string;
|
|
87
|
+
/** The model used to perform the evaluation. */
|
|
88
|
+
evaluationModel: string;
|
|
89
|
+
/** The time taken for the evaluation in milliseconds. */
|
|
90
|
+
evaluationTime: number;
|
|
91
|
+
/** The attempt number for this evaluation. */
|
|
92
|
+
attemptNumber: number;
|
|
93
|
+
}
|
|
94
|
+
/**
|
|
95
|
+
* Provides detailed information when a response fails quality assurance checks.
|
|
96
|
+
*/
|
|
97
|
+
export interface QualityErrorDetails {
|
|
98
|
+
/** The history of all evaluation attempts for this response. */
|
|
99
|
+
evaluationHistory: EvaluationResult[];
|
|
100
|
+
/** The final score of the last attempt. */
|
|
101
|
+
finalScore: number;
|
|
102
|
+
/** The total number of evaluation attempts made. */
|
|
103
|
+
attempts: number;
|
|
104
|
+
/** A summary message of the failure. */
|
|
105
|
+
message: string;
|
|
106
|
+
}
|
|
107
|
+
/**
|
|
108
|
+
* Configuration for the main `Evaluator` class.
|
|
109
|
+
*/
|
|
110
|
+
export interface EvaluationConfig {
|
|
111
|
+
/** The minimum score (1-10) for a response to be considered passing. */
|
|
112
|
+
threshold?: number;
|
|
113
|
+
/** The evaluation strategy to use. Currently only 'ragas' is supported. */
|
|
114
|
+
evaluationStrategy?: "ragas" | "custom";
|
|
115
|
+
/** The model to use for the LLM-as-judge evaluation. */
|
|
116
|
+
evaluationModel?: string;
|
|
117
|
+
/** The maximum number of evaluation attempts before failing. */
|
|
118
|
+
maxAttempts?: number;
|
|
119
|
+
/** The provider to use for the evaluation model. */
|
|
120
|
+
provider?: string;
|
|
121
|
+
/** A custom evaluator function to override the default behavior. */
|
|
122
|
+
customEvaluator?: (options: LanguageModelV1CallOptions, result: GenerateResult) => Promise<{
|
|
123
|
+
evaluationResult: EvaluationResult;
|
|
124
|
+
evalContext: EnhancedEvaluationContext;
|
|
125
|
+
}>;
|
|
126
|
+
/** The score below which a response is considered off-topic. */
|
|
127
|
+
offTopicThreshold?: number;
|
|
128
|
+
/** The score below which a failing response is considered a high severity alert. */
|
|
129
|
+
highSeverityThreshold?: number;
|
|
130
|
+
/** An optional function to generate custom evaluation prompts. */
|
|
131
|
+
promptGenerator?: GetPromptFunction;
|
|
132
|
+
}
|
|
133
|
+
/**
|
|
134
|
+
* A function that generates the main body of an evaluation prompt.
|
|
135
|
+
*/
|
|
136
|
+
export type GetPromptFunction = (context: {
|
|
137
|
+
userQuery: string;
|
|
138
|
+
history: string;
|
|
139
|
+
tools: string;
|
|
140
|
+
retryInfo: string;
|
|
141
|
+
aiResponse: string;
|
|
142
|
+
}) => string;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -22,6 +22,7 @@ export type GenerateOptions = {
|
|
|
22
22
|
};
|
|
23
23
|
provider?: AIProviderName | string;
|
|
24
24
|
model?: string;
|
|
25
|
+
region?: string;
|
|
25
26
|
temperature?: number;
|
|
26
27
|
maxTokens?: number;
|
|
27
28
|
systemPrompt?: string;
|
|
@@ -143,6 +144,7 @@ export type TextGenerationOptions = {
|
|
|
143
144
|
};
|
|
144
145
|
provider?: AIProviderName;
|
|
145
146
|
model?: string;
|
|
147
|
+
region?: string;
|
|
146
148
|
temperature?: number;
|
|
147
149
|
maxTokens?: number;
|
|
148
150
|
systemPrompt?: string;
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
import type { LanguageModelV1Middleware } from "ai";
|
|
2
2
|
import type { JsonValue } from "../types/common.js";
|
|
3
|
+
import type { EvaluationData } from "./evaluation.js";
|
|
4
|
+
import type { GetPromptFunction } from "./evaluationTypes.js";
|
|
3
5
|
/**
|
|
4
6
|
* Metadata interface for NeuroLink middleware
|
|
5
7
|
* Provides additional information about middleware without affecting execution
|
|
@@ -33,7 +35,7 @@ export interface MiddlewareConfig {
|
|
|
33
35
|
/** Whether the middleware is enabled */
|
|
34
36
|
enabled?: boolean;
|
|
35
37
|
/** Middleware-specific configuration */
|
|
36
|
-
config?: Record<string,
|
|
38
|
+
config?: Record<string, unknown>;
|
|
37
39
|
/** Conditions under which to apply this middleware */
|
|
38
40
|
conditions?: MiddlewareConditions;
|
|
39
41
|
}
|
|
@@ -108,7 +110,7 @@ export interface MiddlewareChainStats {
|
|
|
108
110
|
/**
|
|
109
111
|
* Built-in middleware types
|
|
110
112
|
*/
|
|
111
|
-
export type BuiltInMiddlewareType = "analytics" | "guardrails" | "logging" | "caching" | "rateLimit" | "retry" | "timeout";
|
|
113
|
+
export type BuiltInMiddlewareType = "analytics" | "guardrails" | "logging" | "caching" | "rateLimit" | "retry" | "timeout" | "autoEvaluation";
|
|
112
114
|
/**
|
|
113
115
|
* Middleware preset configurations
|
|
114
116
|
*/
|
|
@@ -144,3 +146,27 @@ export interface MiddlewareFactoryOptions {
|
|
|
144
146
|
collectStats?: boolean;
|
|
145
147
|
};
|
|
146
148
|
}
|
|
149
|
+
/**
|
|
150
|
+
* Configuration for the Auto-Evaluation Middleware.
|
|
151
|
+
*/
|
|
152
|
+
export interface AutoEvaluationConfig {
|
|
153
|
+
/** The minimum score (1-10) for a response to be considered passing. */
|
|
154
|
+
threshold?: number;
|
|
155
|
+
/** The maximum number of retry attempts before failing. */
|
|
156
|
+
maxRetries?: number;
|
|
157
|
+
/** The model to use for the LLM-as-judge evaluation. */
|
|
158
|
+
evaluationModel?: string;
|
|
159
|
+
/**
|
|
160
|
+
* If true, the middleware will wait for the evaluation to complete before returning.
|
|
161
|
+
* If the evaluation fails, it will throw an error. Defaults to true.
|
|
162
|
+
*/
|
|
163
|
+
blocking?: boolean;
|
|
164
|
+
/** A callback function to be invoked with the evaluation result. */
|
|
165
|
+
onEvaluationComplete?: (evaluation: EvaluationData) => void | Promise<void>;
|
|
166
|
+
/** The score below which a response is considered off-topic. */
|
|
167
|
+
offTopicThreshold?: number;
|
|
168
|
+
/** The score below which a failing response is considered a high severity alert. */
|
|
169
|
+
highSeverityThreshold?: number;
|
|
170
|
+
promptGenerator?: GetPromptFunction;
|
|
171
|
+
provider?: string;
|
|
172
|
+
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Implements the Auto-Evaluation Middleware for ensuring response quality.
|
|
3
|
+
*/
|
|
4
|
+
import type { NeuroLinkMiddleware, AutoEvaluationConfig } from "../../types/middlewareTypes.js";
|
|
5
|
+
/**
|
|
6
|
+
* Creates the Auto-Evaluation middleware, which intercepts generation requests
|
|
7
|
+
* to evaluate the quality of the response. If the response quality is below a
|
|
8
|
+
* configured threshold, it can trigger retries with feedback.
|
|
9
|
+
*
|
|
10
|
+
* @param config - Configuration for the auto-evaluation middleware.
|
|
11
|
+
* @returns A `NeuroLinkMiddleware` object.
|
|
12
|
+
*/
|
|
13
|
+
export declare function createAutoEvaluationMiddleware(config?: AutoEvaluationConfig): NeuroLinkMiddleware;
|
|
14
|
+
export default createAutoEvaluationMiddleware;
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Implements the Auto-Evaluation Middleware for ensuring response quality.
|
|
3
|
+
*/
|
|
4
|
+
import { Evaluator } from "../../evaluation/index.js";
|
|
5
|
+
import { logger } from "../../utils/logger.js";
|
|
6
|
+
/**
|
|
7
|
+
* Creates the Auto-Evaluation middleware, which intercepts generation requests
|
|
8
|
+
* to evaluate the quality of the response. If the response quality is below a
|
|
9
|
+
* configured threshold, it can trigger retries with feedback.
|
|
10
|
+
*
|
|
11
|
+
* @param config - Configuration for the auto-evaluation middleware.
|
|
12
|
+
* @returns A `NeuroLinkMiddleware` object.
|
|
13
|
+
*/
|
|
14
|
+
export function createAutoEvaluationMiddleware(config = {}) {
|
|
15
|
+
const metadata = {
|
|
16
|
+
id: "autoEvaluation",
|
|
17
|
+
name: "Auto Evaluation",
|
|
18
|
+
description: "Automatically evaluates response quality and retries if needed.",
|
|
19
|
+
priority: 90,
|
|
20
|
+
defaultEnabled: false, // Should be explicitly enabled
|
|
21
|
+
};
|
|
22
|
+
logger.debug("Auto-Evaluation Middleware Config:", config);
|
|
23
|
+
const middleware = {
|
|
24
|
+
wrapGenerate: async ({ doGenerate, params }) => {
|
|
25
|
+
const options = params;
|
|
26
|
+
const rawResult = await doGenerate();
|
|
27
|
+
const result = {
|
|
28
|
+
...rawResult,
|
|
29
|
+
content: rawResult.text ?? "",
|
|
30
|
+
usage: {
|
|
31
|
+
input: rawResult.usage.promptTokens,
|
|
32
|
+
output: rawResult.usage.completionTokens,
|
|
33
|
+
total: rawResult.usage.promptTokens + rawResult.usage.completionTokens,
|
|
34
|
+
},
|
|
35
|
+
toolCalls: rawResult.toolCalls?.map((tc) => {
|
|
36
|
+
let parsedArgs = tc.args;
|
|
37
|
+
if (typeof tc.args === "string") {
|
|
38
|
+
try {
|
|
39
|
+
parsedArgs = JSON.parse(tc.args);
|
|
40
|
+
}
|
|
41
|
+
catch (e) {
|
|
42
|
+
logger.warn(`Failed to parse tool call args for tool ${tc.toolName}:`, e);
|
|
43
|
+
parsedArgs = tc.args; // Fallback to original string if parsing fails
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
return {
|
|
47
|
+
...tc,
|
|
48
|
+
args: parsedArgs,
|
|
49
|
+
};
|
|
50
|
+
}),
|
|
51
|
+
};
|
|
52
|
+
const isBlocking = config.blocking !== false;
|
|
53
|
+
if (isBlocking) {
|
|
54
|
+
const evaluationResult = await performEvaluation(config, options, result);
|
|
55
|
+
return {
|
|
56
|
+
...rawResult,
|
|
57
|
+
evaluationResult,
|
|
58
|
+
};
|
|
59
|
+
}
|
|
60
|
+
else {
|
|
61
|
+
performEvaluation(config, options, result).catch((err) => {
|
|
62
|
+
logger.error("Non-blocking auto-evaluation error:", err);
|
|
63
|
+
});
|
|
64
|
+
return rawResult;
|
|
65
|
+
}
|
|
66
|
+
},
|
|
67
|
+
wrapStream: async ({ doStream, params }) => {
|
|
68
|
+
const options = params;
|
|
69
|
+
const rawResult = await doStream();
|
|
70
|
+
const [streamForUser, streamForEvaluation] = rawResult.stream.tee();
|
|
71
|
+
// Non-blocking evaluation for streams
|
|
72
|
+
consumeAndEvaluateStream(config, options, streamForEvaluation).catch((err) => {
|
|
73
|
+
logger.error("Non-blocking stream auto-evaluation error:", err);
|
|
74
|
+
});
|
|
75
|
+
return {
|
|
76
|
+
...rawResult,
|
|
77
|
+
stream: streamForUser,
|
|
78
|
+
};
|
|
79
|
+
},
|
|
80
|
+
};
|
|
81
|
+
return {
|
|
82
|
+
...middleware,
|
|
83
|
+
metadata,
|
|
84
|
+
};
|
|
85
|
+
}
|
|
86
|
+
/**
|
|
87
|
+
* A common function to perform the evaluation logic.
|
|
88
|
+
* @param config The middleware configuration.
|
|
89
|
+
* @param options The text generation options.
|
|
90
|
+
* @param result The generation result.
|
|
91
|
+
*/
|
|
92
|
+
async function performEvaluation(config, options, result) {
|
|
93
|
+
const isBlocking = config.blocking !== false;
|
|
94
|
+
const threshold = config.threshold ??
|
|
95
|
+
(Number(process.env.NEUROLINK_EVALUATION_THRESHOLD) || 7);
|
|
96
|
+
try {
|
|
97
|
+
const evaluator = new Evaluator({
|
|
98
|
+
threshold,
|
|
99
|
+
provider: config.provider,
|
|
100
|
+
promptGenerator: config.promptGenerator,
|
|
101
|
+
evaluationModel: config.evaluationModel,
|
|
102
|
+
});
|
|
103
|
+
const evaluationResult = await evaluator.evaluate(options, result, threshold, config);
|
|
104
|
+
if (config.onEvaluationComplete) {
|
|
105
|
+
await config.onEvaluationComplete(evaluationResult);
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
catch (error) {
|
|
109
|
+
logger.error("Error during auto-evaluation:", error);
|
|
110
|
+
if (isBlocking) {
|
|
111
|
+
throw error;
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
/**
|
|
116
|
+
* Consumes a stream to build the full response and then evaluates it.
|
|
117
|
+
* @param config The middleware configuration.
|
|
118
|
+
* @param options The generation options.
|
|
119
|
+
* @param stream The stream to consume.
|
|
120
|
+
*/
|
|
121
|
+
async function consumeAndEvaluateStream(config, options, stream) {
|
|
122
|
+
let fullText = "";
|
|
123
|
+
let usage;
|
|
124
|
+
const toolCalls = [];
|
|
125
|
+
const reader = stream.getReader();
|
|
126
|
+
try {
|
|
127
|
+
while (true) {
|
|
128
|
+
const { done, value } = await reader.read();
|
|
129
|
+
if (done) {
|
|
130
|
+
break;
|
|
131
|
+
}
|
|
132
|
+
switch (value.type) {
|
|
133
|
+
case "text-delta":
|
|
134
|
+
fullText += value.textDelta;
|
|
135
|
+
break;
|
|
136
|
+
case "tool-call":
|
|
137
|
+
{
|
|
138
|
+
let parsedArgs;
|
|
139
|
+
try {
|
|
140
|
+
parsedArgs = JSON.parse(value.args);
|
|
141
|
+
}
|
|
142
|
+
catch (e) {
|
|
143
|
+
logger.warn(`Failed to parse tool call args for tool ${value.toolName}:`, e);
|
|
144
|
+
// In case of parsing failure, we can't assign a string.
|
|
145
|
+
// Let's use an object with the raw string to maintain type safety.
|
|
146
|
+
parsedArgs = { raw: value.args };
|
|
147
|
+
}
|
|
148
|
+
toolCalls.push({
|
|
149
|
+
toolCallId: value.toolCallId,
|
|
150
|
+
toolName: value.toolName,
|
|
151
|
+
args: parsedArgs,
|
|
152
|
+
});
|
|
153
|
+
}
|
|
154
|
+
break;
|
|
155
|
+
case "finish":
|
|
156
|
+
usage = {
|
|
157
|
+
input: value.usage.promptTokens,
|
|
158
|
+
output: value.usage.completionTokens,
|
|
159
|
+
total: value.usage.promptTokens + value.usage.completionTokens,
|
|
160
|
+
};
|
|
161
|
+
break;
|
|
162
|
+
}
|
|
163
|
+
}
|
|
164
|
+
}
|
|
165
|
+
finally {
|
|
166
|
+
reader.releaseLock();
|
|
167
|
+
}
|
|
168
|
+
const result = {
|
|
169
|
+
content: fullText,
|
|
170
|
+
usage,
|
|
171
|
+
toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
|
|
172
|
+
};
|
|
173
|
+
// For streams, evaluation is always non-blocking from the user's perspective.
|
|
174
|
+
if (config.blocking) {
|
|
175
|
+
logger.warn("Auto-evaluation 'blocking' mode is not supported for streaming responses. Evaluation will proceed non-blockingly.");
|
|
176
|
+
}
|
|
177
|
+
// Create a new config object to force non-blocking behavior for the evaluation function
|
|
178
|
+
const nonBlockingConfig = { ...config, blocking: false };
|
|
179
|
+
await performEvaluation(nonBlockingConfig, options, result);
|
|
180
|
+
}
|
|
181
|
+
export default createAutoEvaluationMiddleware;
|