@juspay/neurolink 7.44.0 → 7.46.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/cli/commands/config.d.ts +2 -2
- package/dist/cli/loop/optionsSchema.d.ts +1 -1
- package/dist/core/factory.d.ts +3 -1
- package/dist/core/factory.js +5 -3
- package/dist/evaluation/contextBuilder.d.ts +48 -0
- package/dist/evaluation/contextBuilder.js +134 -0
- package/dist/evaluation/index.d.ts +36 -0
- package/dist/evaluation/index.js +61 -0
- package/dist/evaluation/prompts.d.ts +22 -0
- package/dist/evaluation/prompts.js +73 -0
- package/dist/evaluation/ragasEvaluator.d.ts +28 -0
- package/dist/evaluation/ragasEvaluator.js +90 -0
- package/dist/evaluation/retryManager.d.ts +40 -0
- package/dist/evaluation/retryManager.js +78 -0
- package/dist/evaluation/scoring.d.ts +16 -0
- package/dist/evaluation/scoring.js +35 -0
- package/dist/factories/providerFactory.d.ts +3 -3
- package/dist/factories/providerFactory.js +3 -3
- package/dist/factories/providerRegistry.js +6 -6
- package/dist/lib/core/factory.d.ts +3 -1
- package/dist/lib/core/factory.js +5 -3
- package/dist/lib/evaluation/contextBuilder.d.ts +48 -0
- package/dist/lib/evaluation/contextBuilder.js +134 -0
- package/dist/lib/evaluation/index.d.ts +36 -0
- package/dist/lib/evaluation/index.js +61 -0
- package/dist/lib/evaluation/prompts.d.ts +22 -0
- package/dist/lib/evaluation/prompts.js +73 -0
- package/dist/lib/evaluation/ragasEvaluator.d.ts +28 -0
- package/dist/lib/evaluation/ragasEvaluator.js +90 -0
- package/dist/lib/evaluation/retryManager.d.ts +40 -0
- package/dist/lib/evaluation/retryManager.js +78 -0
- package/dist/lib/evaluation/scoring.d.ts +16 -0
- package/dist/lib/evaluation/scoring.js +35 -0
- package/dist/lib/factories/providerFactory.d.ts +3 -3
- package/dist/lib/factories/providerFactory.js +3 -3
- package/dist/lib/factories/providerRegistry.js +6 -6
- package/dist/lib/middleware/builtin/autoEvaluation.d.ts +14 -0
- package/dist/lib/middleware/builtin/autoEvaluation.js +181 -0
- package/dist/lib/middleware/factory.js +6 -0
- package/dist/lib/neurolink.js +7 -3
- package/dist/lib/providers/amazonBedrock.d.ts +2 -1
- package/dist/lib/providers/amazonBedrock.js +6 -4
- package/dist/lib/providers/amazonSagemaker.d.ts +1 -1
- package/dist/lib/providers/amazonSagemaker.js +2 -2
- package/dist/lib/providers/googleVertex.d.ts +1 -1
- package/dist/lib/providers/googleVertex.js +9 -10
- package/dist/lib/providers/sagemaker/config.d.ts +7 -5
- package/dist/lib/providers/sagemaker/config.js +11 -6
- package/dist/lib/types/evaluation.d.ts +2 -0
- package/dist/lib/types/evaluationTypes.d.ts +142 -0
- package/dist/lib/types/evaluationTypes.js +1 -0
- package/dist/lib/types/generateTypes.d.ts +2 -0
- package/dist/lib/types/middlewareTypes.d.ts +28 -2
- package/dist/lib/types/streamTypes.d.ts +1 -0
- package/dist/middleware/builtin/autoEvaluation.d.ts +14 -0
- package/dist/middleware/builtin/autoEvaluation.js +181 -0
- package/dist/middleware/factory.js +6 -0
- package/dist/neurolink.js +7 -3
- package/dist/providers/amazonBedrock.d.ts +2 -1
- package/dist/providers/amazonBedrock.js +6 -4
- package/dist/providers/amazonSagemaker.d.ts +1 -1
- package/dist/providers/amazonSagemaker.js +2 -2
- package/dist/providers/googleVertex.d.ts +1 -1
- package/dist/providers/googleVertex.js +9 -10
- package/dist/providers/sagemaker/config.d.ts +7 -5
- package/dist/providers/sagemaker/config.js +11 -6
- package/dist/types/evaluation.d.ts +2 -0
- package/dist/types/evaluationTypes.d.ts +142 -0
- package/dist/types/evaluationTypes.js +1 -0
- package/dist/types/generateTypes.d.ts +2 -0
- package/dist/types/middlewareTypes.d.ts +28 -2
- package/dist/types/streamTypes.d.ts +1 -0
- package/package.json +1 -1
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
import { AIProviderFactory } from "../core/factory.js";
|
|
2
|
+
import { PromptBuilder } from "./prompts.js";
|
|
3
|
+
import { logger } from "../utils/logger.js";
|
|
4
|
+
/**
 * RAGAS-style evaluator: asks a "judge" LLM to score an AI response against
 * the supplied evaluation context and converts the judge's JSON reply into a
 * structured result.
 */
export class RAGASEvaluator {
    /** Model name handed to the provider factory for the judge LLM. */
    evaluationModel;
    /** Provider name handed to the provider factory for the judge LLM. */
    providerName;
    /** Minimum `finalScore` for a response to count as passing. */
    threshold;
    /** Builds the prompt that is sent to the judge. */
    promptBuilder;
    /** Optional custom prompt generator forwarded to the prompt builder. */
    promptGenerator;
    /**
     * @param evaluationModel Judge model; falls back to the
     *   `NEUROLINK_RAGAS_EVALUATION_MODEL` env var, then `"gemini-1.5-flash"`.
     * @param providerName Judge provider; falls back to the
     *   `NEUROLINK_RAGAS_EVALUATION_PROVIDER` env var, then `"vertex"`.
     * @param threshold Passing score; falls back to the
     *   `NEUROLINK_EVALUATION_THRESHOLD` env var, then `7`.
     * @param promptGenerator Optional generator passed through to
     *   `PromptBuilder.buildEvaluationPrompt`.
     */
    constructor(evaluationModel, providerName, threshold, promptGenerator) {
        this.evaluationModel =
            evaluationModel ||
                process.env.NEUROLINK_RAGAS_EVALUATION_MODEL ||
                "gemini-1.5-flash";
        this.providerName =
            providerName ||
                process.env.NEUROLINK_RAGAS_EVALUATION_PROVIDER ||
                "vertex";
        // An explicit 0 is a legitimate threshold ("everything passes") and must
        // not be replaced by the env/default fallback the way `||` alone would.
        this.threshold =
            threshold === 0
                ? 0
                : threshold ||
                    Number(process.env.NEUROLINK_EVALUATION_THRESHOLD) ||
                    7;
        this.promptBuilder = new PromptBuilder();
        this.promptGenerator = promptGenerator;
    }
    /**
     * Evaluates an AI-generated response by prompting the judge model.
     *
     * @param context The contextual information for the evaluation; its
     *   `attemptNumber` is copied onto the result.
     * @returns A promise resolving to the parsed scores plus `isPassing`,
     *   `evaluationModel`, `evaluationTime` (ms), `attemptNumber` and the
     *   judge's raw reply.
     * @throws Error when the provider returns no result at all.
     */
    async evaluate(context) {
        const startTime = Date.now();
        const prompt = this.promptBuilder.buildEvaluationPrompt(context, this.promptGenerator);
        const provider = await AIProviderFactory.createProvider(this.providerName, this.evaluationModel);
        const result = await provider.generate({
            input: { text: prompt },
        });
        if (!result) {
            throw new Error("Evaluation generation failed to return a result.");
        }
        const rawEvaluationResponse = result.content;
        const parsedResult = this.parseEvaluationResponse(rawEvaluationResponse);
        const evaluationTime = Date.now() - startTime;
        return {
            ...parsedResult,
            // Pass/fail against this evaluator's own threshold.
            isPassing: parsedResult.finalScore >= this.threshold,
            evaluationModel: this.evaluationModel,
            evaluationTime,
            attemptNumber: context.attemptNumber,
            rawEvaluationResponse,
        };
    }
    /**
     * Pulls the JSON value out of a judge reply.
     *
     * Markdown code fences are stripped first. If what remains is still not
     * valid JSON (for example the judge wrapped the object in prose or used a
     * different fence label), the outermost `{ ... }` span is parsed instead.
     *
     * @param rawResponse The raw string response from the evaluation model.
     * @returns The parsed JSON value.
     * @throws When no JSON can be recovered, or `rawResponse` is not a string.
     */
    extractJsonPayload(rawResponse) {
        const withoutFences = rawResponse.replace(/```json\n|```/g, "").trim();
        try {
            return JSON.parse(withoutFences);
        }
        catch (strictError) {
            const start = withoutFences.indexOf("{");
            const end = withoutFences.lastIndexOf("}");
            if (start === -1 || end <= start) {
                throw strictError;
            }
            return JSON.parse(withoutFences.slice(start, end + 1));
        }
    }
    /**
     * Parses the judge's raw reply into structured scores and feedback.
     * Malformed or missing replies never throw: the failure is logged and an
     * all-zero result with explanatory text is returned instead.
     *
     * @param rawResponse The raw string response from the evaluation model.
     * @returns An object with `relevanceScore`, `accuracyScore`,
     *   `completenessScore`, `finalScore`, `suggestedImprovements` and
     *   `reasoning`. Non-numeric or missing scores become `0`.
     */
    parseEvaluationResponse(rawResponse) {
        try {
            const parsed = this.extractJsonPayload(rawResponse);
            logger.debug("Parsed evaluation response for RAGAS Evaluator:", parsed);
            return {
                relevanceScore: Number(parsed.relevanceScore) || 0,
                accuracyScore: Number(parsed.accuracyScore) || 0,
                completenessScore: Number(parsed.completenessScore) || 0,
                finalScore: Number(parsed.finalScore) || 0,
                suggestedImprovements: parsed.suggestedImprovements || "No suggestions provided.",
                reasoning: parsed.reasoning || "No reasoning provided.",
            };
        }
        catch (error) {
            logger.error("Failed to parse evaluation response:", error);
            return {
                relevanceScore: 0,
                accuracyScore: 0,
                completenessScore: 0,
                finalScore: 0,
                reasoning: "Error parsing evaluation response.",
                suggestedImprovements: "Error parsing evaluation response.",
            };
        }
    }
}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Implements the RetryManager class for handling evaluation retries.
|
|
3
|
+
*/
|
|
4
|
+
import type { EvaluationResult } from "../types/evaluationTypes.js";
|
|
5
|
+
import type { TextGenerationOptions } from "../types/generateTypes.js";
|
|
6
|
+
/**
 * Retry policy for the auto-evaluation middleware: decides whether a failed
 * evaluation warrants another generation attempt and produces the options
 * (with a feedback-augmented prompt) for that attempt.
 */
export declare class RetryManager {
    /** Number of retries allowed after the initial attempt. */
    private maxRetries;
    /**
     * @param maxRetries Retries allowed after the first attempt; the
     *   implementation defaults this to 2.
     */
    constructor(maxRetries?: number);
    /**
     * Reports whether another attempt should be made.
     *
     * @param evaluation The `EvaluationResult` of the most recent attempt.
     * @returns `true` when the evaluation did not pass and its attempt number
     *   is still within the retry budget.
     */
    shouldRetry(evaluation: EvaluationResult): boolean;
    /**
     * Produces the options for the next attempt, replacing the prompt with one
     * that embeds the evaluator's feedback.
     *
     * @param originalOptions The `TextGenerationOptions` of the user request.
     * @param evaluation The `EvaluationResult` of the failed attempt.
     * @returns A new `TextGenerationOptions` object carrying the retry prompt.
     */
    prepareRetryOptions(originalOptions: TextGenerationOptions, evaluation: EvaluationResult): TextGenerationOptions;
    /**
     * Composes the retry prompt from the original request and the feedback.
     * The wording of the correction instruction depends on the attempt number.
     *
     * @param originalPrompt The user's original prompt.
     * @param feedback The evaluator's suggested improvements.
     * @param attemptNumber The upcoming attempt number (2 for the first retry).
     * @returns The prompt string for the retry.
     */
    private buildRetryPrompt;
}
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Implements the RetryManager class for handling evaluation retries.
|
|
3
|
+
*/
|
|
4
|
+
/**
 * Retry policy for the auto-evaluation middleware. It decides whether a
 * failed evaluation warrants another generation attempt and prepares the
 * options for that attempt by folding the evaluator's feedback into the prompt.
 */
export class RetryManager {
    /** Number of retries allowed after the initial attempt. */
    maxRetries;
    /**
     * @param maxRetries Retries allowed after the first attempt. The default
     *   of 2 gives three attempts in total (1 initial + 2 retries).
     */
    constructor(maxRetries = 2) {
        this.maxRetries = maxRetries;
    }
    /**
     * Reports whether another attempt should be made.
     *
     * @param evaluation The `EvaluationResult` of the last attempt; its
     *   `attemptNumber` is 1-based.
     * @returns `true` if the response did not pass and the retry budget is not
     *   exhausted (attempt `maxRetries + 1` is the last one allowed).
     */
    shouldRetry(evaluation) {
        return !evaluation.isPassing && evaluation.attemptNumber <= this.maxRetries;
    }
    /**
     * Prepares the options for the next generation attempt by building a new
     * prompt that includes feedback from the failed evaluation.
     *
     * The retry prompt is always built from the user's first prompt: when the
     * incoming options are themselves the product of an earlier retry (they
     * carry `originalPrompt`), that value is used as the base so retry prompts
     * do not nest inside one another and stale feedback is not repeated.
     *
     * @param originalOptions The `TextGenerationOptions` from the user request,
     *   or the options returned by a previous call to this method.
     * @param evaluation The `EvaluationResult` of the failed attempt.
     * @returns A new options object with `prompt` replaced, `input` cleared and
     *   `originalPrompt` set to the user's first prompt.
     */
    prepareRetryOptions(originalOptions, evaluation) {
        const currentPrompt = originalOptions.prompt || originalOptions.input?.text || "";
        const basePrompt = originalOptions.originalPrompt || currentPrompt;
        const newPrompt = this.buildRetryPrompt(basePrompt, evaluation.suggestedImprovements, evaluation.attemptNumber + 1);
        return {
            ...originalOptions,
            prompt: newPrompt,
            // `prompt` is now the source of truth, so `input` is not carried over.
            // NOTE(review): this also drops any non-text parts of `input` — confirm
            // that is acceptable for multimodal requests.
            input: undefined,
            // Remember the user's first prompt for any subsequent retry.
            originalPrompt: basePrompt,
        };
    }
    /**
     * Builds the prompt for a retry attempt by combining the original request
     * with a correction instruction whose wording depends on the attempt number.
     *
     * @param originalPrompt The user's original prompt.
     * @param feedback The constructive feedback from the evaluation.
     * @param attemptNumber The upcoming attempt number (e.g., 2 for the first retry).
     * @returns The new prompt string.
     */
    buildRetryPrompt(originalPrompt, feedback, attemptNumber) {
        let instruction = "";
        switch (attemptNumber) {
            case 2: // First retry
                instruction = `The previous response was not satisfactory. Please improve it based on the following feedback: "${feedback}".`;
                break;
            case 3: // Second retry
                instruction = `The last response still requires improvement. Pay close attention to this feedback: "${feedback}". You MUST address these points.`;
                break;
            default: // Any later (or unexpected) attempt number
                instruction = `This is the final attempt. You MUST address the following feedback to generate a satisfactory response: "${feedback}".`;
                break;
        }
        return `
Original Request: ${originalPrompt}

**Correction Instructions:**
${instruction}

Generate a new, complete response that incorporates this feedback.
`;
    }
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Contains the logic for mapping raw evaluation results to the structured EvaluationData type.
|
|
3
|
+
*/
|
|
4
|
+
import type { EnhancedEvaluationContext, EvaluationResult } from "../types/evaluationTypes.js";
|
|
5
|
+
import type { EvaluationData } from "../types/evaluation.js";
|
|
6
|
+
/**
|
|
7
|
+
* Maps a raw `EvaluationResult` to the structured `EvaluationData` format.
|
|
8
|
+
* This includes calculating derived fields like `isOffTopic` and `alertSeverity`.
|
|
9
|
+
*
|
|
10
|
+
* @param result The raw `EvaluationResult` from the evaluator.
|
|
11
|
+
* @param threshold The score threshold to determine if the evaluation is passing.
|
|
12
|
+
* @param offTopicThreshold The score below which a response is considered off-topic.
|
|
13
|
+
* @param highSeverityThreshold The score below which a failing response is high severity.
|
|
14
|
+
* @returns A structured `EvaluationData` object.
|
|
15
|
+
*/
|
|
16
|
+
// `evalContext` is the first argument: the implementation copies its `aiResponse` and `userQuery` into the returned data as `responseContent` and `queryContent`.
export declare function mapToEvaluationData(evalContext: EnhancedEvaluationContext, result: EvaluationResult, threshold: number, offTopicThreshold?: number, highSeverityThreshold?: number): EvaluationData;
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Contains the logic for mapping raw evaluation results to the structured EvaluationData type.
|
|
3
|
+
*/
|
|
4
|
+
/**
 * Converts a raw `EvaluationResult` into the structured `EvaluationData`
 * shape, deriving `isOffTopic` and `alertSeverity` from the final score.
 *
 * @param evalContext The evaluation context; its `aiResponse` and `userQuery`
 *   are copied to `responseContent` and `queryContent`.
 * @param result The raw `EvaluationResult` from the evaluator.
 * @param threshold Scores at or above this value are passing (severity `"none"`).
 * @param offTopicThreshold Final scores below this value are flagged off-topic.
 * @param highSeverityThreshold Failing final scores below this value are
 *   `"high"` severity; other failing scores are `"medium"`.
 * @returns A structured `EvaluationData` object.
 */
export function mapToEvaluationData(evalContext, result, threshold, offTopicThreshold = 5, highSeverityThreshold = 4) {
    const score = result.finalScore;
    // Start from the "failing, but not severe" case and refine.
    let severity = "medium";
    if (score >= threshold) {
        severity = "none";
    }
    else if (score < highSeverityThreshold) {
        severity = "high";
    }
    const offTopic = score < offTopicThreshold;
    return {
        relevance: result.relevanceScore,
        accuracy: result.accuracyScore,
        completeness: result.completenessScore,
        overall: score,
        isOffTopic: offTopic,
        alertSeverity: severity,
        reasoning: result.reasoning,
        suggestedImprovements: result.suggestedImprovements,
        evaluationModel: result.evaluationModel,
        evaluationTime: result.evaluationTime,
        evaluationAttempt: result.attemptNumber,
        responseContent: evalContext.aiResponse,
        queryContent: evalContext.userQuery,
    };
}
|
|
@@ -4,8 +4,8 @@ import type { UnknownRecord } from "../types/common.js";
|
|
|
4
4
|
* Provider constructor interface - supports both sync constructors and async factory functions
|
|
5
5
|
*/
|
|
6
6
|
type ProviderConstructor = {
|
|
7
|
-
new (modelName?: string, providerName?: string, sdk?: UnknownRecord): AIProvider;
|
|
8
|
-
} | ((modelName?: string, providerName?: string, sdk?: UnknownRecord) => Promise<AIProvider>);
|
|
7
|
+
new (modelName?: string, providerName?: string, sdk?: UnknownRecord, region?: string): AIProvider;
|
|
8
|
+
} | ((modelName?: string, providerName?: string, sdk?: UnknownRecord, region?: string) => Promise<AIProvider>);
|
|
9
9
|
/**
|
|
10
10
|
* Provider registration entry
|
|
11
11
|
*/
|
|
@@ -30,7 +30,7 @@ export declare class ProviderFactory {
|
|
|
30
30
|
/**
|
|
31
31
|
* Create a provider instance
|
|
32
32
|
*/
|
|
33
|
-
static createProvider(providerName: AIProviderName | string, modelName?: string, sdk?: UnknownRecord): Promise<AIProvider>;
|
|
33
|
+
static createProvider(providerName: AIProviderName | string, modelName?: string, sdk?: UnknownRecord, region?: string): Promise<AIProvider>;
|
|
34
34
|
/**
|
|
35
35
|
* Check if a provider is registered
|
|
36
36
|
*/
|
|
@@ -28,7 +28,7 @@ export class ProviderFactory {
|
|
|
28
28
|
/**
|
|
29
29
|
* Create a provider instance
|
|
30
30
|
*/
|
|
31
|
-
static async createProvider(providerName, modelName, sdk) {
|
|
31
|
+
static async createProvider(providerName, modelName, sdk, region) {
|
|
32
32
|
// Note: Providers are registered explicitly by ProviderRegistry to avoid circular dependencies
|
|
33
33
|
const normalizedName = providerName.toLowerCase();
|
|
34
34
|
const registration = this.providers.get(normalizedName);
|
|
@@ -54,7 +54,7 @@ export class ProviderFactory {
|
|
|
54
54
|
}
|
|
55
55
|
let result;
|
|
56
56
|
try {
|
|
57
|
-
const factoryResult = registration.constructor(model, providerName, sdk);
|
|
57
|
+
const factoryResult = registration.constructor(model, providerName, sdk, region);
|
|
58
58
|
// Handle both sync and async results
|
|
59
59
|
result =
|
|
60
60
|
factoryResult instanceof Promise
|
|
@@ -66,7 +66,7 @@ export class ProviderFactory {
|
|
|
66
66
|
registration.constructor.prototype.constructor ===
|
|
67
67
|
registration.constructor) {
|
|
68
68
|
try {
|
|
69
|
-
result = new registration.constructor(model, providerName, sdk);
|
|
69
|
+
result = new registration.constructor(model, providerName, sdk, region);
|
|
70
70
|
}
|
|
71
71
|
catch (constructorError) {
|
|
72
72
|
throw new Error(`Both factory function and constructor failed. Factory error: ${factoryError}. Constructor error: ${constructorError}`);
|
|
@@ -39,9 +39,9 @@ export class ProviderRegistry {
|
|
|
39
39
|
return new AnthropicProvider(modelName, sdk);
|
|
40
40
|
}, "claude-3-5-sonnet-20241022", ["claude", "anthropic"]);
|
|
41
41
|
// Register Amazon Bedrock provider
|
|
42
|
-
ProviderFactory.registerProvider(AIProviderName.BEDROCK, async (modelName, _providerName, sdk) => {
|
|
42
|
+
ProviderFactory.registerProvider(AIProviderName.BEDROCK, async (modelName, _providerName, sdk, region) => {
|
|
43
43
|
const { AmazonBedrockProvider } = await import("../providers/amazonBedrock.js");
|
|
44
|
-
return new AmazonBedrockProvider(modelName, sdk);
|
|
44
|
+
return new AmazonBedrockProvider(modelName, sdk, region);
|
|
45
45
|
}, undefined, // Let provider read BEDROCK_MODEL from .env
|
|
46
46
|
["bedrock", "aws"]);
|
|
47
47
|
// Register Azure OpenAI provider
|
|
@@ -54,9 +54,9 @@ export class ProviderRegistry {
|
|
|
54
54
|
process.env.AZURE_OPENAI_DEPLOYMENT_ID ||
|
|
55
55
|
"gpt-4o-mini", ["azure", "azureOpenai"]);
|
|
56
56
|
// Register Google Vertex AI provider
|
|
57
|
-
ProviderFactory.registerProvider(AIProviderName.VERTEX, async (modelName, providerName, sdk) => {
|
|
57
|
+
ProviderFactory.registerProvider(AIProviderName.VERTEX, async (modelName, providerName, sdk, region) => {
|
|
58
58
|
const { GoogleVertexProvider } = await import("../providers/googleVertex.js");
|
|
59
|
-
return new GoogleVertexProvider(modelName, providerName, sdk);
|
|
59
|
+
return new GoogleVertexProvider(modelName, providerName, sdk, region);
|
|
60
60
|
}, "claude-sonnet-4@20250514", ["vertex", "googleVertex"]);
|
|
61
61
|
// Register Hugging Face provider (Unified Router implementation)
|
|
62
62
|
ProviderFactory.registerProvider(AIProviderName.HUGGINGFACE, async (modelName) => {
|
|
@@ -85,9 +85,9 @@ export class ProviderRegistry {
|
|
|
85
85
|
}, process.env.OPENAI_COMPATIBLE_MODEL || undefined, // Enable auto-discovery when no model specified
|
|
86
86
|
["openai-compatible", "openrouter", "vllm", "compatible"]);
|
|
87
87
|
// Register Amazon SageMaker provider
|
|
88
|
-
ProviderFactory.registerProvider(AIProviderName.SAGEMAKER, async (modelName, _providerName, _sdk) => {
|
|
88
|
+
ProviderFactory.registerProvider(AIProviderName.SAGEMAKER, async (modelName, _providerName, _sdk, region) => {
|
|
89
89
|
const { AmazonSageMakerProvider } = await import("../providers/amazonSagemaker.js");
|
|
90
|
-
return new AmazonSageMakerProvider(modelName);
|
|
90
|
+
return new AmazonSageMakerProvider(modelName, region);
|
|
91
91
|
}, process.env.SAGEMAKER_MODEL || "sagemaker-model", ["sagemaker", "aws-sagemaker"]);
|
|
92
92
|
logger.debug("All providers registered successfully");
|
|
93
93
|
this.registered = true;
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Implements the Auto-Evaluation Middleware for ensuring response quality.
|
|
3
|
+
*/
|
|
4
|
+
import type { NeuroLinkMiddleware, AutoEvaluationConfig } from "../../types/middlewareTypes.js";
|
|
5
|
+
/**
|
|
6
|
+
* Creates the Auto-Evaluation middleware, which intercepts generation requests
|
|
7
|
+
* to evaluate the quality of the response. If the response quality is below a
|
|
8
|
+
* configured threshold, it can trigger retries with feedback.
|
|
9
|
+
*
|
|
10
|
+
* @param config - Configuration for the auto-evaluation middleware.
|
|
11
|
+
* @returns A `NeuroLinkMiddleware` object.
|
|
12
|
+
*/
|
|
13
|
+
// `config` is optional; the implementation substitutes an empty object when it is omitted.
export declare function createAutoEvaluationMiddleware(config?: AutoEvaluationConfig): NeuroLinkMiddleware;
|
|
14
|
+
export default createAutoEvaluationMiddleware;
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Implements the Auto-Evaluation Middleware for ensuring response quality.
|
|
3
|
+
*/
|
|
4
|
+
import { Evaluator } from "../../evaluation/index.js";
|
|
5
|
+
import { logger } from "../../utils/logger.js";
|
|
6
|
+
/**
 * Creates the Auto-Evaluation middleware, which intercepts generation
 * requests to evaluate the quality of the response.
 *
 * - `wrapGenerate`: evaluates the finished result. Unless `config.blocking`
 *   is exactly `false`, the evaluation is awaited and whatever
 *   `performEvaluation` resolves with is attached as `evaluationResult`;
 *   otherwise evaluation runs in the background and the raw result is
 *   returned untouched.
 * - `wrapStream`: tees the stream, returns one branch to the caller and
 *   evaluates the other in the background.
 *
 * @param config - Configuration for the auto-evaluation middleware.
 * @returns A `NeuroLinkMiddleware` object (the two wrappers plus `metadata`).
 */
export function createAutoEvaluationMiddleware(config = {}) {
    const metadata = {
        id: "autoEvaluation",
        name: "Auto Evaluation",
        description: "Automatically evaluates response quality and retries if needed.",
        priority: 90,
        defaultEnabled: false, // Should be explicitly enabled
    };
    logger.debug("Auto-Evaluation Middleware Config:", config);
    const middleware = {
        wrapGenerate: async ({ doGenerate, params }) => {
            const options = params;
            const rawResult = await doGenerate();
            // Token accounting is auxiliary: a provider result without `usage`
            // must not make the whole generation fail inside this middleware.
            const promptTokens = rawResult.usage?.promptTokens;
            const completionTokens = rawResult.usage?.completionTokens;
            // Reshape the raw result into the form handed to the evaluator.
            const result = {
                ...rawResult,
                content: rawResult.text ?? "",
                usage: {
                    input: promptTokens,
                    output: completionTokens,
                    total: promptTokens + completionTokens,
                },
                toolCalls: rawResult.toolCalls?.map((tc) => {
                    let parsedArgs = tc.args;
                    if (typeof tc.args === "string") {
                        try {
                            parsedArgs = JSON.parse(tc.args);
                        }
                        catch (e) {
                            logger.warn(`Failed to parse tool call args for tool ${tc.toolName}:`, e);
                            parsedArgs = tc.args; // Fallback to original string if parsing fails
                        }
                    }
                    return {
                        ...tc,
                        args: parsedArgs,
                    };
                }),
            };
            // Blocking is the default: only an explicit `false` opts out.
            const isBlocking = config.blocking !== false;
            if (isBlocking) {
                const evaluationResult = await performEvaluation(config, options, result);
                return {
                    ...rawResult,
                    evaluationResult,
                };
            }
            else {
                // Fire-and-forget: failures are logged, never surfaced to the caller.
                performEvaluation(config, options, result).catch((err) => {
                    logger.error("Non-blocking auto-evaluation error:", err);
                });
                return rawResult;
            }
        },
        wrapStream: async ({ doStream, params }) => {
            const options = params;
            const rawResult = await doStream();
            // One branch goes to the caller, the other is consumed for evaluation.
            const [streamForUser, streamForEvaluation] = rawResult.stream.tee();
            // Non-blocking evaluation for streams
            consumeAndEvaluateStream(config, options, streamForEvaluation).catch((err) => {
                logger.error("Non-blocking stream auto-evaluation error:", err);
            });
            return {
                ...rawResult,
                stream: streamForUser,
            };
        },
    };
    return {
        ...middleware,
        metadata,
    };
}
|
|
86
|
+
/**
 * Runs one evaluation of a generation result and notifies the optional
 * `onEvaluationComplete` callback.
 *
 * The threshold is `config.threshold` when set, otherwise the
 * `NEUROLINK_EVALUATION_THRESHOLD` env var, otherwise 7.
 *
 * @param config The middleware configuration.
 * @param options The text generation options.
 * @param result The generation result.
 * @returns The evaluation produced by the evaluator, or `undefined` when the
 *   evaluation failed in non-blocking mode.
 * @throws Re-throws the evaluation (or callback) error unless
 *   `config.blocking` is exactly `false`; the error is logged either way.
 */
async function performEvaluation(config, options, result) {
    const isBlocking = config.blocking !== false;
    const threshold = config.threshold ??
        (Number(process.env.NEUROLINK_EVALUATION_THRESHOLD) || 7);
    try {
        const evaluator = new Evaluator({
            threshold,
            provider: config.provider,
            promptGenerator: config.promptGenerator,
            evaluationModel: config.evaluationModel,
        });
        const evaluationResult = await evaluator.evaluate(options, result, threshold, config);
        if (config.onEvaluationComplete) {
            await config.onEvaluationComplete(evaluationResult);
        }
        // Hand the evaluation back so blocking callers can attach it to their
        // response instead of always receiving `undefined`.
        return evaluationResult;
    }
    catch (error) {
        logger.error("Error during auto-evaluation:", error);
        if (isBlocking) {
            throw error;
        }
        return undefined;
    }
}
|
|
115
|
+
/**
 * Drains a stream of generation parts, rebuilds the full response (text,
 * tool calls, token usage) and then evaluates it.
 *
 * Handled part types: `"text-delta"` (appended to the text), `"tool-call"`
 * (collected, with string arguments JSON-parsed) and `"finish"` (usage).
 * All other part types are ignored. Evaluation is always performed with
 * `blocking: false`, so evaluation failures are logged rather than thrown.
 *
 * @param config The middleware configuration.
 * @param options The generation options.
 * @param stream The stream to consume (the evaluation branch of a tee).
 * @throws Re-throws any error raised while reading or processing the stream.
 */
async function consumeAndEvaluateStream(config, options, stream) {
    let fullText = "";
    let usage;
    const toolCalls = [];
    const reader = stream.getReader();
    try {
        while (true) {
            const { done, value } = await reader.read();
            if (done) {
                break;
            }
            switch (value.type) {
                case "text-delta":
                    fullText += value.textDelta;
                    break;
                case "tool-call": {
                    // Only string arguments need parsing; already-structured
                    // arguments are kept as they are.
                    let parsedArgs = value.args;
                    if (typeof value.args === "string") {
                        try {
                            parsedArgs = JSON.parse(value.args);
                        }
                        catch (e) {
                            logger.warn(`Failed to parse tool call args for tool ${value.toolName}:`, e);
                            // Keep the argument object-shaped by wrapping the raw string.
                            parsedArgs = { raw: value.args };
                        }
                    }
                    toolCalls.push({
                        toolCallId: value.toolCallId,
                        toolName: value.toolName,
                        args: parsedArgs,
                    });
                    break;
                }
                case "finish": {
                    // Tolerate a finish part without usage instead of aborting.
                    const promptTokens = value.usage?.promptTokens;
                    const completionTokens = value.usage?.completionTokens;
                    usage = {
                        input: promptTokens,
                        output: completionTokens,
                        total: promptTokens + completionTokens,
                    };
                    break;
                }
            }
        }
    }
    catch (error) {
        // Cancel this branch so it stops buffering chunks nobody will read.
        // Cancellation is best-effort cleanup; the original error is what matters.
        await reader.cancel(error).catch(() => { });
        throw error;
    }
    finally {
        reader.releaseLock();
    }
    const result = {
        content: fullText,
        usage,
        toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
    };
    // For streams, evaluation is always non-blocking from the user's perspective.
    if (config.blocking) {
        logger.warn("Auto-evaluation 'blocking' mode is not supported for streaming responses. Evaluation will proceed non-blockingly.");
    }
    // Copy the config so the caller's object is untouched while forcing
    // non-blocking behaviour in the evaluation function.
    const nonBlockingConfig = { ...config, blocking: false };
    await performEvaluation(nonBlockingConfig, options, result);
}
|
|
181
|
+
export default createAutoEvaluationMiddleware;
|
|
@@ -2,6 +2,7 @@ import { wrapLanguageModel } from "ai";
|
|
|
2
2
|
import { MiddlewareRegistry } from "./registry.js";
|
|
3
3
|
import { createAnalyticsMiddleware } from "./builtin/analytics.js";
|
|
4
4
|
import { createGuardrailsMiddleware } from "./builtin/guardrails.js";
|
|
5
|
+
import { createAutoEvaluationMiddleware } from "./builtin/autoEvaluation.js";
|
|
5
6
|
import { logger } from "../utils/logger.js";
|
|
6
7
|
/**
|
|
7
8
|
* Middleware factory for creating and applying middleware chains.
|
|
@@ -24,6 +25,7 @@ export class MiddlewareFactory {
|
|
|
24
25
|
const builtInMiddlewareCreators = {
|
|
25
26
|
analytics: createAnalyticsMiddleware,
|
|
26
27
|
guardrails: createGuardrailsMiddleware,
|
|
28
|
+
autoEvaluation: createAutoEvaluationMiddleware,
|
|
27
29
|
};
|
|
28
30
|
// Register built-in presets
|
|
29
31
|
this.registerPreset({
|
|
@@ -54,6 +56,7 @@ export class MiddlewareFactory {
|
|
|
54
56
|
if (!this.registry.has(middlewareId)) {
|
|
55
57
|
const creator = builtInMiddlewareCreators[middlewareId];
|
|
56
58
|
const config = options.middlewareConfig?.[middlewareId]?.config;
|
|
59
|
+
logger.debug(`Registering built-in middleware '${middlewareId}'`, config);
|
|
57
60
|
this.registry.register(creator(config));
|
|
58
61
|
}
|
|
59
62
|
}
|
|
@@ -92,6 +95,7 @@ export class MiddlewareFactory {
|
|
|
92
95
|
const middlewareConfig = this.buildMiddlewareConfig(mergedOptions);
|
|
93
96
|
// Re-register middleware with the correct configuration for this call
|
|
94
97
|
for (const [id, config] of Object.entries(middlewareConfig)) {
|
|
98
|
+
logger.debug(`Configuring middleware '${id}'`, { config });
|
|
95
99
|
if (config.enabled && this.registry.has(id)) {
|
|
96
100
|
const creator = this.getCreator(id);
|
|
97
101
|
if (creator) {
|
|
@@ -137,7 +141,9 @@ export class MiddlewareFactory {
|
|
|
137
141
|
const builtInMiddlewareCreators = {
|
|
138
142
|
analytics: createAnalyticsMiddleware,
|
|
139
143
|
guardrails: createGuardrailsMiddleware,
|
|
144
|
+
autoEvaluation: createAutoEvaluationMiddleware,
|
|
140
145
|
};
|
|
146
|
+
logger.debug("Getting creator for middleware ID:", id);
|
|
141
147
|
return builtInMiddlewareCreators[id];
|
|
142
148
|
}
|
|
143
149
|
/**
|
package/dist/lib/neurolink.js
CHANGED
|
@@ -1082,6 +1082,7 @@ export class NeuroLink {
|
|
|
1082
1082
|
evaluationDomain: options.evaluationDomain,
|
|
1083
1083
|
toolUsageContext: options.toolUsageContext,
|
|
1084
1084
|
input: options.input, // This includes text, images, and content arrays
|
|
1085
|
+
region: options.region,
|
|
1085
1086
|
};
|
|
1086
1087
|
// Apply factory enhancement using centralized utilities
|
|
1087
1088
|
const textOptions = enhanceTextGenerationOptions(baseOptions, factoryResult);
|
|
@@ -1410,7 +1411,8 @@ export class NeuroLink {
|
|
|
1410
1411
|
const conversationMessages = await getConversationMessages(this.conversationMemory, options);
|
|
1411
1412
|
// Create provider and generate
|
|
1412
1413
|
const provider = await AIProviderFactory.createProvider(providerName, options.model, !options.disableTools, // Pass disableTools as inverse of enableMCP
|
|
1413
|
-
this
|
|
1414
|
+
this, // Pass SDK instance
|
|
1415
|
+
options.region);
|
|
1414
1416
|
// ADD: Emit connection events for all providers (Bedrock-compatible)
|
|
1415
1417
|
this.emitter.emit("connected");
|
|
1416
1418
|
this.emitter.emit("message", `${providerName} provider initialized successfully`);
|
|
@@ -1520,7 +1522,8 @@ export class NeuroLink {
|
|
|
1520
1522
|
// Get conversation messages for context
|
|
1521
1523
|
const conversationMessages = await getConversationMessages(this.conversationMemory, options);
|
|
1522
1524
|
const provider = await AIProviderFactory.createProvider(providerName, options.model, !options.disableTools, // Pass disableTools as inverse of enableMCP
|
|
1523
|
-
this
|
|
1525
|
+
this, // Pass SDK instance
|
|
1526
|
+
options.region);
|
|
1524
1527
|
// ADD: Emit connection events for successful provider creation (Bedrock-compatible)
|
|
1525
1528
|
this.emitter.emit("connected");
|
|
1526
1529
|
this.emitter.emit("message", `${providerName} provider initialized successfully`);
|
|
@@ -1897,7 +1900,8 @@ export class NeuroLink {
|
|
|
1897
1900
|
// Simplified placeholder - in the actual implementation this would contain the complex MCP stream logic
|
|
1898
1901
|
const providerName = await getBestProvider(options.provider);
|
|
1899
1902
|
const provider = await AIProviderFactory.createProvider(providerName, options.model, !options.disableTools, // Pass disableTools as inverse of enableMCP
|
|
1900
|
-
this
|
|
1903
|
+
this, // Pass SDK instance
|
|
1904
|
+
options.region);
|
|
1901
1905
|
// Enable tool execution for the provider using BaseProvider method
|
|
1902
1906
|
provider.setupToolExecutor({
|
|
1903
1907
|
customTools: this.getCustomTools(),
|
|
@@ -6,7 +6,8 @@ import type { NeuroLink } from "../neurolink.js";
|
|
|
6
6
|
export declare class AmazonBedrockProvider extends BaseProvider {
|
|
7
7
|
private bedrockClient;
|
|
8
8
|
private conversationHistory;
|
|
9
|
-
|
|
9
|
+
private region;
|
|
10
|
+
constructor(modelName?: string, neurolink?: NeuroLink, region?: string);
|
|
10
11
|
/**
|
|
11
12
|
* Perform initial health check to catch credential/connectivity issues early
|
|
12
13
|
* This prevents the health check failure we saw in production logs
|