@juspay/neurolink 9.36.1 → 9.37.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/dist/auth/errors.d.ts +1 -1
- package/dist/auth/middleware/AuthMiddleware.d.ts +1 -1
- package/dist/auth/providers/BaseAuthProvider.d.ts +1 -1
- package/dist/browser/neurolink.min.js +921 -423
- package/dist/cli/commands/evaluate.d.ts +48 -0
- package/dist/cli/commands/evaluate.js +955 -0
- package/dist/cli/parser.js +4 -1
- package/dist/evaluation/BatchEvaluator.d.ts +163 -0
- package/dist/evaluation/BatchEvaluator.js +267 -0
- package/dist/evaluation/EvaluationAggregator.d.ts +272 -0
- package/dist/evaluation/EvaluationAggregator.js +377 -0
- package/dist/evaluation/EvaluatorFactory.d.ts +113 -0
- package/dist/evaluation/EvaluatorFactory.js +280 -0
- package/dist/evaluation/EvaluatorRegistry.d.ts +160 -0
- package/dist/evaluation/EvaluatorRegistry.js +184 -0
- package/dist/evaluation/errors/EvaluationError.d.ts +189 -0
- package/dist/evaluation/errors/EvaluationError.js +206 -0
- package/dist/evaluation/errors/index.d.ts +4 -0
- package/dist/evaluation/errors/index.js +4 -0
- package/dist/evaluation/hooks/index.d.ts +6 -0
- package/dist/evaluation/hooks/index.js +6 -0
- package/dist/evaluation/hooks/langfuseAdapter.d.ts +99 -0
- package/dist/evaluation/hooks/langfuseAdapter.js +172 -0
- package/dist/evaluation/hooks/observabilityHooks.d.ts +129 -0
- package/dist/evaluation/hooks/observabilityHooks.js +181 -0
- package/dist/evaluation/index.d.ts +11 -2
- package/dist/evaluation/index.js +15 -0
- package/dist/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
- package/dist/evaluation/pipeline/evaluationPipeline.js +381 -0
- package/dist/evaluation/pipeline/index.d.ts +8 -0
- package/dist/evaluation/pipeline/index.js +8 -0
- package/dist/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
- package/dist/evaluation/pipeline/pipelineBuilder.js +260 -0
- package/dist/evaluation/pipeline/presets.d.ts +66 -0
- package/dist/evaluation/pipeline/presets.js +224 -0
- package/dist/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
- package/dist/evaluation/pipeline/strategies/batchStrategy.js +238 -0
- package/dist/evaluation/pipeline/strategies/index.d.ts +6 -0
- package/dist/evaluation/pipeline/strategies/index.js +6 -0
- package/dist/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
- package/dist/evaluation/pipeline/strategies/samplingStrategy.js +238 -0
- package/dist/evaluation/reporting/index.d.ts +6 -0
- package/dist/evaluation/reporting/index.js +6 -0
- package/dist/evaluation/reporting/metricsCollector.d.ts +147 -0
- package/dist/evaluation/reporting/metricsCollector.js +285 -0
- package/dist/evaluation/reporting/reportGenerator.d.ts +90 -0
- package/dist/evaluation/reporting/reportGenerator.js +374 -0
- package/dist/evaluation/scorers/baseScorer.d.ts +83 -0
- package/dist/evaluation/scorers/baseScorer.js +232 -0
- package/dist/evaluation/scorers/customScorerUtils.d.ts +95 -0
- package/dist/evaluation/scorers/customScorerUtils.js +381 -0
- package/dist/evaluation/scorers/index.d.ts +10 -0
- package/dist/evaluation/scorers/index.js +16 -0
- package/dist/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/answerRelevancyScorer.js +99 -0
- package/dist/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
- package/dist/evaluation/scorers/llm/baseLLMScorer.js +281 -0
- package/dist/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/biasDetectionScorer.js +127 -0
- package/dist/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/contextPrecisionScorer.js +92 -0
- package/dist/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/contextRelevancyScorer.js +107 -0
- package/dist/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/faithfulnessScorer.js +121 -0
- package/dist/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/hallucinationScorer.js +140 -0
- package/dist/evaluation/scorers/llm/index.d.ts +15 -0
- package/dist/evaluation/scorers/llm/index.js +16 -0
- package/dist/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/promptAlignmentScorer.js +106 -0
- package/dist/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/summarizationScorer.js +114 -0
- package/dist/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/toneConsistencyScorer.js +106 -0
- package/dist/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/toxicityScorer.js +121 -0
- package/dist/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
- package/dist/evaluation/scorers/rule/baseRuleScorer.js +233 -0
- package/dist/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
- package/dist/evaluation/scorers/rule/contentSimilarityScorer.js +350 -0
- package/dist/evaluation/scorers/rule/formatScorer.d.ts +147 -0
- package/dist/evaluation/scorers/rule/formatScorer.js +470 -0
- package/dist/evaluation/scorers/rule/index.d.ts +9 -0
- package/dist/evaluation/scorers/rule/index.js +10 -0
- package/dist/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
- package/dist/evaluation/scorers/rule/keywordCoverageScorer.js +347 -0
- package/dist/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
- package/dist/evaluation/scorers/rule/lengthScorer.js +351 -0
- package/dist/evaluation/scorers/scorerBuilder.d.ts +161 -0
- package/dist/evaluation/scorers/scorerBuilder.js +420 -0
- package/dist/evaluation/scorers/scorerRegistry.d.ts +62 -0
- package/dist/evaluation/scorers/scorerRegistry.js +467 -0
- package/dist/index.d.ts +37 -25
- package/dist/index.js +65 -26
- package/dist/lib/auth/providers/BaseAuthProvider.d.ts +1 -1
- package/dist/lib/evaluation/BatchEvaluator.d.ts +163 -0
- package/dist/lib/evaluation/BatchEvaluator.js +268 -0
- package/dist/lib/evaluation/EvaluationAggregator.d.ts +272 -0
- package/dist/lib/evaluation/EvaluationAggregator.js +378 -0
- package/dist/lib/evaluation/EvaluatorFactory.d.ts +113 -0
- package/dist/lib/evaluation/EvaluatorFactory.js +281 -0
- package/dist/lib/evaluation/EvaluatorRegistry.d.ts +160 -0
- package/dist/lib/evaluation/EvaluatorRegistry.js +185 -0
- package/dist/lib/evaluation/errors/EvaluationError.d.ts +189 -0
- package/dist/lib/evaluation/errors/EvaluationError.js +207 -0
- package/dist/lib/evaluation/errors/index.d.ts +4 -0
- package/dist/lib/evaluation/errors/index.js +5 -0
- package/dist/lib/evaluation/hooks/index.d.ts +6 -0
- package/dist/lib/evaluation/hooks/index.js +7 -0
- package/dist/lib/evaluation/hooks/langfuseAdapter.d.ts +99 -0
- package/dist/lib/evaluation/hooks/langfuseAdapter.js +173 -0
- package/dist/lib/evaluation/hooks/observabilityHooks.d.ts +129 -0
- package/dist/lib/evaluation/hooks/observabilityHooks.js +182 -0
- package/dist/lib/evaluation/index.d.ts +11 -2
- package/dist/lib/evaluation/index.js +15 -0
- package/dist/lib/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
- package/dist/lib/evaluation/pipeline/evaluationPipeline.js +382 -0
- package/dist/lib/evaluation/pipeline/index.d.ts +8 -0
- package/dist/lib/evaluation/pipeline/index.js +9 -0
- package/dist/lib/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
- package/dist/lib/evaluation/pipeline/pipelineBuilder.js +261 -0
- package/dist/lib/evaluation/pipeline/presets.d.ts +66 -0
- package/dist/lib/evaluation/pipeline/presets.js +225 -0
- package/dist/lib/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
- package/dist/lib/evaluation/pipeline/strategies/batchStrategy.js +239 -0
- package/dist/lib/evaluation/pipeline/strategies/index.d.ts +6 -0
- package/dist/lib/evaluation/pipeline/strategies/index.js +7 -0
- package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
- package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.js +239 -0
- package/dist/lib/evaluation/reporting/index.d.ts +6 -0
- package/dist/lib/evaluation/reporting/index.js +7 -0
- package/dist/lib/evaluation/reporting/metricsCollector.d.ts +147 -0
- package/dist/lib/evaluation/reporting/metricsCollector.js +286 -0
- package/dist/lib/evaluation/reporting/reportGenerator.d.ts +90 -0
- package/dist/lib/evaluation/reporting/reportGenerator.js +375 -0
- package/dist/lib/evaluation/scorers/baseScorer.d.ts +83 -0
- package/dist/lib/evaluation/scorers/baseScorer.js +233 -0
- package/dist/lib/evaluation/scorers/customScorerUtils.d.ts +95 -0
- package/dist/lib/evaluation/scorers/customScorerUtils.js +382 -0
- package/dist/lib/evaluation/scorers/index.d.ts +10 -0
- package/dist/lib/evaluation/scorers/index.js +17 -0
- package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.js +100 -0
- package/dist/lib/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
- package/dist/lib/evaluation/scorers/llm/baseLLMScorer.js +282 -0
- package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.js +128 -0
- package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.js +93 -0
- package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.js +108 -0
- package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.js +122 -0
- package/dist/lib/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/hallucinationScorer.js +141 -0
- package/dist/lib/evaluation/scorers/llm/index.d.ts +15 -0
- package/dist/lib/evaluation/scorers/llm/index.js +17 -0
- package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.js +107 -0
- package/dist/lib/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/summarizationScorer.js +115 -0
- package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.js +107 -0
- package/dist/lib/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/toxicityScorer.js +122 -0
- package/dist/lib/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
- package/dist/lib/evaluation/scorers/rule/baseRuleScorer.js +234 -0
- package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
- package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.js +351 -0
- package/dist/lib/evaluation/scorers/rule/formatScorer.d.ts +147 -0
- package/dist/lib/evaluation/scorers/rule/formatScorer.js +471 -0
- package/dist/lib/evaluation/scorers/rule/index.d.ts +9 -0
- package/dist/lib/evaluation/scorers/rule/index.js +11 -0
- package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
- package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.js +348 -0
- package/dist/lib/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
- package/dist/lib/evaluation/scorers/rule/lengthScorer.js +352 -0
- package/dist/lib/evaluation/scorers/scorerBuilder.d.ts +161 -0
- package/dist/lib/evaluation/scorers/scorerBuilder.js +421 -0
- package/dist/lib/evaluation/scorers/scorerRegistry.d.ts +62 -0
- package/dist/lib/evaluation/scorers/scorerRegistry.js +468 -0
- package/dist/lib/index.d.ts +37 -25
- package/dist/lib/index.js +65 -26
- package/dist/lib/neurolink.d.ts +204 -0
- package/dist/lib/neurolink.js +296 -0
- package/dist/lib/types/index.d.ts +3 -1
- package/dist/lib/types/index.js +3 -2
- package/dist/lib/types/scorerTypes.d.ts +423 -0
- package/dist/lib/types/scorerTypes.js +6 -0
- package/dist/lib/utils/errorHandling.d.ts +20 -0
- package/dist/lib/utils/errorHandling.js +60 -0
- package/dist/neurolink.d.ts +204 -0
- package/dist/neurolink.js +296 -0
- package/dist/types/index.d.ts +3 -1
- package/dist/types/index.js +3 -2
- package/dist/types/scorerTypes.d.ts +423 -0
- package/dist/types/scorerTypes.js +5 -0
- package/dist/utils/errorHandling.d.ts +20 -0
- package/dist/utils/errorHandling.js +60 -0
- package/package.json +1 -1
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file EvaluatorFactory - Factory for creating evaluator instances.
|
|
3
|
+
* Extends BaseFactory to provide dynamic evaluator creation with configuration support.
|
|
4
|
+
*/
|
|
5
|
+
import { BaseFactory } from "../core/infrastructure/index.js";
|
|
6
|
+
import type { EvaluationConfig } from "../types/evaluationTypes.js";
|
|
7
|
+
import { Evaluator } from "./index.js";
|
|
8
|
+
/**
 * Descriptive metadata attached to a registered evaluator preset.
 */
export interface EvaluatorPreset {
    /** Name used to identify the preset. */
    name: string;

    /** Short explanation of when the preset is meant to be used. */
    description: string;

    /**
     * Evaluation configuration backing the preset.
     * May be absent; the original declaration notes it is optional for
     * built-in presets.
     */
    config?: EvaluationConfig;
}
|
|
19
|
+
/**
 * Singleton factory that builds {@link Evaluator} instances, either from a
 * named preset (optionally with overrides) or from a fully custom
 * configuration.
 *
 * @example
 * ```typescript
 * const factory = EvaluatorFactory.getInstance();
 *
 * // Built-in default preset
 * const evaluator = await factory.create('default');
 *
 * // Another named preset
 * const strictEvaluator = await factory.create('strict');
 *
 * // Preset plus configuration overrides
 * const customEvaluator = await factory.create('default', {
 *   threshold: 9,
 *   evaluationModel: 'gpt-4',
 *   provider: 'openai'
 * });
 * ```
 */
export declare class EvaluatorFactory extends BaseFactory<Evaluator, EvaluationConfig> {
    private static instance;

    private constructor();

    /**
     * Returns the shared EvaluatorFactory instance.
     */
    static getInstance(): EvaluatorFactory;

    /**
     * Discards the shared instance (useful for testing).
     */
    static resetInstance(): void;

    /**
     * Registers the built-in evaluator presets.
     * NOTE(review): described upstream as being called automatically on first
     * access; the triggering code lives in BaseFactory and is not visible here.
     */
    protected registerAll(): Promise<void>;

    /**
     * Builds an evaluator from a preset, optionally overriding parts of its
     * configuration.
     *
     * @param presetOrName - Preset name or one of its aliases
     * @param config - Optional configuration overrides
     * @returns Promise resolving to the configured Evaluator
     */
    createEvaluator(presetOrName?: string, config?: EvaluationConfig): Promise<Evaluator>;

    /**
     * Builds an evaluator directly from a configuration, without a preset.
     *
     * @param config - The complete evaluation configuration
     * @returns The configured Evaluator
     */
    createCustomEvaluator(config: EvaluationConfig): Evaluator;

    /**
     * Looks up the descriptive information for a preset.
     *
     * @param presetOrName - Preset name or one of its aliases
     * @returns Promise resolving to the preset information, or undefined when
     * there is none
     */
    getPresetInfo(presetOrName: string): Promise<EvaluatorPreset | undefined>;

    /**
     * Enumerates the available presets.
     *
     * @returns Promise resolving to one entry per preset: its name, its
     * aliases and its descriptive information
     */
    listPresets(): Promise<{
        name: string;
        aliases: string[];
        preset: EvaluatorPreset;
    }[]>;

    /**
     * Checks an evaluation configuration for validity.
     *
     * @param config - The configuration to check
     * @throws {NeuroLinkFeatureError} If the configuration is invalid
     */
    validateConfig(config: EvaluationConfig): void;

    /**
     * Adds a custom preset to the factory.
     *
     * @param name - Unique preset name
     * @param config - Evaluation configuration the preset is based on
     * @param aliases - Alternative names for the preset
     * @param description - Human-readable description
     */
    registerPreset(name: string, config: EvaluationConfig, aliases?: string[], description?: string): void;

    /**
     * Removes a preset from the factory.
     *
     * @param name - Name of the preset to remove
     * @returns true when a preset was removed, false when none existed
     */
    unregisterPreset(name: string): boolean;
}
|
|
113
|
+
/** Convenience accessor that returns the EvaluatorFactory singleton. */
export declare const getEvaluatorFactory: () => EvaluatorFactory;
|
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file EvaluatorFactory - Factory for creating evaluator instances.
|
|
3
|
+
* Extends BaseFactory to provide dynamic evaluator creation with configuration support.
|
|
4
|
+
*/
|
|
5
|
+
import { BaseFactory } from "../core/infrastructure/index.js";
|
|
6
|
+
import { Evaluator } from "./index.js";
|
|
7
|
+
import { createConfigurationError } from "./errors/EvaluationError.js";
|
|
8
|
+
/**
 * Built-in preset table consumed by `EvaluatorFactory.registerAll`.
 *
 * `defaults` is a function rather than a plain object so that the
 * NEUROLINK_RAGAS_* environment variables are read every time an evaluator is
 * created, not once when the preset is registered.
 */
const BUILT_IN_PRESETS = [
    {
        id: "default",
        aliases: ["standard", "basic"],
        preset: {
            name: "Default",
            description: "Standard evaluation with balanced settings",
        },
        defaults: () => ({
            threshold: 7,
            evaluationStrategy: "ragas",
            evaluationModel: process.env.NEUROLINK_RAGAS_EVALUATION_MODEL || "gemini-1.5-flash",
            provider: process.env.NEUROLINK_RAGAS_EVALUATION_PROVIDER || "vertex",
        }),
    },
    {
        // Higher threshold
        id: "strict",
        aliases: ["high-quality", "production"],
        preset: {
            name: "Strict",
            description: "Strict evaluation with higher quality threshold (8/10)",
        },
        defaults: () => ({
            threshold: 8,
            evaluationStrategy: "ragas",
            evaluationModel: "gpt-4",
            provider: "openai",
        }),
    },
    {
        // Lower threshold
        id: "lenient",
        aliases: ["relaxed", "development"],
        preset: {
            name: "Lenient",
            description: "Lenient evaluation with lower threshold for development (5/10)",
        },
        defaults: () => ({
            threshold: 5,
            evaluationStrategy: "ragas",
            evaluationModel: process.env.NEUROLINK_RAGAS_EVALUATION_MODEL || "gemini-1.5-flash",
            provider: process.env.NEUROLINK_RAGAS_EVALUATION_PROVIDER || "vertex",
        }),
    },
    {
        // Optimized for speed
        id: "fast",
        aliases: ["quick", "speed"],
        preset: {
            name: "Fast",
            description: "Fast evaluation optimized for speed with lighter model",
        },
        defaults: () => ({
            threshold: 6,
            evaluationStrategy: "ragas",
            evaluationModel: "gemini-1.5-flash",
            provider: "vertex",
        }),
    },
    {
        // Highest quality
        id: "premium",
        aliases: ["enterprise", "highest-quality"],
        preset: {
            name: "Premium",
            description: "Premium evaluation with highest quality model and strictest threshold (9/10)",
        },
        defaults: () => ({
            threshold: 9,
            evaluationStrategy: "ragas",
            evaluationModel: "gpt-4-turbo",
            provider: "openai",
        }),
    },
];
/**
 * Merges overrides on top of preset defaults and builds the evaluator.
 * The merged configuration is validated only when overrides were supplied.
 */
function buildEvaluator(factory, defaults, overrides) {
    const mergedConfig = { ...defaults, ...overrides };
    if (overrides) {
        factory.validateConfig(mergedConfig);
    }
    return new Evaluator(mergedConfig);
}
/**
 * Throws a configuration error unless `value` is undefined or lies in [1, 10].
 */
function assertScoreInRange(value, message, detail) {
    if (value === undefined) {
        return;
    }
    // Written as a negated "in range" test so NaN (for which both `< 1` and
    // `> 10` are false) is rejected instead of silently accepted.
    if (!(value >= 1 && value <= 10)) {
        throw createConfigurationError(message, detail);
    }
}
/**
 * Factory for creating Evaluator instances with various configurations.
 * Supports presets for common use cases and custom configurations.
 *
 * @example
 * ```typescript
 * const factory = EvaluatorFactory.getInstance();
 *
 * // Create with default configuration
 * const evaluator = await factory.create('default');
 *
 * // Create with a preset
 * const strictEvaluator = await factory.create('strict');
 *
 * // Create with custom config
 * const customEvaluator = await factory.create('default', {
 *   threshold: 9,
 *   evaluationModel: 'gpt-4',
 *   provider: 'openai'
 * });
 * ```
 */
export class EvaluatorFactory extends BaseFactory {
    static instance = null;
    constructor() {
        super();
    }
    /**
     * Gets the singleton instance of the EvaluatorFactory, creating it on the
     * first call.
     */
    static getInstance() {
        if (!EvaluatorFactory.instance) {
            EvaluatorFactory.instance = new EvaluatorFactory();
        }
        return EvaluatorFactory.instance;
    }
    /**
     * Resets the singleton instance (useful for testing). The next
     * `getInstance()` call builds a new factory.
     */
    static resetInstance() {
        EvaluatorFactory.instance = null;
    }
    /**
     * Registers all built-in evaluator presets: default, strict, lenient,
     * fast and premium.
     * NOTE(review): presumably invoked through BaseFactory's
     * `ensureInitialized()`; the base class is not visible here.
     */
    async registerAll() {
        for (const { id, aliases, preset, defaults } of BUILT_IN_PRESETS) {
            // Copy aliases and metadata so factory instances never share the
            // module-level table objects.
            this.register(id, async (config) => buildEvaluator(this, defaults(), config), [...aliases], { preset: { ...preset } });
        }
    }
    /**
     * Creates an evaluator instance with the specified preset and optional
     * config overrides.
     *
     * @param presetOrName - The preset name or alias (defaults to "default")
     * @param config - Optional configuration overrides
     * @returns A configured Evaluator instance
     */
    async createEvaluator(presetOrName = "default", config) {
        return this.create(presetOrName, config);
    }
    /**
     * Creates an evaluator with a fully custom configuration (not based on a
     * preset). The configuration is validated before the evaluator is built.
     *
     * @param config - The evaluation configuration
     * @returns A configured Evaluator instance
     * @throws The error from `validateConfig` if the configuration is invalid
     */
    createCustomEvaluator(config) {
        this.validateConfig(config);
        return new Evaluator(config);
    }
    /**
     * Gets information about a preset by name or alias.
     *
     * @param presetOrName - The preset name or alias
     * @returns The preset information, or undefined if no registration with
     * preset metadata is found
     */
    async getPresetInfo(presetOrName) {
        await this.ensureInitialized();
        const registration = this.items.get(this.resolveName(presetOrName));
        return registration?.metadata?.preset ?? undefined;
    }
    /**
     * Lists all available presets with their descriptions. Registrations
     * without preset metadata are skipped.
     *
     * @returns Array of `{ name, aliases, preset }` entries
     */
    async listPresets() {
        await this.ensureInitialized();
        const presets = [];
        for (const [name, registration] of this.items.entries()) {
            const preset = registration.metadata?.preset;
            if (preset) {
                presets.push({ name, aliases: registration.aliases, preset });
            }
        }
        return presets;
    }
    /**
     * Validates an evaluation configuration.
     *
     * Checks that `threshold`, `offTopicThreshold` and `highSeverityThreshold`
     * (each only when defined) lie between 1 and 10, and that the "custom"
     * strategy is accompanied by a `customEvaluator`.
     *
     * @param config - The configuration to validate
     * @throws The configuration error created by `createConfigurationError`
     * (documented upstream as NeuroLinkFeatureError) if the configuration is
     * invalid
     */
    validateConfig(config) {
        assertScoreInRange(config.threshold, "Evaluation threshold must be between 1 and 10", "threshold out of range");
        if (config.evaluationStrategy === "custom" && !config.customEvaluator) {
            throw createConfigurationError("Custom evaluation strategy requires a customEvaluator function", "missing customEvaluator");
        }
        assertScoreInRange(config.offTopicThreshold, "Off-topic threshold must be between 1 and 10", "offTopicThreshold out of range");
        assertScoreInRange(config.highSeverityThreshold, "High severity threshold must be between 1 and 10", "highSeverityThreshold out of range");
    }
    /**
     * Registers a custom evaluator preset.
     *
     * The configuration is validated up front and then snapshotted, so later
     * mutation of the caller's object cannot alter the already-validated
     * preset.
     *
     * @param name - Unique name for the preset
     * @param config - The evaluation configuration for this preset
     * @param aliases - Alternative names for the preset
     * @param description - Human-readable description
     * @throws The error from `validateConfig` if the configuration is invalid
     */
    registerPreset(name, config, aliases = [], description = "") {
        this.validateConfig(config);
        const baseConfig = { ...config };
        this.register(name, async (overrides) => buildEvaluator(this, baseConfig, overrides), aliases, {
            preset: {
                name,
                description,
                // Separate copy so edits to the reported metadata cannot
                // change the configuration used to build evaluators.
                config: { ...baseConfig },
            },
        });
    }
    /**
     * Unregisters a preset from the factory, together with its aliases.
     *
     * NOTE(review): the lookup uses `name` exactly as given; unlike
     * `getPresetInfo` it does not go through `resolveName`, so an alias is
     * presumably not accepted here - confirm against BaseFactory.
     *
     * @param name - The preset name to remove
     * @returns true if the preset was removed, false if it didn't exist
     */
    unregisterPreset(name) {
        const registration = this.items.get(name);
        if (!registration) {
            return false;
        }
        // Drop the alias entries first so none is left pointing at a removed preset.
        for (const alias of registration.aliases) {
            this.aliasMap.delete(alias.toLowerCase());
        }
        this.items.delete(name);
        return true;
    }
}
|
|
279
|
+
/**
 * Convenience accessor for the shared EvaluatorFactory singleton.
 *
 * @returns The instance returned by `EvaluatorFactory.getInstance()`
 */
export const getEvaluatorFactory = () => {
    return EvaluatorFactory.getInstance();
};
|
|
281
|
+
//# sourceMappingURL=EvaluatorFactory.js.map
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file EvaluatorRegistry - Registry for evaluation strategies.
|
|
3
|
+
* Extends BaseRegistry to provide dynamic strategy registration and lookup.
|
|
4
|
+
*/
|
|
5
|
+
import { BaseRegistry } from "../core/infrastructure/index.js";
|
|
6
|
+
import type { LanguageModelV3CallOptions } from "@ai-sdk/provider";
|
|
7
|
+
import type { GenerateResult } from "../types/generateTypes.js";
|
|
8
|
+
import type { EvaluationResult, EnhancedEvaluationContext } from "../types/evaluationTypes.js";
|
|
9
|
+
/**
 * Signature of an evaluation strategy: takes the model call options, the
 * generation result and an optional strategy configuration, and resolves to
 * the evaluation result together with its evaluation context.
 */
export type EvaluationStrategyFunction = (
    options: LanguageModelV3CallOptions,
    result: GenerateResult,
    config?: EvaluationStrategyConfig,
) => Promise<{
    evaluationResult: EvaluationResult;
    evalContext: EnhancedEvaluationContext;
}>;
|
|
16
|
+
/**
 * Optional settings passed to an evaluation strategy.
 */
export interface EvaluationStrategyConfig {
    /** Model used to perform the evaluation. */
    evaluationModel?: string;

    /** Provider used to perform the evaluation. */
    provider?: string;

    /** Passing threshold (1-10). */
    threshold?: number;

    /**
     * Custom prompt generator: receives the textual pieces of the evaluation
     * context and returns the prompt string.
     */
    promptGenerator?: (context: {
        userQuery: string;
        history: string;
        tools: string;
        retryInfo: string;
        aiResponse: string;
    }) => string;

    /** Additional strategy-specific options. */
    options?: Record<string, unknown>;
}
|
|
37
|
+
/**
 * Descriptive metadata stored alongside each registered evaluation strategy.
 */
export interface EvaluationStrategyMetadata {
    /** Human-readable strategy name. */
    name: string;

    /** Explanation of what the strategy does. */
    description: string;

    /** Whether the strategy needs an external LLM. */
    requiresLLM: boolean;

    /** Default model for the strategy (relevant when requiresLLM is true). */
    defaultModel?: string;

    /** Default provider for the strategy (relevant when requiresLLM is true). */
    defaultProvider?: string;

    /** Strategy version. */
    version: string;

    /** Features the strategy supports. */
    features: string[];
}
|
|
56
|
+
/**
 * Registry for evaluation strategies.
 * Allows dynamic registration and retrieval of evaluation strategies.
 *
 * @example
 * ```typescript
 * // Register a custom strategy. The factory must resolve to the strategy
 * // FUNCTION itself (an EvaluationStrategyFunction), not to an object
 * // wrapping it.
 * EvaluatorRegistry.getInstance().registerStrategy(
 *   'custom-ragas',
 *   async () => async (options, result, config) => {
 *     // ...evaluate and return { evaluationResult, evalContext }
 *   },
 *   {
 *     name: 'Custom RAGAS',
 *     description: 'Custom RAGAS implementation',
 *     requiresLLM: true,
 *     defaultModel: 'gpt-4',
 *     defaultProvider: 'openai',
 *     version: '1.0.0',
 *     features: ['custom-metrics']
 *   }
 * );
 *
 * // Get a strategy
 * const strategy = await EvaluatorRegistry.getInstance().getStrategy('ragas');
 * ```
 */
export declare class EvaluatorRegistry extends BaseRegistry<EvaluationStrategyFunction, EvaluationStrategyMetadata> {
    private static instance;
    private constructor();
    /**
     * Gets the singleton instance of the EvaluatorRegistry.
     */
    static getInstance(): EvaluatorRegistry;
    /**
     * Resets the singleton instance (useful for testing).
     */
    static resetInstance(): void;
    /**
     * Registers all built-in evaluation strategies.
     * NOTE(review): described upstream as being called automatically on first
     * access; the triggering code lives in BaseRegistry and is not visible here.
     */
    protected registerAll(): Promise<void>;
    /**
     * Registers an evaluation strategy with the registry.
     *
     * @param id - Unique identifier for the strategy
     * @param factory - Async factory that resolves to the strategy function
     * @param metadata - Metadata about the strategy
     */
    registerStrategy(id: string, factory: () => Promise<EvaluationStrategyFunction>, metadata: EvaluationStrategyMetadata): void;
    /**
     * Gets an evaluation strategy by ID.
     *
     * @param id - The strategy identifier
     * @returns Promise resolving to the evaluation strategy function
     * @throws {NeuroLinkFeatureError} If the strategy is not found
     */
    getStrategy(id: string): Promise<EvaluationStrategyFunction>;
    /**
     * Checks if a strategy exists in the registry.
     *
     * @param id - The strategy identifier
     * @returns Promise resolving to true if the strategy exists
     */
    hasStrategy(id: string): Promise<boolean>;
    /**
     * Lists all registered strategies with their metadata.
     *
     * @returns Promise resolving to an array of strategy IDs and their metadata
     */
    listStrategies(): Promise<Array<{
        id: string;
        metadata: EvaluationStrategyMetadata;
    }>>;
    /**
     * Gets the metadata for a specific strategy.
     *
     * @param id - The strategy identifier
     * @returns Promise resolving to the strategy metadata, or undefined if not found
     */
    getStrategyMetadata(id: string): Promise<EvaluationStrategyMetadata | undefined>;
    /**
     * Unregisters a strategy from the registry.
     *
     * @param id - The strategy identifier
     * @returns Promise resolving to true if the strategy was removed, false if
     * it didn't exist
     */
    unregisterStrategy(id: string): Promise<boolean>;
    /**
     * Gets strategies that support a specific feature.
     *
     * @param feature - The feature to filter by
     * @returns Promise resolving to the IDs of strategies that support the feature
     */
    getStrategiesWithFeature(feature: string): Promise<string[]>;
    /**
     * Gets strategies that use a specific provider.
     *
     * @param provider - The provider to filter by
     * @returns Promise resolving to the IDs of strategies that use the provider
     */
    getStrategiesByProvider(provider: string): Promise<string[]>;
}
|
|
160
|
+
/**
 * Accessor returning an EvaluatorRegistry.
 * NOTE(review): presumably returns the `EvaluatorRegistry.getInstance()` singleton; the implementation is not visible here.
 */
export declare const getEvaluatorRegistry: () => EvaluatorRegistry;
|