@juspay/neurolink 9.36.1 → 9.38.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/auth/errors.d.ts +1 -1
- package/dist/auth/middleware/AuthMiddleware.d.ts +1 -1
- package/dist/auth/providers/BaseAuthProvider.d.ts +1 -1
- package/dist/browser/neurolink.min.js +1105 -556
- package/dist/cli/commands/evaluate.d.ts +48 -0
- package/dist/cli/commands/evaluate.js +955 -0
- package/dist/cli/parser.js +4 -1
- package/dist/evaluation/BatchEvaluator.d.ts +163 -0
- package/dist/evaluation/BatchEvaluator.js +267 -0
- package/dist/evaluation/EvaluationAggregator.d.ts +272 -0
- package/dist/evaluation/EvaluationAggregator.js +377 -0
- package/dist/evaluation/EvaluatorFactory.d.ts +113 -0
- package/dist/evaluation/EvaluatorFactory.js +280 -0
- package/dist/evaluation/EvaluatorRegistry.d.ts +160 -0
- package/dist/evaluation/EvaluatorRegistry.js +184 -0
- package/dist/evaluation/errors/EvaluationError.d.ts +189 -0
- package/dist/evaluation/errors/EvaluationError.js +206 -0
- package/dist/evaluation/errors/index.d.ts +4 -0
- package/dist/evaluation/errors/index.js +4 -0
- package/dist/evaluation/hooks/index.d.ts +6 -0
- package/dist/evaluation/hooks/index.js +6 -0
- package/dist/evaluation/hooks/langfuseAdapter.d.ts +99 -0
- package/dist/evaluation/hooks/langfuseAdapter.js +172 -0
- package/dist/evaluation/hooks/observabilityHooks.d.ts +129 -0
- package/dist/evaluation/hooks/observabilityHooks.js +181 -0
- package/dist/evaluation/index.d.ts +11 -2
- package/dist/evaluation/index.js +15 -0
- package/dist/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
- package/dist/evaluation/pipeline/evaluationPipeline.js +381 -0
- package/dist/evaluation/pipeline/index.d.ts +8 -0
- package/dist/evaluation/pipeline/index.js +8 -0
- package/dist/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
- package/dist/evaluation/pipeline/pipelineBuilder.js +260 -0
- package/dist/evaluation/pipeline/presets.d.ts +66 -0
- package/dist/evaluation/pipeline/presets.js +224 -0
- package/dist/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
- package/dist/evaluation/pipeline/strategies/batchStrategy.js +238 -0
- package/dist/evaluation/pipeline/strategies/index.d.ts +6 -0
- package/dist/evaluation/pipeline/strategies/index.js +6 -0
- package/dist/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
- package/dist/evaluation/pipeline/strategies/samplingStrategy.js +238 -0
- package/dist/evaluation/reporting/index.d.ts +6 -0
- package/dist/evaluation/reporting/index.js +6 -0
- package/dist/evaluation/reporting/metricsCollector.d.ts +147 -0
- package/dist/evaluation/reporting/metricsCollector.js +285 -0
- package/dist/evaluation/reporting/reportGenerator.d.ts +90 -0
- package/dist/evaluation/reporting/reportGenerator.js +374 -0
- package/dist/evaluation/scorers/baseScorer.d.ts +83 -0
- package/dist/evaluation/scorers/baseScorer.js +232 -0
- package/dist/evaluation/scorers/customScorerUtils.d.ts +95 -0
- package/dist/evaluation/scorers/customScorerUtils.js +381 -0
- package/dist/evaluation/scorers/index.d.ts +10 -0
- package/dist/evaluation/scorers/index.js +16 -0
- package/dist/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/answerRelevancyScorer.js +99 -0
- package/dist/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
- package/dist/evaluation/scorers/llm/baseLLMScorer.js +281 -0
- package/dist/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/biasDetectionScorer.js +127 -0
- package/dist/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/contextPrecisionScorer.js +92 -0
- package/dist/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/contextRelevancyScorer.js +107 -0
- package/dist/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/faithfulnessScorer.js +121 -0
- package/dist/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/hallucinationScorer.js +140 -0
- package/dist/evaluation/scorers/llm/index.d.ts +15 -0
- package/dist/evaluation/scorers/llm/index.js +16 -0
- package/dist/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/promptAlignmentScorer.js +106 -0
- package/dist/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/summarizationScorer.js +114 -0
- package/dist/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/toneConsistencyScorer.js +106 -0
- package/dist/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/toxicityScorer.js +121 -0
- package/dist/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
- package/dist/evaluation/scorers/rule/baseRuleScorer.js +233 -0
- package/dist/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
- package/dist/evaluation/scorers/rule/contentSimilarityScorer.js +350 -0
- package/dist/evaluation/scorers/rule/formatScorer.d.ts +147 -0
- package/dist/evaluation/scorers/rule/formatScorer.js +470 -0
- package/dist/evaluation/scorers/rule/index.d.ts +9 -0
- package/dist/evaluation/scorers/rule/index.js +10 -0
- package/dist/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
- package/dist/evaluation/scorers/rule/keywordCoverageScorer.js +347 -0
- package/dist/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
- package/dist/evaluation/scorers/rule/lengthScorer.js +351 -0
- package/dist/evaluation/scorers/scorerBuilder.d.ts +161 -0
- package/dist/evaluation/scorers/scorerBuilder.js +420 -0
- package/dist/evaluation/scorers/scorerRegistry.d.ts +62 -0
- package/dist/evaluation/scorers/scorerRegistry.js +467 -0
- package/dist/index.d.ts +37 -25
- package/dist/index.js +65 -26
- package/dist/lib/auth/providers/BaseAuthProvider.d.ts +1 -1
- package/dist/lib/evaluation/BatchEvaluator.d.ts +163 -0
- package/dist/lib/evaluation/BatchEvaluator.js +268 -0
- package/dist/lib/evaluation/EvaluationAggregator.d.ts +272 -0
- package/dist/lib/evaluation/EvaluationAggregator.js +378 -0
- package/dist/lib/evaluation/EvaluatorFactory.d.ts +113 -0
- package/dist/lib/evaluation/EvaluatorFactory.js +281 -0
- package/dist/lib/evaluation/EvaluatorRegistry.d.ts +160 -0
- package/dist/lib/evaluation/EvaluatorRegistry.js +185 -0
- package/dist/lib/evaluation/errors/EvaluationError.d.ts +189 -0
- package/dist/lib/evaluation/errors/EvaluationError.js +207 -0
- package/dist/lib/evaluation/errors/index.d.ts +4 -0
- package/dist/lib/evaluation/errors/index.js +5 -0
- package/dist/lib/evaluation/hooks/index.d.ts +6 -0
- package/dist/lib/evaluation/hooks/index.js +7 -0
- package/dist/lib/evaluation/hooks/langfuseAdapter.d.ts +99 -0
- package/dist/lib/evaluation/hooks/langfuseAdapter.js +173 -0
- package/dist/lib/evaluation/hooks/observabilityHooks.d.ts +129 -0
- package/dist/lib/evaluation/hooks/observabilityHooks.js +182 -0
- package/dist/lib/evaluation/index.d.ts +11 -2
- package/dist/lib/evaluation/index.js +15 -0
- package/dist/lib/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
- package/dist/lib/evaluation/pipeline/evaluationPipeline.js +382 -0
- package/dist/lib/evaluation/pipeline/index.d.ts +8 -0
- package/dist/lib/evaluation/pipeline/index.js +9 -0
- package/dist/lib/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
- package/dist/lib/evaluation/pipeline/pipelineBuilder.js +261 -0
- package/dist/lib/evaluation/pipeline/presets.d.ts +66 -0
- package/dist/lib/evaluation/pipeline/presets.js +225 -0
- package/dist/lib/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
- package/dist/lib/evaluation/pipeline/strategies/batchStrategy.js +239 -0
- package/dist/lib/evaluation/pipeline/strategies/index.d.ts +6 -0
- package/dist/lib/evaluation/pipeline/strategies/index.js +7 -0
- package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
- package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.js +239 -0
- package/dist/lib/evaluation/reporting/index.d.ts +6 -0
- package/dist/lib/evaluation/reporting/index.js +7 -0
- package/dist/lib/evaluation/reporting/metricsCollector.d.ts +147 -0
- package/dist/lib/evaluation/reporting/metricsCollector.js +286 -0
- package/dist/lib/evaluation/reporting/reportGenerator.d.ts +90 -0
- package/dist/lib/evaluation/reporting/reportGenerator.js +375 -0
- package/dist/lib/evaluation/scorers/baseScorer.d.ts +83 -0
- package/dist/lib/evaluation/scorers/baseScorer.js +233 -0
- package/dist/lib/evaluation/scorers/customScorerUtils.d.ts +95 -0
- package/dist/lib/evaluation/scorers/customScorerUtils.js +382 -0
- package/dist/lib/evaluation/scorers/index.d.ts +10 -0
- package/dist/lib/evaluation/scorers/index.js +17 -0
- package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.js +100 -0
- package/dist/lib/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
- package/dist/lib/evaluation/scorers/llm/baseLLMScorer.js +282 -0
- package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.js +128 -0
- package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.js +93 -0
- package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.js +108 -0
- package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.js +122 -0
- package/dist/lib/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/hallucinationScorer.js +141 -0
- package/dist/lib/evaluation/scorers/llm/index.d.ts +15 -0
- package/dist/lib/evaluation/scorers/llm/index.js +17 -0
- package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.js +107 -0
- package/dist/lib/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/summarizationScorer.js +115 -0
- package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.js +107 -0
- package/dist/lib/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/toxicityScorer.js +122 -0
- package/dist/lib/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
- package/dist/lib/evaluation/scorers/rule/baseRuleScorer.js +234 -0
- package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
- package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.js +351 -0
- package/dist/lib/evaluation/scorers/rule/formatScorer.d.ts +147 -0
- package/dist/lib/evaluation/scorers/rule/formatScorer.js +471 -0
- package/dist/lib/evaluation/scorers/rule/index.d.ts +9 -0
- package/dist/lib/evaluation/scorers/rule/index.js +11 -0
- package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
- package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.js +348 -0
- package/dist/lib/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
- package/dist/lib/evaluation/scorers/rule/lengthScorer.js +352 -0
- package/dist/lib/evaluation/scorers/scorerBuilder.d.ts +161 -0
- package/dist/lib/evaluation/scorers/scorerBuilder.js +421 -0
- package/dist/lib/evaluation/scorers/scorerRegistry.d.ts +62 -0
- package/dist/lib/evaluation/scorers/scorerRegistry.js +468 -0
- package/dist/lib/index.d.ts +37 -25
- package/dist/lib/index.js +65 -26
- package/dist/lib/neurolink.d.ts +204 -0
- package/dist/lib/neurolink.js +296 -0
- package/dist/lib/processors/media/VideoProcessor.d.ts +8 -2
- package/dist/lib/processors/media/VideoProcessor.js +90 -41
- package/dist/lib/telemetry/telemetryService.d.ts +1 -1
- package/dist/lib/telemetry/telemetryService.js +27 -13
- package/dist/lib/types/index.d.ts +3 -1
- package/dist/lib/types/index.js +3 -2
- package/dist/lib/types/scorerTypes.d.ts +423 -0
- package/dist/lib/types/scorerTypes.js +6 -0
- package/dist/lib/utils/errorHandling.d.ts +20 -0
- package/dist/lib/utils/errorHandling.js +60 -0
- package/dist/neurolink.d.ts +204 -0
- package/dist/neurolink.js +296 -0
- package/dist/processors/media/VideoProcessor.d.ts +8 -2
- package/dist/processors/media/VideoProcessor.js +90 -41
- package/dist/telemetry/telemetryService.d.ts +1 -1
- package/dist/telemetry/telemetryService.js +27 -13
- package/dist/types/index.d.ts +3 -1
- package/dist/types/index.js +3 -2
- package/dist/types/scorerTypes.d.ts +423 -0
- package/dist/types/scorerTypes.js +5 -0
- package/dist/utils/errorHandling.d.ts +20 -0
- package/dist/utils/errorHandling.js +60 -0
- package/package.json +7 -7
- package/dist/processors/media/ffprobe-static.d.ts +0 -4
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Custom Scorer Utilities
|
|
3
|
+
* Helper functions for creating custom scorers
|
|
4
|
+
*/
|
|
5
|
+
import type { JsonObject } from "../../types/common.js";
|
|
6
|
+
import type { ScorerCategory, ScorerConfig, ScorerInput, ScorerMetadata, ScorerType } from "../../types/scorerTypes.js";
|
|
7
|
+
import { BaseScorer } from "./baseScorer.js";
|
|
8
|
+
/**
 * Signature of the callback behind a function-based scorer.
 *
 * The callback receives the scorer input and resolves to the raw score,
 * a human-readable explanation, and optional extra data to attach to
 * the result.
 */
export type ScorerFunction = (input: ScorerInput) => Promise<{
    /** Numeric score produced for this input. */
    score: number;
    /** Explanation of how the score was reached. */
    reasoning: string;
    /** Optional additional data carried alongside the score. */
    metadata?: JsonObject;
}>;
|
|
16
|
+
/**
 * Build a full scorer metadata record, filling in defaults for every
 * field that `options` leaves out.
 *
 * Defaults applied by the implementation: description
 * `Custom scorer: <name>`, type `"rule"`, category `"custom"`, version
 * `"1.0.0"`, required inputs `["response"]`, optional inputs
 * `["query", "context", "groundTruth"]`, and a default config of
 * `{ enabled: true, threshold: 0.7, weight: 1.0, timeout: 5000, retries: 0 }`.
 *
 * @param id - Identifier stored on the metadata.
 * @param name - Display name; also used in the default description.
 * @param options - Optional overrides for individual metadata fields.
 * @returns The assembled metadata.
 */
export declare function createScorerMetadata(id: string, name: string, options?: {
    description?: string;
    type?: ScorerType;
    category?: ScorerCategory;
    version?: string;
    requiredInputs?: Array<keyof ScorerInput>;
    optionalInputs?: Array<keyof ScorerInput>;
    defaultConfig?: ScorerConfig;
}): ScorerMetadata;
|
|
28
|
+
/**
 * Wrap a plain async function as a scorer.
 *
 * Metadata is generated from `id`, `name` and the descriptive fields of
 * `options` (the type falls back to `"rule"`); `options.config` is
 * handed to the scorer instance.
 *
 * @param id - Identifier for the new scorer.
 * @param name - Display name for the new scorer.
 * @param scorerFn - Function that computes the score for an input.
 * @param options - Optional metadata overrides and scorer config.
 * @returns A scorer that delegates scoring to `scorerFn`.
 */
export declare function createFunctionScorer(id: string, name: string, scorerFn: ScorerFunction, options?: {
    description?: string;
    category?: ScorerCategory;
    type?: ScorerType;
    version?: string;
    requiredInputs?: Array<keyof ScorerInput>;
    optionalInputs?: Array<keyof ScorerInput>;
    config?: ScorerConfig;
}): BaseScorer;
|
|
40
|
+
/**
 * Create a scorer that tests the response against a regular expression.
 *
 * The scorer yields 10 when the match outcome agrees with
 * `shouldMatch` and 0 otherwise. The implementation rejects patterns
 * whose source is longer than 200 characters, patterns flagged by its
 * nested-quantifier heuristic, and string patterns that fail to
 * compile, by throwing a configuration error at creation time.
 *
 * @param id - Identifier for the new scorer.
 * @param name - Display name for the new scorer.
 * @param options - Pattern and matching options.
 * @returns A rule scorer in the "quality" category.
 */
export declare function createRegexScorer(id: string, name: string, options: {
    /** Pattern source string, or an already-compiled expression. */
    pattern: RegExp | string;
    /** Flags used when `pattern` is a string; defaults to `"i"`. */
    flags?: string;
    /** Whether a match counts as a pass; defaults to `true`. */
    shouldMatch?: boolean;
    description?: string;
    config?: ScorerConfig;
}): BaseScorer;
|
|
50
|
+
/**
 * Create a scorer that checks the response for required and forbidden
 * keywords using substring matching.
 *
 * Each required keyword that is present and each forbidden keyword that
 * is absent counts as a passed check; the score is the fraction of
 * passed checks scaled to 10. With no keywords configured the score is
 * 10.
 *
 * @param id - Identifier for the new scorer.
 * @param name - Display name for the new scorer.
 * @param options - Keyword lists and matching options.
 * @returns A rule scorer in the "quality" category.
 */
export declare function createKeywordScorer(id: string, name: string, options: {
    /** Keywords that must appear in the response. */
    requiredKeywords?: Array<string>;
    /** Keywords that must not appear in the response. */
    forbiddenKeywords?: Array<string>;
    /** Compare in lower case when true; defaults to `true`. */
    caseInsensitive?: boolean;
    description?: string;
    config?: ScorerConfig;
}): BaseScorer;
|
|
60
|
+
/**
 * Create a scorer that checks the response length against word and
 * character bounds.
 *
 * Words are counted by splitting the trimmed response on whitespace;
 * characters are counted with the string's `length`. The score is 10
 * when every configured bound holds and 0 otherwise. Bounds left
 * undefined are not checked.
 *
 * @param id - Identifier for the new scorer.
 * @param name - Display name for the new scorer.
 * @param options - Length bounds plus optional description and config.
 * @returns A rule scorer in the "quality" category.
 */
export declare function createSimpleLengthScorer(id: string, name: string, options: {
    /** Minimum number of words allowed. */
    minWords?: number;
    /** Maximum number of words allowed. */
    maxWords?: number;
    /** Minimum number of characters allowed. */
    minChars?: number;
    /** Maximum number of characters allowed. */
    maxChars?: number;
    description?: string;
    config?: ScorerConfig;
}): BaseScorer;
|
|
71
|
+
/**
 * Combine several scorers into one scorer whose score is an aggregate
 * of the sub-scores.
 *
 * Every sub-scorer is invoked for each input. The implementation throws
 * an `Error` when `scorers` is empty.
 *
 * @param id - Identifier for the composed scorer.
 * @param name - Display name for the composed scorer.
 * @param scorers - Sub-scorers to run; must contain at least one entry.
 * @param options - Aggregation settings plus optional description and config.
 * @returns A "hybrid" scorer in the "custom" category.
 */
export declare function composeScorers(id: string, name: string, scorers: Array<BaseScorer>, options?: {
    /** How sub-scores are combined; defaults to `"average"`. */
    aggregation?: "average" | "min" | "max" | "weighted";
    /**
     * Per-scorer weights, matched to `scorers` by index and used only
     * for `"weighted"` aggregation. A missing entry counts as 1.
     */
    weights?: Array<number>;
    description?: string;
    config?: ScorerConfig;
}): BaseScorer;
|
|
80
|
+
/**
 * Create a scorer that delegates to `scorer` only when `condition`
 * returns true for the input, and otherwise returns a fixed default.
 *
 * The new scorer's metadata takes its type and category from the
 * wrapped scorer.
 *
 * @param id - Identifier for the new scorer.
 * @param name - Display name for the new scorer.
 * @param condition - Predicate deciding whether the wrapped scorer runs.
 * @param scorer - Scorer to run when the condition holds.
 * @param options - Fallback values plus optional description and config.
 * @returns The conditional scorer.
 */
export declare function createConditionalScorer(id: string, name: string, condition: (input: ScorerInput) => boolean, scorer: BaseScorer, options?: {
    /** Score used when the condition is not met; defaults to 10. */
    defaultScore?: number;
    /** Reasoning used when the condition is not met. */
    defaultReasoning?: string;
    description?: string;
    config?: ScorerConfig;
}): BaseScorer;
|
|
89
|
+
/**
 * Create a scorer that flips the wrapped scorer's result: the new score
 * is the scale maximum minus the wrapped score (10 - score).
 *
 * The new scorer's metadata takes its type and category from the
 * wrapped scorer.
 *
 * @param id - Identifier for the new scorer.
 * @param name - Display name for the new scorer.
 * @param scorer - Scorer whose score is inverted.
 * @param options - Optional description and scorer config.
 * @returns The inverting scorer.
 */
export declare function createInvertedScorer(id: string, name: string, scorer: BaseScorer, options?: {
    description?: string;
    config?: ScorerConfig;
}): BaseScorer;
|
|
@@ -0,0 +1,382 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Custom Scorer Utilities
|
|
3
|
+
* Helper functions for creating custom scorers
|
|
4
|
+
*/
|
|
5
|
+
import { BaseScorer, DEFAULT_SCORE_SCALE } from "./baseScorer.js";
|
|
6
|
+
import { evaluationErrors } from "../errors/EvaluationError.js";
|
|
7
|
+
/**
 * Assemble a scorer metadata object, substituting a default for every
 * field that `options` does not supply (null and undefined both count
 * as "not supplied").
 *
 * @param id - Identifier copied onto the metadata.
 * @param name - Display name; also used in the default description.
 * @param options - Optional per-field overrides.
 * @returns A new metadata object; default arrays and the default config
 *          are created fresh on every call.
 */
export function createScorerMetadata(id, name, options) {
    const overrides = options ?? {};
    const description = overrides.description ?? `Custom scorer: ${name}`;
    const type = overrides.type ?? "rule";
    const category = overrides.category ?? "custom";
    const version = overrides.version ?? "1.0.0";
    const requiredInputs = overrides.requiredInputs ?? ["response"];
    const optionalInputs = overrides.optionalInputs ?? ["query", "context", "groundTruth"];
    const defaultConfig = overrides.defaultConfig ?? {
        enabled: true,
        threshold: 0.7,
        weight: 1.0,
        timeout: 5000,
        retries: 0,
    };
    return {
        id,
        name,
        description,
        type,
        category,
        version,
        requiredInputs,
        optionalInputs,
        defaultConfig,
    };
}
|
|
33
|
+
/**
 * Scorer whose scoring logic is supplied as an async function.
 *
 * The supplied function is stored at construction time and invoked by
 * `score`; metadata and config are forwarded to `BaseScorer`.
 */
class FunctionScorer extends BaseScorer {
    _scorerFn;
    /**
     * @param metadata - Scorer metadata forwarded to the base class.
     * @param scorerFn - Async function that computes score, reasoning and
     *                   optional metadata for an input.
     * @param config - Optional scorer config forwarded to the base class.
     */
    constructor(metadata, scorerFn, config) {
        super(metadata, config);
        this._scorerFn = scorerFn;
    }
    /**
     * Score an input with the wrapped function.
     *
     * Returns an error result when input validation fails, when the
     * wrapped function throws or rejects, or when the score it produces
     * is not a number. Otherwise the score is clamped to
     * `DEFAULT_SCORE_SCALE` and returned with the function's reasoning
     * and metadata.
     *
     * @param input - Scorer input passed to validation and to the function.
     * @returns Whatever `executeWithTiming` resolves to for the wrapped work.
     */
    async score(input) {
        return this.executeWithTiming(async () => {
            const validation = this.validateInput(input);
            if (!validation.valid) {
                return this.createErrorResult(`Invalid input: ${validation.errors.join(", ")}`);
            }
            try {
                const result = await this._scorerFn(input);
                // Clamp score to the valid range.
                const clampedScore = Math.max(DEFAULT_SCORE_SCALE.min, Math.min(DEFAULT_SCORE_SCALE.max, result.score));
                // Math.min/Math.max propagate NaN, so a NaN or missing score
                // would slip through the clamp and poison later aggregation.
                if (Number.isNaN(clampedScore)) {
                    return this.createErrorResult(`Scorer function returned a non-numeric score: ${String(result.score)}`);
                }
                return this.createScoreResult(clampedScore, result.reasoning, {
                    metadata: result.metadata,
                });
            }
            catch (error) {
                const errorMessage = error instanceof Error ? error.message : String(error);
                return this.createErrorResult(errorMessage);
            }
        });
    }
}
|
|
64
|
+
/**
 * Wrap a plain async function as a scorer.
 *
 * @param id - Identifier for the new scorer.
 * @param name - Display name for the new scorer.
 * @param scorerFn - Function that computes the score for an input.
 * @param options - Optional metadata overrides (`description`,
 *                  `category`, `type`, `version`, `requiredInputs`,
 *                  `optionalInputs`) and a `config` for the scorer.
 *                  `type` falls back to "rule".
 * @returns A `FunctionScorer` that delegates to `scorerFn`.
 */
export function createFunctionScorer(id, name, scorerFn, options) {
    const { description, category, type, version, requiredInputs, optionalInputs, config } = options ?? {};
    const metadata = createScorerMetadata(id, name, {
        description,
        category,
        type: type ?? "rule",
        version,
        requiredInputs,
        optionalInputs,
    });
    return new FunctionScorer(metadata, scorerFn, config);
}
|
|
78
|
+
/**
 * Reject regex sources that are too long or that trip the
 * nested-quantifier heuristic.
 *
 * @param source - Pattern source text to validate.
 * @throws A CONFIGURATION_ERROR evaluation error when the source is
 *         longer than 200 characters or contains two quantifier
 *         characters (`+`, `*`, `{`) with no whitespace between them.
 */
function assertSafeRegexSource(source) {
    if (source.length > 200) {
        throw evaluationErrors.create("CONFIGURATION_ERROR", "Regex pattern exceeds maximum length of 200 characters", {
            retryable: false,
            details: { patternLength: source.length },
        });
    }
    // Check for nested quantifiers that could cause catastrophic backtracking
    if (/(\+|\*|\{)\S*(\+|\*|\{)/.test(source)) {
        throw evaluationErrors.create("CONFIGURATION_ERROR", "Regex pattern contains nested quantifiers which may cause catastrophic backtracking", { retryable: false, details: { pattern: source } });
    }
}
/**
 * Create a regex-based scorer.
 *
 * The scorer yields 10 when the match outcome agrees with
 * `options.shouldMatch` (default true) and 0 otherwise.
 *
 * @param id - Identifier for the new scorer.
 * @param name - Display name for the new scorer.
 * @param options - `pattern` (string or RegExp), `flags` (used only for
 *                  string patterns, default "i"), `shouldMatch`,
 *                  `description` and `config`.
 * @returns A `FunctionScorer` with type "rule" and category "quality".
 * @throws A CONFIGURATION_ERROR evaluation error when the pattern fails
 *         the safety checks or a string pattern does not compile.
 */
export function createRegexScorer(id, name, options) {
    const metadata = createScorerMetadata(id, name, {
        description: options.description ??
            `Regex scorer checking for pattern: ${options.pattern}`,
        type: "rule",
        category: "quality",
    });
    let pattern;
    if (typeof options.pattern === "string") {
        assertSafeRegexSource(options.pattern);
        try {
            pattern = new RegExp(options.pattern, options.flags ?? "i");
        }
        catch (e) {
            throw evaluationErrors.create("CONFIGURATION_ERROR", `Invalid regex pattern: ${e instanceof Error ? e.message : String(e)}`, { retryable: false, cause: e instanceof Error ? e : undefined });
        }
    }
    else {
        // Precompiled expressions go through the same safety rules.
        assertSafeRegexSource(options.pattern.source);
        pattern = options.pattern;
    }
    const shouldMatch = options.shouldMatch ?? true;
    return new FunctionScorer(metadata, async (input) => {
        // test() resumes from lastIndex for both global and sticky
        // expressions, so reset it to keep every call independent of
        // the previous one.
        if (pattern.global || pattern.sticky) {
            pattern.lastIndex = 0;
        }
        const matches = pattern.test(input.response);
        const passed = shouldMatch ? matches : !matches;
        return {
            score: passed ? 10 : 0,
            reasoning: `Response ${matches ? "matches" : "does not match"} expected pattern`,
            metadata: {
                pattern: pattern.source,
                flags: pattern.flags,
                matches,
                shouldMatch,
            },
        };
    }, options.config);
}
|
|
142
|
+
/**
 * Create a scorer that checks the response for required and forbidden
 * keywords by substring search.
 *
 * Score = (required keywords found + forbidden keywords absent) divided
 * by the total number of keywords, scaled to 10; it is 10 when no
 * keywords are configured.
 *
 * @param id - Identifier for the new scorer.
 * @param name - Display name for the new scorer.
 * @param options - `requiredKeywords`, `forbiddenKeywords`,
 *                  `caseInsensitive` (default true), `description` and
 *                  `config`.
 * @returns A `FunctionScorer` with type "rule" and category "quality".
 */
export function createKeywordScorer(id, name, options) {
    const metadata = createScorerMetadata(id, name, {
        description: options.description ?? `Keyword presence scorer`,
        type: "rule",
        category: "quality",
    });
    const requiredKeywords = options.requiredKeywords ?? [];
    const forbiddenKeywords = options.forbiddenKeywords ?? [];
    const caseInsensitive = options.caseInsensitive ?? true;
    // Lower-case both sides of the comparison when matching is case-insensitive.
    const normalize = (value) => (caseInsensitive ? value.toLowerCase() : value);
    const scoreKeywords = async (input) => {
        const haystack = normalize(input.response);
        const isPresent = (keyword) => haystack.includes(normalize(keyword));
        const foundRequired = requiredKeywords.filter((keyword) => isPresent(keyword));
        const missingRequired = requiredKeywords.filter((keyword) => !isPresent(keyword));
        const foundForbidden = forbiddenKeywords.filter((keyword) => isPresent(keyword));
        const totalChecks = requiredKeywords.length + forbiddenKeywords.length;
        const passedChecks = foundRequired.length + (forbiddenKeywords.length - foundForbidden.length);
        const score = totalChecks > 0 ? (passedChecks / totalChecks) * 10 : 10;
        const reasons = [];
        if (missingRequired.length > 0) {
            reasons.push(`Missing required keywords: ${missingRequired.join(", ")}`);
        }
        if (foundForbidden.length > 0) {
            reasons.push(`Found forbidden keywords: ${foundForbidden.join(", ")}`);
        }
        if (reasons.length === 0) {
            reasons.push("All keyword requirements satisfied");
        }
        return {
            score,
            reasoning: reasons.join(". "),
            metadata: {
                foundRequired,
                missingRequired,
                foundForbidden,
                totalRequired: requiredKeywords.length,
                totalForbidden: forbiddenKeywords.length,
            },
        };
    };
    return new FunctionScorer(metadata, scoreKeywords, options.config);
}
|
|
210
|
+
/**
 * Create a scorer that checks response length against optional word and
 * character bounds.
 *
 * Words are counted by splitting the trimmed response on whitespace;
 * characters by the response string's `length`. The score is 10 when no
 * bound is violated and 0 otherwise, with one message per violated
 * bound in the reasoning.
 *
 * @param id - Identifier for the new scorer.
 * @param name - Display name for the new scorer.
 * @param options - `minWords`, `maxWords`, `minChars`, `maxChars`,
 *                  `description` and `config`.
 * @returns A `FunctionScorer` with type "rule" and category "quality".
 */
export function createSimpleLengthScorer(id, name, options) {
    const metadata = createScorerMetadata(id, name, {
        description: options.description ?? `Length scorer`,
        type: "rule",
        category: "quality",
    });
    const measureLength = async (input) => {
        const words = input.response.trim().split(/\s+/).filter(Boolean);
        const wordCount = words.length;
        const charCount = input.response.length;
        // Bounds are read on every call, exactly like direct `options.*` access.
        const { minWords, maxWords, minChars, maxChars } = options;
        const issues = [];
        if (minWords !== undefined && wordCount < minWords) {
            issues.push(`Too few words: ${wordCount} < ${minWords}`);
        }
        if (maxWords !== undefined && wordCount > maxWords) {
            issues.push(`Too many words: ${wordCount} > ${maxWords}`);
        }
        if (minChars !== undefined && charCount < minChars) {
            issues.push(`Too few characters: ${charCount} < ${minChars}`);
        }
        if (maxChars !== undefined && charCount > maxChars) {
            issues.push(`Too many characters: ${charCount} > ${maxChars}`);
        }
        const passed = issues.length === 0;
        const reasoning = passed
            ? `Length within bounds (${wordCount} words, ${charCount} chars)`
            : issues.join("; ");
        return {
            score: passed ? 10 : 0,
            reasoning,
            metadata: {
                wordCount,
                charCount,
                minWords: minWords ?? null,
                maxWords: maxWords ?? null,
                minChars: minChars ?? null,
                maxChars: maxChars ?? null,
            },
        };
    };
    return new FunctionScorer(metadata, measureLength, options.config);
}
|
|
259
|
+
/**
 * Combine several scorers into one scorer that aggregates their scores.
 *
 * @param id - Identifier for the composed scorer.
 * @param name - Display name for the composed scorer.
 * @param scorers - Sub-scorers to run; must not be empty.
 * @param options - `aggregation` ("average" by default, or "min",
 *                  "max", "weighted"), `weights` (by index, used only
 *                  for "weighted"; a missing entry counts as 1),
 *                  `description` and `config`.
 * @returns A `FunctionScorer` with type "hybrid" and category "custom".
 * @throws Error when `scorers` is empty.
 */
export function composeScorers(id, name, scorers, options) {
    if (scorers.length === 0) {
        throw new Error("composeScorers requires at least one scorer. An empty array would produce NaN/Infinity during aggregation.");
    }
    const metadata = createScorerMetadata(id, name, {
        description: options?.description ??
            `Composed scorer with ${scorers.length} sub-scorers`,
        type: "hybrid",
        category: "custom",
    });
    const aggregation = options?.aggregation ?? "average";
    const weights = options?.weights ?? scorers.map(() => 1.0);
    // Reduce the list of sub-scores to one number; any aggregation name
    // other than "min", "max" or "weighted" falls back to the plain average.
    const aggregate = (scores) => {
        if (aggregation === "min") {
            return Math.min(...scores);
        }
        if (aggregation === "max") {
            return Math.max(...scores);
        }
        if (aggregation === "weighted") {
            let totalWeight = 0;
            let weightedSum = 0;
            scores.forEach((score, index) => {
                const weight = weights[index] ?? 1.0;
                totalWeight += weight;
                weightedSum += score * weight;
            });
            return totalWeight > 0 ? weightedSum / totalWeight : 0;
        }
        return scores.reduce((sum, score) => sum + score, 0) / scores.length;
    };
    return new FunctionScorer(metadata, async (input) => {
        const results = await Promise.all(scorers.map((scorer) => scorer.score(input)));
        const aggregatedScore = aggregate(results.map((result) => result.score));
        const perScorerText = results
            .map((result, index) => `${scorers[index].metadata.name}: ${result.score.toFixed(1)}/10 - ${result.reasoning}`)
            .join("; ");
        const subScores = results.map((result, index) => {
            const { id: scorerId, name: scorerName } = scorers[index].metadata;
            return {
                scorerId,
                scorerName,
                score: result.score,
                passed: result.passed,
            };
        });
        return {
            score: aggregatedScore,
            reasoning: `Aggregated (${aggregation}): ${perScorerText}`,
            metadata: {
                subScores,
                aggregationMethod: aggregation,
            },
        };
    }, options?.config);
}
|
|
322
|
+
/**
 * Create a scorer that runs `scorer` only when `condition(input)` is
 * truthy, and otherwise returns a fixed fallback score and reasoning.
 *
 * @param id - Identifier for the new scorer.
 * @param name - Display name for the new scorer.
 * @param condition - Predicate deciding whether the wrapped scorer runs.
 * @param scorer - Scorer to delegate to when the condition holds; its
 *                 type and category are copied onto the new metadata.
 * @param options - `defaultScore` (default 10), `defaultReasoning`,
 *                  `description` and `config`.
 * @returns A `FunctionScorer` wrapping `scorer`.
 */
export function createConditionalScorer(id, name, condition, scorer, options) {
    const metadata = createScorerMetadata(id, name, {
        description: options?.description ??
            `Conditional scorer wrapping ${scorer.metadata.name}`,
        type: scorer.metadata.type,
        category: scorer.metadata.category,
    });
    const fallbackScore = options?.defaultScore ?? 10;
    const fallbackReasoning = options?.defaultReasoning ?? "Condition not met, using default score";
    return new FunctionScorer(metadata, async (input) => {
        // Guard clause: skip the wrapped scorer entirely when the condition fails.
        if (!condition(input)) {
            return {
                score: fallbackScore,
                reasoning: fallbackReasoning,
                metadata: {
                    conditionMet: false,
                    wrappedScorer: scorer.metadata.id,
                },
            };
        }
        const { score, reasoning, metadata: wrappedMetadata } = await scorer.score(input);
        return {
            score,
            reasoning,
            metadata: {
                conditionMet: true,
                wrappedScorer: scorer.metadata.id,
                // Spread last, so keys from the wrapped result take precedence.
                ...wrappedMetadata,
            },
        };
    }, options?.config);
}
|
|
357
|
+
/**
 * Create a scorer that flips the wrapped scorer's score by subtracting
 * it from `DEFAULT_SCORE_SCALE.max`.
 *
 * @param id - Identifier for the new scorer.
 * @param name - Display name for the new scorer.
 * @param scorer - Scorer whose score is inverted; its type and category
 *                 are copied onto the new metadata.
 * @param options - Optional `description` and `config`.
 * @returns A `FunctionScorer` wrapping `scorer`.
 */
export function createInvertedScorer(id, name, scorer, options) {
    const metadata = createScorerMetadata(id, name, {
        description: options?.description ??
            `Inverted scorer wrapping ${scorer.metadata.name}`,
        type: scorer.metadata.type,
        category: scorer.metadata.category,
    });
    const invert = async (input) => {
        const { score: originalScore, reasoning, metadata: wrappedMetadata } = await scorer.score(input);
        const invertedScore = DEFAULT_SCORE_SCALE.max - originalScore;
        return {
            score: invertedScore,
            reasoning: `Inverted: ${reasoning}`,
            metadata: {
                originalScore,
                invertedScore,
                wrappedScorer: scorer.metadata.id,
                // Spread last, so keys from the wrapped result take precedence.
                ...wrappedMetadata,
            },
        };
    };
    return new FunctionScorer(metadata, invert, options?.config);
}
|
|
382
|
+
//# sourceMappingURL=customScorerUtils.js.map
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Scorers Index
|
|
3
|
+
* Export all scorers and scorer utilities
|
|
4
|
+
*/
|
|
5
|
+
export { BaseScorer, DEFAULT_SCORE_SCALE } from "./baseScorer.js";
|
|
6
|
+
export { composeScorers, createConditionalScorer, createFunctionScorer, createInvertedScorer, createKeywordScorer, createRegexScorer, createScorerMetadata, createSimpleLengthScorer, type ScorerFunction, } from "./customScorerUtils.js";
|
|
7
|
+
export * from "./llm/index.js";
|
|
8
|
+
export * from "./rule/index.js";
|
|
9
|
+
export { ScorerBuilder, Scorers } from "./scorerBuilder.js";
|
|
10
|
+
export { ScorerRegistry } from "./scorerRegistry.js";
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Scorers Index
|
|
3
|
+
* Export all scorers and scorer utilities
|
|
4
|
+
*/
|
|
5
|
+
// Base classes
|
|
6
|
+
export { BaseScorer, DEFAULT_SCORE_SCALE } from "./baseScorer.js";
|
|
7
|
+
// Custom Scorer Utilities
|
|
8
|
+
export { composeScorers, createConditionalScorer, createFunctionScorer, createInvertedScorer, createKeywordScorer, createRegexScorer, createScorerMetadata, createSimpleLengthScorer, } from "./customScorerUtils.js";
|
|
9
|
+
// LLM Scorers
|
|
10
|
+
export * from "./llm/index.js";
|
|
11
|
+
// Rule Scorers
|
|
12
|
+
export * from "./rule/index.js";
|
|
13
|
+
// Scorer Builder
|
|
14
|
+
export { ScorerBuilder, Scorers } from "./scorerBuilder.js";
|
|
15
|
+
// Registry
|
|
16
|
+
export { ScorerRegistry } from "./scorerRegistry.js";
|
|
17
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Answer relevancy scorer
|
|
3
|
+
* Evaluates how relevant the AI response is to the user query
|
|
4
|
+
*/
|
|
5
|
+
import type { LLMScorerConfig, ScoreResult, ScorerInput } from "../../../types/scorerTypes.js";
|
|
6
|
+
import { BaseLLMScorer } from "./baseLLMScorer.js";
|
|
7
|
+
/**
 * LLM-based scorer that evaluates how relevant the AI response is to the user query.
 */
export declare class AnswerRelevancyScorer extends BaseLLMScorer {
    /**
     * @param config - Optional partial LLM scorer configuration.
     */
    constructor(config?: Partial<LLMScorerConfig>);
    /**
     * Build the prompt string for the given scorer input.
     *
     * @param input - The scorer input to build the prompt from.
     * @returns The prompt text.
     */
    generatePrompt(input: ScorerInput): string;
    /**
     * Parse the raw LLM response text into a partial score result.
     *
     * @param response - Raw text returned by the LLM.
     * @param _input - The scorer input; the underscore prefix suggests it is
     *   unused by this scorer — confirm against the implementation.
     * @returns The fields of the score result that could be derived from the response.
     */
    parseResponse(response: string, _input: ScorerInput): Partial<ScoreResult>;
}
|
|
12
|
+
/**
 * Factory for {@link AnswerRelevancyScorer}.
 *
 * @param config - Optional partial LLM scorer configuration.
 * @returns A promise that resolves to an `AnswerRelevancyScorer` instance.
 */
export declare function createAnswerRelevancyScorer(config?: Partial<LLMScorerConfig>): Promise<AnswerRelevancyScorer>;
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Answer relevancy scorer
|
|
3
|
+
* Evaluates how relevant the AI response is to the user query
|
|
4
|
+
*/
|
|
5
|
+
import { BaseLLMScorer } from "./baseLLMScorer.js";
|
|
6
|
+
const ANSWER_RELEVANCY_PROMPT = `You are evaluating if an AI response directly addresses the user's question.
|
|
7
|
+
|
|
8
|
+
**Question:**
|
|
9
|
+
{{query}}
|
|
10
|
+
|
|
11
|
+
**Response:**
|
|
12
|
+
{{response}}
|
|
13
|
+
|
|
14
|
+
{{#if hasContext}}
|
|
15
|
+
**Available Context:**
|
|
16
|
+
{{context}}
|
|
17
|
+
{{/if}}
|
|
18
|
+
|
|
19
|
+
## Evaluation Criteria
|
|
20
|
+
|
|
21
|
+
1. Does the response address the main intent of the question?
|
|
22
|
+
2. Is the response complete and sufficient?
|
|
23
|
+
3. Does it avoid unnecessary tangents?
|
|
24
|
+
4. Is the information directly relevant to what was asked?
|
|
25
|
+
|
|
26
|
+
## Output Format (JSON)
|
|
27
|
+
|
|
28
|
+
{
|
|
29
|
+
"addressesIntent": true/false,
|
|
30
|
+
"isComplete": true/false,
|
|
31
|
+
"isOnTopic": true/false,
|
|
32
|
+
"hasTangents": true/false,
|
|
33
|
+
"score": 0.0-10.0,
|
|
34
|
+
"reasoning": "explanation of the score",
|
|
35
|
+
"confidence": 0.0-1.0
|
|
36
|
+
}`;
|
|
37
|
+
/**
 * LLM-based scorer that evaluates how relevant the AI response is to the user query.
 *
 * Builds a prompt from `ANSWER_RELEVANCY_PROMPT` and parses the model's JSON
 * answer into a score (clamped to 0-10), reasoning and confidence (clamped to 0-1).
 */
export class AnswerRelevancyScorer extends BaseLLMScorer {
    /**
     * @param config - Optional configuration forwarded to `BaseLLMScorer`
     *   alongside this scorer's fixed metadata.
     */
    constructor(config) {
        super({
            id: "answer-relevancy",
            name: "Answer Relevancy",
            description: "Evaluates how relevant the AI response is to the user query",
            type: "llm",
            category: "relevancy",
            version: "1.0.0",
            defaultConfig: {
                enabled: true,
                threshold: 0.7,
                weight: 1.0,
                timeout: 25000,
                retries: 2,
            },
            requiredInputs: ["query", "response"],
            optionalInputs: ["context"],
        }, config);
    }
    /**
     * Build the evaluation prompt for the given input.
     *
     * @param input - Scorer input; `query` and `response` are substituted into
     *   the template, and a non-empty `context` array is rendered as a numbered list.
     * @returns The fully populated prompt text.
     */
    generatePrompt(input) {
        let prompt = ANSWER_RELEVANCY_PROMPT;
        prompt = this.substituteTemplate(prompt, {
            query: input.query,
            response: input.response,
        });
        const hasContext = !!(input.context && input.context.length > 0);
        prompt = this.processConditionals(prompt, { hasContext });
        if (hasContext && input.context) {
            const contextText = input.context
                .map((c, i) => `[${i + 1}] ${c}`)
                .join("\n");
            // Use a function replacer: with a plain string replacement,
            // String.prototype.replace would interpret sequences such as `$&`,
            // `$1`, `$$`, "$`" or `$'` inside the context text as special
            // replacement patterns and corrupt the prompt.
            // NOTE(review): query/response are substituted before this step, so a
            // literal "{{context}}" inside them would be matched first — confirm
            // whether inputs need escaping upstream.
            prompt = prompt.replace("{{context}}", () => contextText);
        }
        return prompt;
    }
    /**
     * Parse the LLM response into a partial score result.
     *
     * @param response - Raw text returned by the LLM.
     * @param _input - Unused.
     * @returns Score, reasoning and confidence; when no JSON could be extracted,
     *   the score comes from `extractScoreFromText` with a fixed confidence of 0.3.
     */
    parseResponse(response, _input) {
        const json = this.extractJSON(response);
        if (!json) {
            const score = this.extractScoreFromText(response);
            return {
                score,
                reasoning: "Could not parse structured response",
                confidence: 0.3,
            };
        }
        // Missing or non-numeric fields fall back to 5 (score) and 0.8 (confidence).
        const score = Math.min(10, Math.max(0, typeof json.score === "number" ? json.score : 5));
        const confidence = Math.min(1, Math.max(0, typeof json.confidence === "number" ? json.confidence : 0.8));
        return {
            score,
            reasoning: typeof json.reasoning === "string"
                ? json.reasoning
                : "No reasoning provided",
            confidence,
            metadata: {
                addressesIntent: json.addressesIntent ?? null,
                isComplete: json.isComplete ?? null,
                isOnTopic: json.isOnTopic ?? null,
                hasTangents: json.hasTangents ?? null,
            },
        };
    }
}
|
|
97
|
+
/**
 * Factory for the answer relevancy scorer.
 *
 * @param config - Optional configuration handed to the `AnswerRelevancyScorer` constructor.
 * @returns A promise that resolves to the newly constructed scorer.
 */
export async function createAnswerRelevancyScorer(config) {
    const relevancyScorer = new AnswerRelevancyScorer(config);
    return relevancyScorer;
}
|
|
100
|
+
//# sourceMappingURL=answerRelevancyScorer.js.map
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Base class for all LLM-based scorers
|
|
3
|
+
* Provides common functionality for calling LLMs and parsing responses
|
|
4
|
+
*/
|
|
5
|
+
import type { JsonObject } from "../../../types/common.js";
|
|
6
|
+
import type { AIProvider } from "../../../types/providers.js";
|
|
7
|
+
import type { LLMScorer, LLMScorerConfig, ScoreResult, ScorerInput, ScorerMetadata } from "../../../types/scorerTypes.js";
|
|
8
|
+
import { BaseScorer } from "../baseScorer.js";
|
|
9
|
+
/**
 * Default LLM scorer configuration.
 *
 * This is a type declaration only; the concrete default values are not
 * visible in this declaration file.
 */
export declare const DEFAULT_LLM_SCORER_CONFIG: LLMScorerConfig;
|
|
13
|
+
/**
 * Abstract base class for LLM-based scorers.
 *
 * Subclasses supply `generatePrompt` and `parseResponse`; this class declares
 * the shared `score` entry point plus protected helpers for provider
 * initialization, calling the LLM, JSON extraction and prompt templating.
 */
export declare abstract class BaseLLMScorer extends BaseScorer implements LLMScorer {
    /** LLM-specific configuration, exposed read-only through the `llmConfig` getter. */
    protected _llmConfig: LLMScorerConfig;
    /** AI provider instance; optional, so it may be unset (presumably until initialization — confirm). */
    protected provider?: AIProvider;
    /** NOTE(review): presumably tracks an in-flight provider initialization — confirm against implementation. */
    private initializationPromise;
    /**
     * @param metadata - Metadata describing the scorer.
     * @param config - Optional LLM scorer configuration.
     */
    constructor(metadata: ScorerMetadata, config?: LLMScorerConfig);
    /**
     * Get LLM-specific configuration
     */
    get llmConfig(): LLMScorerConfig;
    /**
     * Generate the prompt for LLM scoring - must be implemented by subclasses
     *
     * @param input - The scorer input to build the prompt from.
     * @returns The prompt text.
     */
    abstract generatePrompt(input: ScorerInput): string;
    /**
     * Parse LLM response into score result - must be implemented by subclasses
     *
     * @param response - Raw text returned by the LLM.
     * @param input - The scorer input the response was produced for.
     * @returns The parts of the score result derived from the response.
     */
    abstract parseResponse(response: string, input: ScorerInput): Partial<ScoreResult>;
    /**
     * Main scoring method
     *
     * @param input - The scorer input to evaluate.
     * @returns A promise resolving to the complete score result.
     */
    score(input: ScorerInput): Promise<ScoreResult>;
    /**
     * Initialize the AI provider
     */
    protected initializeProvider(): Promise<void>;
    /**
     * Internal method to actually initialize the provider
     */
    private _doInitializeProvider;
    /**
     * Call the LLM with the given prompt
     *
     * @param prompt - Prompt text to send.
     * @returns A promise resolving to the LLM's response text.
     */
    protected callLLM(prompt: string): Promise<string>;
    /**
     * Extract JSON from LLM response
     * Handles various formats including markdown code blocks
     *
     * @param response - Raw LLM response text.
     * @returns The extracted JSON object, or `null` (presumably when none could be extracted).
     */
    protected extractJSON(response: string): JsonObject | null;
    /**
     * Simple template substitution for prompts
     *
     * @param template - Template text.
     * @param variables - Values keyed by variable name; a value may be a string,
     *   a string array, or undefined.
     * @returns The template with substitutions applied.
     */
    protected substituteTemplate(template: string, variables: Record<string, string | string[] | undefined>): string;
    /**
     * Handle conditional template blocks
     *
     * @param template - Template text containing conditional blocks.
     * @param conditions - Boolean flags keyed by condition name.
     * @returns The template with conditional blocks processed.
     */
    protected processConditionals(template: string, conditions: Record<string, boolean>): string;
    /**
     * Extract a numeric score from text response
     * Safe numeric extraction without ReDoS-prone regex
     *
     * @param text - Text to search for a score.
     * @returns The extracted number, or `null` (presumably when none is found).
     */
    protected extractNumericScore(text: string): number | null;
    /**
     * Extract a numeric score from text response with fallback
     *
     * @param text - Text to search for a score.
     * @param min - Optional lower bound (NOTE(review): presumably used for clamping — confirm).
     * @param max - Optional upper bound (NOTE(review): presumably used for clamping — confirm).
     * @returns A number in every case (unlike `extractNumericScore`, never `null`).
     */
    protected extractScoreFromText(text: string, min?: number, max?: number): number;
}
|