@juspay/neurolink 9.36.0 → 9.37.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/auth/errors.d.ts +1 -1
- package/dist/auth/middleware/AuthMiddleware.d.ts +1 -1
- package/dist/auth/providers/BaseAuthProvider.d.ts +1 -1
- package/dist/browser/neurolink.min.js +921 -423
- package/dist/cli/commands/evaluate.d.ts +48 -0
- package/dist/cli/commands/evaluate.js +955 -0
- package/dist/cli/commands/proxy.js +6 -6
- package/dist/cli/parser.js +4 -1
- package/dist/evaluation/BatchEvaluator.d.ts +163 -0
- package/dist/evaluation/BatchEvaluator.js +267 -0
- package/dist/evaluation/EvaluationAggregator.d.ts +272 -0
- package/dist/evaluation/EvaluationAggregator.js +377 -0
- package/dist/evaluation/EvaluatorFactory.d.ts +113 -0
- package/dist/evaluation/EvaluatorFactory.js +280 -0
- package/dist/evaluation/EvaluatorRegistry.d.ts +160 -0
- package/dist/evaluation/EvaluatorRegistry.js +184 -0
- package/dist/evaluation/errors/EvaluationError.d.ts +189 -0
- package/dist/evaluation/errors/EvaluationError.js +206 -0
- package/dist/evaluation/errors/index.d.ts +4 -0
- package/dist/evaluation/errors/index.js +4 -0
- package/dist/evaluation/hooks/index.d.ts +6 -0
- package/dist/evaluation/hooks/index.js +6 -0
- package/dist/evaluation/hooks/langfuseAdapter.d.ts +99 -0
- package/dist/evaluation/hooks/langfuseAdapter.js +172 -0
- package/dist/evaluation/hooks/observabilityHooks.d.ts +129 -0
- package/dist/evaluation/hooks/observabilityHooks.js +181 -0
- package/dist/evaluation/index.d.ts +11 -2
- package/dist/evaluation/index.js +15 -0
- package/dist/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
- package/dist/evaluation/pipeline/evaluationPipeline.js +381 -0
- package/dist/evaluation/pipeline/index.d.ts +8 -0
- package/dist/evaluation/pipeline/index.js +8 -0
- package/dist/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
- package/dist/evaluation/pipeline/pipelineBuilder.js +260 -0
- package/dist/evaluation/pipeline/presets.d.ts +66 -0
- package/dist/evaluation/pipeline/presets.js +224 -0
- package/dist/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
- package/dist/evaluation/pipeline/strategies/batchStrategy.js +238 -0
- package/dist/evaluation/pipeline/strategies/index.d.ts +6 -0
- package/dist/evaluation/pipeline/strategies/index.js +6 -0
- package/dist/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
- package/dist/evaluation/pipeline/strategies/samplingStrategy.js +238 -0
- package/dist/evaluation/reporting/index.d.ts +6 -0
- package/dist/evaluation/reporting/index.js +6 -0
- package/dist/evaluation/reporting/metricsCollector.d.ts +147 -0
- package/dist/evaluation/reporting/metricsCollector.js +285 -0
- package/dist/evaluation/reporting/reportGenerator.d.ts +90 -0
- package/dist/evaluation/reporting/reportGenerator.js +374 -0
- package/dist/evaluation/scorers/baseScorer.d.ts +83 -0
- package/dist/evaluation/scorers/baseScorer.js +232 -0
- package/dist/evaluation/scorers/customScorerUtils.d.ts +95 -0
- package/dist/evaluation/scorers/customScorerUtils.js +381 -0
- package/dist/evaluation/scorers/index.d.ts +10 -0
- package/dist/evaluation/scorers/index.js +16 -0
- package/dist/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/answerRelevancyScorer.js +99 -0
- package/dist/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
- package/dist/evaluation/scorers/llm/baseLLMScorer.js +281 -0
- package/dist/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/biasDetectionScorer.js +127 -0
- package/dist/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/contextPrecisionScorer.js +92 -0
- package/dist/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/contextRelevancyScorer.js +107 -0
- package/dist/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/faithfulnessScorer.js +121 -0
- package/dist/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/hallucinationScorer.js +140 -0
- package/dist/evaluation/scorers/llm/index.d.ts +15 -0
- package/dist/evaluation/scorers/llm/index.js +16 -0
- package/dist/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/promptAlignmentScorer.js +106 -0
- package/dist/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/summarizationScorer.js +114 -0
- package/dist/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/toneConsistencyScorer.js +106 -0
- package/dist/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/toxicityScorer.js +121 -0
- package/dist/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
- package/dist/evaluation/scorers/rule/baseRuleScorer.js +233 -0
- package/dist/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
- package/dist/evaluation/scorers/rule/contentSimilarityScorer.js +350 -0
- package/dist/evaluation/scorers/rule/formatScorer.d.ts +147 -0
- package/dist/evaluation/scorers/rule/formatScorer.js +470 -0
- package/dist/evaluation/scorers/rule/index.d.ts +9 -0
- package/dist/evaluation/scorers/rule/index.js +10 -0
- package/dist/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
- package/dist/evaluation/scorers/rule/keywordCoverageScorer.js +347 -0
- package/dist/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
- package/dist/evaluation/scorers/rule/lengthScorer.js +351 -0
- package/dist/evaluation/scorers/scorerBuilder.d.ts +161 -0
- package/dist/evaluation/scorers/scorerBuilder.js +420 -0
- package/dist/evaluation/scorers/scorerRegistry.d.ts +62 -0
- package/dist/evaluation/scorers/scorerRegistry.js +467 -0
- package/dist/index.d.ts +37 -25
- package/dist/index.js +65 -26
- package/dist/lib/auth/providers/BaseAuthProvider.d.ts +1 -1
- package/dist/lib/evaluation/BatchEvaluator.d.ts +163 -0
- package/dist/lib/evaluation/BatchEvaluator.js +268 -0
- package/dist/lib/evaluation/EvaluationAggregator.d.ts +272 -0
- package/dist/lib/evaluation/EvaluationAggregator.js +378 -0
- package/dist/lib/evaluation/EvaluatorFactory.d.ts +113 -0
- package/dist/lib/evaluation/EvaluatorFactory.js +281 -0
- package/dist/lib/evaluation/EvaluatorRegistry.d.ts +160 -0
- package/dist/lib/evaluation/EvaluatorRegistry.js +185 -0
- package/dist/lib/evaluation/errors/EvaluationError.d.ts +189 -0
- package/dist/lib/evaluation/errors/EvaluationError.js +207 -0
- package/dist/lib/evaluation/errors/index.d.ts +4 -0
- package/dist/lib/evaluation/errors/index.js +5 -0
- package/dist/lib/evaluation/hooks/index.d.ts +6 -0
- package/dist/lib/evaluation/hooks/index.js +7 -0
- package/dist/lib/evaluation/hooks/langfuseAdapter.d.ts +99 -0
- package/dist/lib/evaluation/hooks/langfuseAdapter.js +173 -0
- package/dist/lib/evaluation/hooks/observabilityHooks.d.ts +129 -0
- package/dist/lib/evaluation/hooks/observabilityHooks.js +182 -0
- package/dist/lib/evaluation/index.d.ts +11 -2
- package/dist/lib/evaluation/index.js +15 -0
- package/dist/lib/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
- package/dist/lib/evaluation/pipeline/evaluationPipeline.js +382 -0
- package/dist/lib/evaluation/pipeline/index.d.ts +8 -0
- package/dist/lib/evaluation/pipeline/index.js +9 -0
- package/dist/lib/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
- package/dist/lib/evaluation/pipeline/pipelineBuilder.js +261 -0
- package/dist/lib/evaluation/pipeline/presets.d.ts +66 -0
- package/dist/lib/evaluation/pipeline/presets.js +225 -0
- package/dist/lib/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
- package/dist/lib/evaluation/pipeline/strategies/batchStrategy.js +239 -0
- package/dist/lib/evaluation/pipeline/strategies/index.d.ts +6 -0
- package/dist/lib/evaluation/pipeline/strategies/index.js +7 -0
- package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
- package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.js +239 -0
- package/dist/lib/evaluation/reporting/index.d.ts +6 -0
- package/dist/lib/evaluation/reporting/index.js +7 -0
- package/dist/lib/evaluation/reporting/metricsCollector.d.ts +147 -0
- package/dist/lib/evaluation/reporting/metricsCollector.js +286 -0
- package/dist/lib/evaluation/reporting/reportGenerator.d.ts +90 -0
- package/dist/lib/evaluation/reporting/reportGenerator.js +375 -0
- package/dist/lib/evaluation/scorers/baseScorer.d.ts +83 -0
- package/dist/lib/evaluation/scorers/baseScorer.js +233 -0
- package/dist/lib/evaluation/scorers/customScorerUtils.d.ts +95 -0
- package/dist/lib/evaluation/scorers/customScorerUtils.js +382 -0
- package/dist/lib/evaluation/scorers/index.d.ts +10 -0
- package/dist/lib/evaluation/scorers/index.js +17 -0
- package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.js +100 -0
- package/dist/lib/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
- package/dist/lib/evaluation/scorers/llm/baseLLMScorer.js +282 -0
- package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.js +128 -0
- package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.js +93 -0
- package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.js +108 -0
- package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.js +122 -0
- package/dist/lib/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/hallucinationScorer.js +141 -0
- package/dist/lib/evaluation/scorers/llm/index.d.ts +15 -0
- package/dist/lib/evaluation/scorers/llm/index.js +17 -0
- package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.js +107 -0
- package/dist/lib/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/summarizationScorer.js +115 -0
- package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.js +107 -0
- package/dist/lib/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/toxicityScorer.js +122 -0
- package/dist/lib/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
- package/dist/lib/evaluation/scorers/rule/baseRuleScorer.js +234 -0
- package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
- package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.js +351 -0
- package/dist/lib/evaluation/scorers/rule/formatScorer.d.ts +147 -0
- package/dist/lib/evaluation/scorers/rule/formatScorer.js +471 -0
- package/dist/lib/evaluation/scorers/rule/index.d.ts +9 -0
- package/dist/lib/evaluation/scorers/rule/index.js +11 -0
- package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
- package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.js +348 -0
- package/dist/lib/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
- package/dist/lib/evaluation/scorers/rule/lengthScorer.js +352 -0
- package/dist/lib/evaluation/scorers/scorerBuilder.d.ts +161 -0
- package/dist/lib/evaluation/scorers/scorerBuilder.js +421 -0
- package/dist/lib/evaluation/scorers/scorerRegistry.d.ts +62 -0
- package/dist/lib/evaluation/scorers/scorerRegistry.js +468 -0
- package/dist/lib/index.d.ts +37 -25
- package/dist/lib/index.js +65 -26
- package/dist/lib/neurolink.d.ts +204 -0
- package/dist/lib/neurolink.js +296 -0
- package/dist/lib/types/index.d.ts +3 -1
- package/dist/lib/types/index.js +3 -2
- package/dist/lib/types/scorerTypes.d.ts +423 -0
- package/dist/lib/types/scorerTypes.js +6 -0
- package/dist/lib/utils/errorHandling.d.ts +20 -0
- package/dist/lib/utils/errorHandling.js +60 -0
- package/dist/neurolink.d.ts +204 -0
- package/dist/neurolink.js +296 -0
- package/dist/types/index.d.ts +3 -1
- package/dist/types/index.js +3 -2
- package/dist/types/scorerTypes.d.ts +423 -0
- package/dist/types/scorerTypes.js +5 -0
- package/dist/utils/errorHandling.d.ts +20 -0
- package/dist/utils/errorHandling.js +60 -0
- package/package.json +1 -1
|
@@ -0,0 +1,374 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Report Generator
|
|
3
|
+
* Generate evaluation reports in various formats
|
|
4
|
+
*/
|
|
5
|
+
/**
 * Default report configuration.
 *
 * Used as the base layer of every ReportGenerator configuration; caller
 * supplied options are spread on top of it. Frozen so that the shared
 * defaults cannot be mutated by accident (consumers always copy via spread).
 *
 * - format:           output format ("text", "json", "markdown" or "html")
 * - includeReasoning: emit each scorer's reasoning text
 * - includeMetadata:  emit per-score metadata (JSON report only)
 * - includeTiming:    emit compute-time / duration values
 */
const DEFAULT_REPORT_CONFIG = Object.freeze({
    format: "text",
    includeReasoning: true,
    includeMetadata: true,
    includeTiming: true,
});
|
|
14
|
+
/**
 * Report generator class.
 *
 * Renders an evaluation result (`data.result`) as a text, JSON, Markdown or
 * HTML document. The active format and the optional sections (reasoning,
 * metadata, timing) are controlled by the configuration passed to the
 * constructor or to `configure()`.
 *
 * Expected shape of `data`, as read by the renderers below:
 *   - title: string
 *   - timestamp: value accepted by `new Date(...)`
 *   - result: { overallScore, passed, aggregationMethod, totalComputeTime?,
 *               scores: [{ scorerId, scorerName, score, normalizedScore,
 *                          passed, threshold, reasoning, computeTime,
 *                          metadata? }],
 *               errors?: [{ scorerId, error }] }
 *   - customSections?: [{ title, content }] where content is a string or any
 *     JSON-serialisable value
 */
export class ReportGenerator {
    _config;
    /**
     * @param {object} [config] - Partial configuration; missing keys fall
     *   back to DEFAULT_REPORT_CONFIG.
     */
    constructor(config) {
        this._config = { ...DEFAULT_REPORT_CONFIG, ...config };
    }
    /**
     * Generate a report in the configured format.
     *
     * Unknown formats fall back to the text renderer.
     *
     * @param {object} data - Report data (see class description).
     * @returns {{format: string, content: string, metadata: object}} The
     *   rendered content plus generation metadata (timestamp, format and a
     *   snapshot of the configuration used).
     */
    generate(data) {
        let content;
        switch (this._config.format) {
            case "json":
                content = this._generateJsonReport(data);
                break;
            case "markdown":
                content = this._generateMarkdownReport(data);
                break;
            case "html":
                content = this._generateHtmlReport(data);
                break;
            case "text":
            default:
                content = this._generateTextReport(data);
                break;
        }
        return {
            format: this._config.format,
            content,
            metadata: {
                generatedAt: Date.now(),
                format: this._config.format,
                // Snapshot, so callers cannot mutate the generator's live config.
                config: { ...this._config },
            },
        };
    }
    /**
     * Generate text report.
     *
     * @param {object} data - Report data.
     * @returns {string} Plain-text report, lines joined with "\n".
     */
    _generateTextReport(data) {
        const lines = [];
        const result = data.result;
        lines.push("=".repeat(60));
        lines.push(data.title);
        lines.push("=".repeat(60));
        lines.push(`Generated: ${new Date(data.timestamp).toISOString()}`);
        lines.push("");
        // Overall result
        lines.push("OVERALL RESULT");
        lines.push("-".repeat(40));
        lines.push(`Score: ${result.overallScore.toFixed(2)}/10`);
        lines.push(`Status: ${result.passed ? "PASSED" : "FAILED"}`);
        lines.push(`Aggregation: ${result.aggregationMethod}`);
        if (this._config.includeTiming && result.totalComputeTime) {
            lines.push(`Duration: ${result.totalComputeTime}ms`);
        }
        lines.push("");
        // Individual scores
        lines.push("INDIVIDUAL SCORES");
        lines.push("-".repeat(40));
        for (const score of result.scores) {
            const status = score.passed ? "[PASS]" : "[FAIL]";
            lines.push(`${status} ${score.scorerName}: ${score.score.toFixed(2)}/10`);
            if (this._config.includeReasoning) {
                lines.push(`  Reasoning: ${score.reasoning}`);
            }
            if (this._config.includeTiming) {
                lines.push(`  Duration: ${score.computeTime}ms`);
            }
            lines.push("");
        }
        // Errors if any
        if (result.errors && result.errors.length > 0) {
            lines.push("ERRORS");
            lines.push("-".repeat(40));
            for (const error of result.errors) {
                lines.push(`[${error.scorerId}] ${error.error}`);
            }
            lines.push("");
        }
        // Custom sections
        if (data.customSections) {
            for (const section of data.customSections) {
                lines.push(section.title.toUpperCase());
                lines.push("-".repeat(40));
                lines.push(typeof section.content === "string"
                    ? section.content
                    : JSON.stringify(section.content, null, 2));
                lines.push("");
            }
        }
        lines.push("=".repeat(60));
        return lines.join("\n");
    }
    /**
     * Generate JSON report.
     *
     * @param {object} data - Report data.
     * @returns {string} Pretty-printed (2-space) JSON document.
     */
    _generateJsonReport(data) {
        const report = {
            title: data.title,
            timestamp: data.timestamp,
            generatedAt: new Date(data.timestamp).toISOString(),
            overall: {
                score: data.result.overallScore,
                passed: data.result.passed,
                aggregationMethod: data.result.aggregationMethod,
            },
            scores: data.result.scores.map((score) => {
                const scoreObj = {
                    scorerId: score.scorerId,
                    scorerName: score.scorerName,
                    score: score.score,
                    normalizedScore: score.normalizedScore,
                    passed: score.passed,
                    threshold: score.threshold,
                };
                if (this._config.includeReasoning) {
                    scoreObj.reasoning = score.reasoning;
                }
                if (this._config.includeTiming) {
                    scoreObj.computeTime = score.computeTime;
                }
                if (this._config.includeMetadata && score.metadata) {
                    scoreObj.metadata = score.metadata;
                }
                return scoreObj;
            }),
        };
        if (this._config.includeTiming) {
            report.totalComputeTime = data.result.totalComputeTime;
        }
        const errors = data.result.errors;
        if (errors && errors.length > 0) {
            report.errors = errors;
        }
        if (data.customSections) {
            report.customSections = data.customSections;
        }
        return JSON.stringify(report, null, 2);
    }
    /**
     * Generate Markdown report.
     *
     * @param {object} data - Report data.
     * @returns {string} Markdown document, lines joined with "\n".
     */
    _generateMarkdownReport(data) {
        const lines = [];
        const result = data.result;
        lines.push(`# ${data.title}`);
        lines.push("");
        lines.push(`*Generated: ${new Date(data.timestamp).toISOString()}*`);
        lines.push("");
        // Overall result
        lines.push("## Overall Result");
        lines.push("");
        lines.push(`| Metric | Value |`);
        lines.push(`| ------ | ----- |`);
        lines.push(`| Score | ${result.overallScore.toFixed(2)}/10 |`);
        lines.push(`| Status | ${result.passed ? "**PASSED**" : "**FAILED**"} |`);
        lines.push(`| Aggregation | ${result.aggregationMethod} |`);
        if (this._config.includeTiming && result.totalComputeTime) {
            lines.push(`| Duration | ${result.totalComputeTime}ms |`);
        }
        lines.push("");
        // Individual scores
        lines.push("## Individual Scores");
        lines.push("");
        lines.push(`| Scorer | Score | Status | ${this._config.includeTiming ? "Duration |" : ""}`);
        lines.push(`| ------ | ----- | ------ | ${this._config.includeTiming ? "-------- |" : ""}`);
        for (const score of result.scores) {
            const status = score.passed ? "Pass" : "Fail";
            let row = `| ${score.scorerName} | ${score.score.toFixed(2)} | ${status} |`;
            if (this._config.includeTiming) {
                row += ` ${score.computeTime}ms |`;
            }
            lines.push(row);
        }
        lines.push("");
        // Reasoning
        if (this._config.includeReasoning) {
            lines.push("### Reasoning");
            lines.push("");
            for (const score of result.scores) {
                lines.push(`**${score.scorerName}**: ${score.reasoning}`);
                lines.push("");
            }
        }
        // Errors
        if (result.errors && result.errors.length > 0) {
            lines.push("## Errors");
            lines.push("");
            for (const error of result.errors) {
                lines.push(`- **${error.scorerId}**: ${error.error}`);
            }
            lines.push("");
        }
        // Custom sections
        if (data.customSections) {
            for (const section of data.customSections) {
                lines.push(`## ${section.title}`);
                lines.push("");
                lines.push(typeof section.content === "string"
                    ? section.content
                    : "```json\n" + JSON.stringify(section.content, null, 2) + "\n```");
                lines.push("");
            }
        }
        return lines.join("\n");
    }
    /**
     * Generate HTML report.
     *
     * Every caller-supplied string that is interpolated into the markup is
     * passed through `_escapeHtml` first.
     *
     * @param {object} data - Report data.
     * @returns {string} Stand-alone HTML document with inline styles.
     */
    _generateHtmlReport(data) {
        const result = data.result;
        const statusClass = result.passed ? "passed" : "failed";
        const includeTiming = this._config.includeTiming;
        // Only render the total duration when a value is actually present,
        // otherwise the report would show the literal text "undefinedms".
        const hasTotalTime = result.totalComputeTime !== undefined &&
            result.totalComputeTime !== null;
        let html = `
<!DOCTYPE html>
<html>
<head>
<title>${this._escapeHtml(data.title)}</title>
<style>
body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; max-width: 800px; margin: 0 auto; padding: 20px; }
h1 { color: #333; border-bottom: 2px solid #eee; padding-bottom: 10px; }
h2 { color: #555; margin-top: 30px; }
.meta { color: #888; font-size: 0.9em; }
.overall { background: #f5f5f5; padding: 20px; border-radius: 8px; margin: 20px 0; }
.overall.passed { border-left: 4px solid #4caf50; }
.overall.failed { border-left: 4px solid #f44336; }
.score-value { font-size: 2em; font-weight: bold; }
.passed .score-value { color: #4caf50; }
.failed .score-value { color: #f44336; }
table { width: 100%; border-collapse: collapse; margin: 20px 0; }
th, td { padding: 12px; text-align: left; border-bottom: 1px solid #eee; }
th { background: #f9f9f9; font-weight: 600; }
.status-pass { color: #4caf50; }
.status-fail { color: #f44336; }
.reasoning { color: #666; font-size: 0.9em; font-style: italic; }
.error { background: #ffebee; padding: 10px; border-radius: 4px; margin: 5px 0; }
pre { background: #f5f5f5; padding: 15px; border-radius: 4px; overflow-x: auto; }
</style>
</head>
<body>
<h1>${this._escapeHtml(data.title)}</h1>
<p class="meta">Generated: ${new Date(data.timestamp).toISOString()}</p>

<div class="overall ${statusClass}">
<div class="score-value">${result.overallScore.toFixed(2)}/10</div>
<div>Status: <strong>${result.passed ? "PASSED" : "FAILED"}</strong></div>
<div>Aggregation: ${this._escapeHtml(result.aggregationMethod)}</div>
${includeTiming && hasTotalTime ? `<div>Duration: ${result.totalComputeTime}ms</div>` : ""}
</div>

<h2>Individual Scores</h2>
<table>
<thead>
<tr>
<th>Scorer</th>
<th>Score</th>
<th>Status</th>
${includeTiming ? "<th>Duration</th>" : ""}
</tr>
</thead>
<tbody>
`;
        for (const score of result.scores) {
            const cellClass = score.passed ? "status-pass" : "status-fail";
            html += `
<tr>
<td>${this._escapeHtml(score.scorerName)}</td>
<td>${score.score.toFixed(2)}</td>
<td class="${cellClass}">${score.passed ? "Pass" : "Fail"}</td>
${includeTiming ? `<td>${score.computeTime}ms</td>` : ""}
</tr>
`;
            if (this._config.includeReasoning) {
                // The reasoning row spans every column of the scores table.
                html += `
<tr>
<td colspan="${includeTiming ? 4 : 3}" class="reasoning">
${this._escapeHtml(score.reasoning)}
</td>
</tr>
`;
            }
        }
        html += `
</tbody>
</table>
`;
        // Errors
        if (result.errors && result.errors.length > 0) {
            html += `
<h2>Errors</h2>
`;
            for (const error of result.errors) {
                html += `
<div class="error">
<strong>${this._escapeHtml(error.scorerId)}</strong>: ${this._escapeHtml(error.error)}
</div>
`;
            }
        }
        // Custom sections
        if (data.customSections) {
            for (const section of data.customSections) {
                html += `
<h2>${this._escapeHtml(section.title)}</h2>
`;
                if (typeof section.content === "string") {
                    html += `<p>${this._escapeHtml(section.content)}</p>`;
                }
                else {
                    html += `<pre>${this._escapeHtml(JSON.stringify(section.content, null, 2))}</pre>`;
                }
            }
        }
        html += `
</body>
</html>
`;
        return html;
    }
    /**
     * Escape HTML special characters.
     *
     * Replaces `&`, `<`, `>`, `"` and `'` with their character references.
     * `&` is handled first so the ampersands introduced by the later
     * replacements are not escaped a second time.
     *
     * @param {*} text - Value to escape. `null`/`undefined` yield an empty
     *   string; any other non-string value is converted with `String()`.
     * @returns {string} Text that is safe to place in HTML element content
     *   or a quoted attribute value.
     */
    _escapeHtml(text) {
        if (text === undefined || text === null) {
            return "";
        }
        return String(text)
            .replace(/&/g, "&amp;")
            .replace(/</g, "&lt;")
            .replace(/>/g, "&gt;")
            .replace(/"/g, "&quot;")
            .replace(/'/g, "&#039;");
    }
    /**
     * Update configuration.
     *
     * @param {object} config - Partial configuration merged over the
     *   current one; keys not present keep their existing value.
     */
    configure(config) {
        this._config = { ...this._config, ...config };
    }
}
|
|
356
|
+
/**
 * Create a report generator.
 *
 * Thin factory around the ReportGenerator constructor.
 *
 * @param {object} [config] - Partial report configuration forwarded
 *   unchanged to `new ReportGenerator(config)`.
 * @returns {ReportGenerator} A new generator instance.
 */
export function createReportGenerator(config) {
    return new ReportGenerator(config);
}
|
|
362
|
+
/**
 * Quick report generation functions.
 *
 * Each helper builds a fresh ReportGenerator with only `format` set (all
 * other options take their defaults) and immediately renders `data`.
 */
export const Reports = {
    /** Generate text report */
    text: (data) => new ReportGenerator({ format: "text" }).generate(data),
    /** Generate JSON report */
    json: (data) => new ReportGenerator({ format: "json" }).generate(data),
    /** Generate Markdown report */
    markdown: (data) => new ReportGenerator({ format: "markdown" }).generate(data),
    /** Generate HTML report */
    html: (data) => new ReportGenerator({ format: "html" }).generate(data),
};
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Abstract base scorer class providing common functionality
|
|
3
|
+
* All scorers extend this class for consistent behavior
|
|
4
|
+
*/
|
|
5
|
+
import type { JsonObject } from "../../types/common.js";
|
|
6
|
+
import type { ScoreResult, Scorer, ScorerConfig, ScorerInput, ScorerMetadata, ScoreScale } from "../../types/scorerTypes.js";
|
|
7
|
+
/**
 * Default score scale (0-10)
 */
export declare const DEFAULT_SCORE_SCALE: ScoreScale;
/**
 * Default scorer configuration
 */
export declare const DEFAULT_SCORER_CONFIG: ScorerConfig;
/**
 * Abstract base class for all scorers.
 * Provides common functionality and enforces interface compliance:
 * subclasses implement `score()` and can use the protected helpers for
 * normalization, threshold checks, result construction, timing, timeouts
 * and retries.
 */
export declare abstract class BaseScorer implements Scorer {
    /** Effective configuration for this scorer instance. */
    protected _config: ScorerConfig;
    /** Static description of this scorer. */
    protected _metadata: ScorerMetadata;
    /**
     * @param metadata - Scorer metadata.
     * @param config - Optional configuration overrides.
     */
    constructor(metadata: ScorerMetadata, config?: ScorerConfig);
    /**
     * Get scorer metadata
     */
    get metadata(): ScorerMetadata;
    /**
     * Get current configuration
     */
    get config(): ScorerConfig;
    /**
     * Main scoring method - must be implemented by subclasses
     *
     * @param input - The input to evaluate.
     * @returns A promise resolving to the score result.
     */
    abstract score(input: ScorerInput): Promise<ScoreResult>;
    /**
     * Validate input has required fields
     *
     * @param input - The input to check.
     * @returns `valid` flag together with the list of validation error
     *   messages.
     */
    validateInput(input: ScorerInput): {
        valid: boolean;
        errors: string[];
    };
    /**
     * Update configuration
     *
     * @param config - Partial configuration to apply.
     */
    configure(config: Partial<ScorerConfig>): void;
    /**
     * Normalize a score to 0-1 scale
     *
     * @param score - Raw score expressed on `scale`.
     * @param scale - Scale the raw score is expressed on (optional).
     */
    protected normalizeScore(score: number, scale?: ScoreScale): number;
    /**
     * Convert normalized score back to scale
     *
     * @param normalizedScore - Score on the 0-1 scale.
     * @param scale - Target scale (optional).
     */
    protected denormalizeScore(normalizedScore: number, scale?: ScoreScale): number;
    /**
     * Check if score passes threshold
     *
     * @param normalizedScore - Score on the 0-1 scale.
     */
    protected checkThreshold(normalizedScore: number): boolean;
    /**
     * Create a standardized score result
     *
     * @param score - Raw score value.
     * @param reasoning - Human-readable explanation of the score.
     * @param options - Optional scale, confidence, metadata and error text
     *   to attach to the result.
     */
    protected createScoreResult(score: number, reasoning: string, options?: {
        scale?: ScoreScale;
        confidence?: number;
        metadata?: JsonObject;
        error?: string;
    }): ScoreResult;
    /**
     * Create an error score result
     *
     * @param error - The failure, as an Error or a message string.
     */
    protected createErrorResult(error: Error | string): ScoreResult;
    /**
     * Execute scoring with timing and error handling
     *
     * @param scoringFn - Produces a result without `computeTime`; the
     *   returned promise resolves to a complete ScoreResult.
     */
    protected executeWithTiming(scoringFn: () => Promise<Omit<ScoreResult, "computeTime">>): Promise<ScoreResult>;
    /**
     * Execute scoring with timeout
     *
     * @param fn - Asynchronous operation to run.
     * @param timeoutMs - Timeout in milliseconds.
     * @param operationName - Name identifying the operation.
     */
    protected executeWithTimeout<T>(fn: () => Promise<T>, timeoutMs: number, operationName: string): Promise<T>;
    /**
     * Execute with retry logic
     *
     * @param operation - Asynchronous operation to run.
     * @param retries - Optional retry count.
     */
    protected executeWithRetry<T>(operation: () => Promise<T>, retries?: number): Promise<T>;
}
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Abstract base scorer class providing common functionality
|
|
3
|
+
* All scorers extend this class for consistent behavior
|
|
4
|
+
*/
|
|
5
|
+
import { withTimeout, ErrorFactory } from "../../utils/errorHandling.js";
|
|
6
|
+
import { logger } from "../../utils/logger.js";
|
|
7
|
+
/**
 * Default score scale (0-10).
 *
 * - min / max: inclusive bounds of the raw score range
 * - precision: presumably the number of decimal places kept when rounding
 *   scores (NOTE(review): confirm against the code that reads it)
 */
export const DEFAULT_SCORE_SCALE = {
    min: 0,
    max: 10,
    precision: 2,
};
|
|
15
|
+
/**
 * Default scorer configuration.
 *
 * Lowest-priority layer of a scorer's configuration: BaseScorer spreads
 * `metadata.defaultConfig` and then the constructor's `config` on top of it.
 *
 * - enabled:   scorer is active by default
 * - threshold: pass mark; presumably compared against the normalized (0-1)
 *   score (NOTE(review): confirm in checkThreshold)
 * - weight:    relative weight of this scorer
 * - timeout:   presumably milliseconds (NOTE(review): confirm at the call
 *   site that passes it on)
 * - retries:   retry count
 */
export const DEFAULT_SCORER_CONFIG = {
    enabled: true,
    threshold: 0.7,
    weight: 1.0,
    timeout: 30000,
    retries: 2,
};
|
|
25
|
+
/**
 * Abstract base class for all scorers.
 *
 * Stores the scorer's metadata and merged configuration and provides the
 * helpers concrete scorers share: input validation, score normalization,
 * result construction, timing, timeouts and retries.
 */
export class BaseScorer {
    /** Effective configuration: library defaults, then metadata defaults, then caller overrides. */
    _config;
    /** Descriptor given at construction; read here for `id`, `name`, `requiredInputs` and `defaultConfig`. */
    _metadata;
    /**
     * @param {object} metadata - Scorer descriptor (`id`, `name`, `requiredInputs`, optional `defaultConfig`).
     * @param {object} [config] - Per-instance overrides; these win over every default.
     */
    constructor(metadata, config) {
        this._metadata = metadata;
        this._config = {
            ...DEFAULT_SCORER_CONFIG,
            ...metadata.defaultConfig,
            ...config,
        };
    }
    /**
     * Get scorer metadata
     */
    get metadata() {
        return this._metadata;
    }
    /**
     * Get current configuration
     */
    get config() {
        return this._config;
    }
    /**
     * Validate that the input carries every field listed in the scorer's
     * `requiredInputs`, and that required text/context fields are not empty.
     *
     * @param {object} input - Candidate scoring input.
     * @returns {{valid: boolean, errors: string[]}} `valid` is true only when `errors` is empty.
     */
    validateInput(input) {
        // A missing input object is reported as missing fields instead of throwing.
        const fields = input ?? {};
        const required = this._metadata.requiredInputs;
        const errors = [];
        for (const field of required) {
            const value = fields[field];
            if (value === undefined || value === null) {
                errors.push(`Missing required input: ${field}`);
            }
        }
        // Whitespace-only strings count as empty for required text fields.
        if (required.includes("query") &&
            typeof fields.query === "string" &&
            !fields.query.trim()) {
            errors.push("Query cannot be empty");
        }
        if (required.includes("response") &&
            typeof fields.response === "string" &&
            !fields.response.trim()) {
            errors.push("Response cannot be empty");
        }
        // A null/undefined context was already reported as missing above, so
        // only the shape of a value that was actually supplied is checked here.
        const { context } = fields;
        if (required.includes("context") &&
            context !== undefined &&
            context !== null) {
            if (!Array.isArray(context) || context.length === 0) {
                errors.push("Context must be a non-empty array");
            }
        }
        return {
            valid: errors.length === 0,
            errors,
        };
    }
    /**
     * Merge new settings over the current configuration and log the result.
     *
     * @param {object} config - Partial configuration; its keys replace existing ones.
     */
    configure(config) {
        this._config = {
            ...this._config,
            ...config,
        };
        logger.debug(`Scorer ${this._metadata.id} reconfigured`, {
            config: this._config,
        });
    }
    /**
     * Normalize a score to the 0-1 range.
     *
     * @param {number} score - Raw score on `scale`.
     * @param {{min: number, max: number}} [scale] - Scale of the raw score.
     * @returns {number} Value in [0, 1]; 0 when any input is non-finite or the scale has zero width.
     */
    normalizeScore(score, scale = DEFAULT_SCORE_SCALE) {
        if (!Number.isFinite(score) ||
            !Number.isFinite(scale.min) ||
            !Number.isFinite(scale.max)) {
            return 0;
        }
        // Guard against a zero denominator.
        const denominator = scale.max - scale.min;
        if (denominator === 0) {
            return 0;
        }
        const clampedScore = Math.max(scale.min, Math.min(scale.max, score));
        const normalized = (clampedScore - scale.min) / denominator;
        return Math.max(0, Math.min(1, normalized));
    }
    /**
     * Convert a normalized (0-1) score back onto `scale`.
     *
     * @param {number} normalizedScore - Value expected in [0, 1]; out-of-range values are clamped.
     * @param {{min: number, max: number}} [scale] - Target scale.
     * @returns {number} Score on `scale`; `scale.min` when the input is NaN.
     */
    denormalizeScore(normalizedScore, scale = DEFAULT_SCORE_SCALE) {
        const bounded = Math.max(0, Math.min(1, normalizedScore));
        // Math.min/Math.max propagate NaN; treat it as the bottom of the range
        // so NaN never escapes, mirroring normalizeScore.
        const clamped = Number.isNaN(bounded) ? 0 : bounded;
        return scale.min + clamped * (scale.max - scale.min);
    }
    /**
     * Check whether a normalized score meets the configured threshold
     * (0.7 when none is configured).
     *
     * @param {number} normalizedScore - Score in [0, 1].
     * @returns {boolean} True when the score is at or above the threshold.
     */
    checkThreshold(normalizedScore) {
        const threshold = this._config.threshold ?? 0.7;
        return normalizedScore >= threshold;
    }
    /**
     * Create a standardized score result.
     *
     * @param {number} score - Raw score; non-finite values fall back to `scale.min`, others are clamped to the scale.
     * @param {string} reasoning - Explanation attached to the result.
     * @param {object} [options] - Optional `scale`, `confidence`, `metadata` and `error`.
     * @returns {object} Result record; `computeTime` is 0 until a caller such as `executeWithTiming` sets it.
     */
    createScoreResult(score, reasoning, options = {}) {
        const scale = options.scale ?? DEFAULT_SCORE_SCALE;
        // toFixed(undefined) rounds to an integer, so a custom scale that omits
        // `precision` falls back to the default precision instead.
        const precision = scale.precision ?? DEFAULT_SCORE_SCALE.precision;
        const safeScore = Number.isFinite(score) ? score : scale.min;
        const clampedScore = Math.max(scale.min, Math.min(scale.max, safeScore));
        const normalizedScore = this.normalizeScore(clampedScore, scale);
        // Ensure no NaN leaks into published scores (possible with a malformed scale).
        const finalScore = Number.isFinite(clampedScore) ? clampedScore : 0;
        const finalNormalized = Number.isFinite(normalizedScore)
            ? normalizedScore
            : 0;
        let confidence;
        if (options.confidence !== undefined) {
            confidence = Number.isFinite(options.confidence)
                ? Math.max(0, Math.min(1, options.confidence))
                : 0;
        }
        return {
            scorerId: this._metadata.id,
            scorerName: this._metadata.name,
            score: Number(finalScore.toFixed(precision)),
            normalizedScore: Number(finalNormalized.toFixed(4)),
            scale,
            reasoning,
            passed: this.checkThreshold(finalNormalized),
            threshold: this._config.threshold ?? 0.7,
            confidence,
            metadata: options.metadata,
            computeTime: 0, // Set by caller via executeWithTiming
            error: options.error,
        };
    }
    /**
     * Create a failed score result (score 0, not passed).
     *
     * @param {Error|string} error - Failure cause; any other value is stringified.
     * @returns {object} Result record whose `error` and `reasoning` carry the message.
     */
    createErrorResult(error) {
        // String(...) keeps `error` and `reasoning` textual for any thrown value.
        const errorMessage = error instanceof Error ? error.message : String(error);
        return {
            scorerId: this._metadata.id,
            scorerName: this._metadata.name,
            score: 0,
            normalizedScore: 0,
            scale: DEFAULT_SCORE_SCALE,
            reasoning: `Scoring failed: ${errorMessage}`,
            passed: false,
            threshold: this._config.threshold ?? 0.7,
            computeTime: 0,
            error: errorMessage,
        };
    }
    /**
     * Run a scoring function, stamping the elapsed wall-clock milliseconds on
     * its result. A thrown error is logged and converted into an error result
     * rather than propagated.
     *
     * @param {() => Promise<object>} scoringFn - Produces the score result.
     * @returns {Promise<object>} The result (or an error result) with `computeTime` set.
     */
    async executeWithTiming(scoringFn) {
        const startTime = Date.now();
        try {
            const result = await scoringFn();
            return {
                ...result,
                computeTime: Date.now() - startTime,
            };
        }
        catch (error) {
            const errorMessage = error instanceof Error ? error.message : String(error);
            logger.error(`Scorer ${this._metadata.id} failed`, {
                error: errorMessage,
            });
            return {
                ...this.createErrorResult(errorMessage),
                computeTime: Date.now() - startTime,
            };
        }
    }
    /**
     * Execute scoring with timeout. Delegates to `withTimeout`, passing an
     * evaluation-timeout error built from the operation name and limit.
     *
     * @param {() => Promise<*>} fn - Operation to start.
     * @param {number} timeoutMs - Time limit handed to `withTimeout`.
     * @param {string} operationName - Name used when building the timeout error.
     * @returns {Promise<*>} Whatever `withTimeout` yields for the operation.
     */
    async executeWithTimeout(fn, timeoutMs, operationName) {
        return withTimeout(fn(), timeoutMs, ErrorFactory.evaluationTimeout(operationName, timeoutMs));
    }
    /**
     * Execute an operation, retrying on failure with exponential backoff
     * (1s, 2s, 4s, ... between attempts).
     *
     * @param {() => Promise<*>} operation - Operation to attempt.
     * @param {number} [retries] - Extra attempts after the first; defaults to the configured `retries`, then 2.
     * @returns {Promise<*>} The first successful result.
     * @throws {Error} The last failure, wrapped in an Error when a non-Error was thrown.
     */
    async executeWithRetry(operation, retries) {
        const requested = retries ?? this._config.retries ?? 2;
        // A negative or NaN count would otherwise skip the operation entirely;
        // always make at least one attempt.
        const maxRetries = requested >= 0 ? requested : 0;
        for (let attempt = 0;; attempt++) {
            try {
                return await operation();
            }
            catch (error) {
                const failure = error instanceof Error ? error : new Error(String(error));
                if (attempt >= maxRetries) {
                    throw failure;
                }
                logger.debug(`Scorer ${this._metadata.id} retry ${attempt + 1}/${maxRetries}`, { error: failure.message });
                // Exponential backoff
                await new Promise((resolve) => setTimeout(resolve, 2 ** attempt * 1000));
            }
        }
    }
}
|