@juspay/neurolink 9.36.1 → 9.38.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/auth/errors.d.ts +1 -1
- package/dist/auth/middleware/AuthMiddleware.d.ts +1 -1
- package/dist/auth/providers/BaseAuthProvider.d.ts +1 -1
- package/dist/browser/neurolink.min.js +1105 -556
- package/dist/cli/commands/evaluate.d.ts +48 -0
- package/dist/cli/commands/evaluate.js +955 -0
- package/dist/cli/parser.js +4 -1
- package/dist/evaluation/BatchEvaluator.d.ts +163 -0
- package/dist/evaluation/BatchEvaluator.js +267 -0
- package/dist/evaluation/EvaluationAggregator.d.ts +272 -0
- package/dist/evaluation/EvaluationAggregator.js +377 -0
- package/dist/evaluation/EvaluatorFactory.d.ts +113 -0
- package/dist/evaluation/EvaluatorFactory.js +280 -0
- package/dist/evaluation/EvaluatorRegistry.d.ts +160 -0
- package/dist/evaluation/EvaluatorRegistry.js +184 -0
- package/dist/evaluation/errors/EvaluationError.d.ts +189 -0
- package/dist/evaluation/errors/EvaluationError.js +206 -0
- package/dist/evaluation/errors/index.d.ts +4 -0
- package/dist/evaluation/errors/index.js +4 -0
- package/dist/evaluation/hooks/index.d.ts +6 -0
- package/dist/evaluation/hooks/index.js +6 -0
- package/dist/evaluation/hooks/langfuseAdapter.d.ts +99 -0
- package/dist/evaluation/hooks/langfuseAdapter.js +172 -0
- package/dist/evaluation/hooks/observabilityHooks.d.ts +129 -0
- package/dist/evaluation/hooks/observabilityHooks.js +181 -0
- package/dist/evaluation/index.d.ts +11 -2
- package/dist/evaluation/index.js +15 -0
- package/dist/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
- package/dist/evaluation/pipeline/evaluationPipeline.js +381 -0
- package/dist/evaluation/pipeline/index.d.ts +8 -0
- package/dist/evaluation/pipeline/index.js +8 -0
- package/dist/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
- package/dist/evaluation/pipeline/pipelineBuilder.js +260 -0
- package/dist/evaluation/pipeline/presets.d.ts +66 -0
- package/dist/evaluation/pipeline/presets.js +224 -0
- package/dist/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
- package/dist/evaluation/pipeline/strategies/batchStrategy.js +238 -0
- package/dist/evaluation/pipeline/strategies/index.d.ts +6 -0
- package/dist/evaluation/pipeline/strategies/index.js +6 -0
- package/dist/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
- package/dist/evaluation/pipeline/strategies/samplingStrategy.js +238 -0
- package/dist/evaluation/reporting/index.d.ts +6 -0
- package/dist/evaluation/reporting/index.js +6 -0
- package/dist/evaluation/reporting/metricsCollector.d.ts +147 -0
- package/dist/evaluation/reporting/metricsCollector.js +285 -0
- package/dist/evaluation/reporting/reportGenerator.d.ts +90 -0
- package/dist/evaluation/reporting/reportGenerator.js +374 -0
- package/dist/evaluation/scorers/baseScorer.d.ts +83 -0
- package/dist/evaluation/scorers/baseScorer.js +232 -0
- package/dist/evaluation/scorers/customScorerUtils.d.ts +95 -0
- package/dist/evaluation/scorers/customScorerUtils.js +381 -0
- package/dist/evaluation/scorers/index.d.ts +10 -0
- package/dist/evaluation/scorers/index.js +16 -0
- package/dist/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/answerRelevancyScorer.js +99 -0
- package/dist/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
- package/dist/evaluation/scorers/llm/baseLLMScorer.js +281 -0
- package/dist/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/biasDetectionScorer.js +127 -0
- package/dist/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/contextPrecisionScorer.js +92 -0
- package/dist/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/contextRelevancyScorer.js +107 -0
- package/dist/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/faithfulnessScorer.js +121 -0
- package/dist/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/hallucinationScorer.js +140 -0
- package/dist/evaluation/scorers/llm/index.d.ts +15 -0
- package/dist/evaluation/scorers/llm/index.js +16 -0
- package/dist/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/promptAlignmentScorer.js +106 -0
- package/dist/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/summarizationScorer.js +114 -0
- package/dist/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/toneConsistencyScorer.js +106 -0
- package/dist/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/toxicityScorer.js +121 -0
- package/dist/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
- package/dist/evaluation/scorers/rule/baseRuleScorer.js +233 -0
- package/dist/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
- package/dist/evaluation/scorers/rule/contentSimilarityScorer.js +350 -0
- package/dist/evaluation/scorers/rule/formatScorer.d.ts +147 -0
- package/dist/evaluation/scorers/rule/formatScorer.js +470 -0
- package/dist/evaluation/scorers/rule/index.d.ts +9 -0
- package/dist/evaluation/scorers/rule/index.js +10 -0
- package/dist/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
- package/dist/evaluation/scorers/rule/keywordCoverageScorer.js +347 -0
- package/dist/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
- package/dist/evaluation/scorers/rule/lengthScorer.js +351 -0
- package/dist/evaluation/scorers/scorerBuilder.d.ts +161 -0
- package/dist/evaluation/scorers/scorerBuilder.js +420 -0
- package/dist/evaluation/scorers/scorerRegistry.d.ts +62 -0
- package/dist/evaluation/scorers/scorerRegistry.js +467 -0
- package/dist/index.d.ts +37 -25
- package/dist/index.js +65 -26
- package/dist/lib/auth/providers/BaseAuthProvider.d.ts +1 -1
- package/dist/lib/evaluation/BatchEvaluator.d.ts +163 -0
- package/dist/lib/evaluation/BatchEvaluator.js +268 -0
- package/dist/lib/evaluation/EvaluationAggregator.d.ts +272 -0
- package/dist/lib/evaluation/EvaluationAggregator.js +378 -0
- package/dist/lib/evaluation/EvaluatorFactory.d.ts +113 -0
- package/dist/lib/evaluation/EvaluatorFactory.js +281 -0
- package/dist/lib/evaluation/EvaluatorRegistry.d.ts +160 -0
- package/dist/lib/evaluation/EvaluatorRegistry.js +185 -0
- package/dist/lib/evaluation/errors/EvaluationError.d.ts +189 -0
- package/dist/lib/evaluation/errors/EvaluationError.js +207 -0
- package/dist/lib/evaluation/errors/index.d.ts +4 -0
- package/dist/lib/evaluation/errors/index.js +5 -0
- package/dist/lib/evaluation/hooks/index.d.ts +6 -0
- package/dist/lib/evaluation/hooks/index.js +7 -0
- package/dist/lib/evaluation/hooks/langfuseAdapter.d.ts +99 -0
- package/dist/lib/evaluation/hooks/langfuseAdapter.js +173 -0
- package/dist/lib/evaluation/hooks/observabilityHooks.d.ts +129 -0
- package/dist/lib/evaluation/hooks/observabilityHooks.js +182 -0
- package/dist/lib/evaluation/index.d.ts +11 -2
- package/dist/lib/evaluation/index.js +15 -0
- package/dist/lib/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
- package/dist/lib/evaluation/pipeline/evaluationPipeline.js +382 -0
- package/dist/lib/evaluation/pipeline/index.d.ts +8 -0
- package/dist/lib/evaluation/pipeline/index.js +9 -0
- package/dist/lib/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
- package/dist/lib/evaluation/pipeline/pipelineBuilder.js +261 -0
- package/dist/lib/evaluation/pipeline/presets.d.ts +66 -0
- package/dist/lib/evaluation/pipeline/presets.js +225 -0
- package/dist/lib/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
- package/dist/lib/evaluation/pipeline/strategies/batchStrategy.js +239 -0
- package/dist/lib/evaluation/pipeline/strategies/index.d.ts +6 -0
- package/dist/lib/evaluation/pipeline/strategies/index.js +7 -0
- package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
- package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.js +239 -0
- package/dist/lib/evaluation/reporting/index.d.ts +6 -0
- package/dist/lib/evaluation/reporting/index.js +7 -0
- package/dist/lib/evaluation/reporting/metricsCollector.d.ts +147 -0
- package/dist/lib/evaluation/reporting/metricsCollector.js +286 -0
- package/dist/lib/evaluation/reporting/reportGenerator.d.ts +90 -0
- package/dist/lib/evaluation/reporting/reportGenerator.js +375 -0
- package/dist/lib/evaluation/scorers/baseScorer.d.ts +83 -0
- package/dist/lib/evaluation/scorers/baseScorer.js +233 -0
- package/dist/lib/evaluation/scorers/customScorerUtils.d.ts +95 -0
- package/dist/lib/evaluation/scorers/customScorerUtils.js +382 -0
- package/dist/lib/evaluation/scorers/index.d.ts +10 -0
- package/dist/lib/evaluation/scorers/index.js +17 -0
- package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.js +100 -0
- package/dist/lib/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
- package/dist/lib/evaluation/scorers/llm/baseLLMScorer.js +282 -0
- package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.js +128 -0
- package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.js +93 -0
- package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.js +108 -0
- package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.js +122 -0
- package/dist/lib/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/hallucinationScorer.js +141 -0
- package/dist/lib/evaluation/scorers/llm/index.d.ts +15 -0
- package/dist/lib/evaluation/scorers/llm/index.js +17 -0
- package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.js +107 -0
- package/dist/lib/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/summarizationScorer.js +115 -0
- package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.js +107 -0
- package/dist/lib/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/toxicityScorer.js +122 -0
- package/dist/lib/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
- package/dist/lib/evaluation/scorers/rule/baseRuleScorer.js +234 -0
- package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
- package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.js +351 -0
- package/dist/lib/evaluation/scorers/rule/formatScorer.d.ts +147 -0
- package/dist/lib/evaluation/scorers/rule/formatScorer.js +471 -0
- package/dist/lib/evaluation/scorers/rule/index.d.ts +9 -0
- package/dist/lib/evaluation/scorers/rule/index.js +11 -0
- package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
- package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.js +348 -0
- package/dist/lib/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
- package/dist/lib/evaluation/scorers/rule/lengthScorer.js +352 -0
- package/dist/lib/evaluation/scorers/scorerBuilder.d.ts +161 -0
- package/dist/lib/evaluation/scorers/scorerBuilder.js +421 -0
- package/dist/lib/evaluation/scorers/scorerRegistry.d.ts +62 -0
- package/dist/lib/evaluation/scorers/scorerRegistry.js +468 -0
- package/dist/lib/index.d.ts +37 -25
- package/dist/lib/index.js +65 -26
- package/dist/lib/neurolink.d.ts +204 -0
- package/dist/lib/neurolink.js +296 -0
- package/dist/lib/processors/media/VideoProcessor.d.ts +8 -2
- package/dist/lib/processors/media/VideoProcessor.js +90 -41
- package/dist/lib/telemetry/telemetryService.d.ts +1 -1
- package/dist/lib/telemetry/telemetryService.js +27 -13
- package/dist/lib/types/index.d.ts +3 -1
- package/dist/lib/types/index.js +3 -2
- package/dist/lib/types/scorerTypes.d.ts +423 -0
- package/dist/lib/types/scorerTypes.js +6 -0
- package/dist/lib/utils/errorHandling.d.ts +20 -0
- package/dist/lib/utils/errorHandling.js +60 -0
- package/dist/neurolink.d.ts +204 -0
- package/dist/neurolink.js +296 -0
- package/dist/processors/media/VideoProcessor.d.ts +8 -2
- package/dist/processors/media/VideoProcessor.js +90 -41
- package/dist/telemetry/telemetryService.d.ts +1 -1
- package/dist/telemetry/telemetryService.js +27 -13
- package/dist/types/index.d.ts +3 -1
- package/dist/types/index.js +3 -2
- package/dist/types/scorerTypes.d.ts +423 -0
- package/dist/types/scorerTypes.js +5 -0
- package/dist/utils/errorHandling.d.ts +20 -0
- package/dist/utils/errorHandling.js +60 -0
- package/package.json +7 -7
- package/dist/processors/media/ffprobe-static.d.ts +0 -4
|
@@ -0,0 +1,272 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file EvaluationAggregator - Aggregates and analyzes evaluation results.
|
|
3
|
+
* Provides statistical analysis, trend detection, and summary generation.
|
|
4
|
+
*/
|
|
5
|
+
import type { EvaluationData } from "../types/evaluation.js";
|
|
6
|
+
/**
|
|
7
|
+
* Statistical summary of a set of evaluation scores (every field is 0 when the set is empty).
|
|
8
|
+
*/
|
|
9
|
+
export interface ScoreStatistics {
|
|
10
|
+
/** Minimum score */
|
|
11
|
+
min: number;
|
|
12
|
+
/** Maximum score */
|
|
13
|
+
max: number;
|
|
14
|
+
/** Mean (average) score */
|
|
15
|
+
mean: number;
|
|
16
|
+
/** Median score (the 50th percentile) */
|
|
17
|
+
median: number;
|
|
18
|
+
/** Standard deviation (square root of the variance below) */
|
|
19
|
+
stdDev: number;
|
|
20
|
+
/** Population variance (squared deviations from the mean, averaged over the number of scores) */
|
|
21
|
+
variance: number;
|
|
22
|
+
/** 25th percentile */
|
|
23
|
+
p25: number;
|
|
24
|
+
/** 75th percentile */
|
|
25
|
+
p75: number;
|
|
26
|
+
/** 90th percentile */
|
|
27
|
+
p90: number;
|
|
28
|
+
/** 95th percentile */
|
|
29
|
+
p95: number;
|
|
30
|
+
}
|
|
31
|
+
/**
|
|
32
|
+
* Score distribution across quality ranges; each field is a count of scores, not a percentage.
|
|
33
|
+
*/
|
|
34
|
+
export interface ScoreDistribution {
|
|
35
|
+
/** Number of items scoring 1-3 (poor) */
|
|
36
|
+
poor: number;
|
|
37
|
+
/** Number of items scoring 4-5 (below average) */
|
|
38
|
+
belowAverage: number;
|
|
39
|
+
/** Number of items scoring 6-7 (average) */
|
|
40
|
+
average: number;
|
|
41
|
+
/** Number of items scoring 8-9 (good) */
|
|
42
|
+
good: number;
|
|
43
|
+
/** Number of items scoring 10 or higher (excellent) */
|
|
44
|
+
excellent: number;
|
|
45
|
+
}
|
|
46
|
+
/**
|
|
47
|
+
* Trend analysis results for overall scores taken in insertion order.
|
|
48
|
+
*/
|
|
49
|
+
export interface TrendAnalysis {
|
|
50
|
+
/** Direction of the trend: "stable" when the absolute slope is below 0.05, otherwise the sign of the slope decides */
|
|
51
|
+
direction: "improving" | "declining" | "stable";
|
|
52
|
+
/** Slope of the linear regression of score against insertion index */
|
|
53
|
+
slope: number;
|
|
54
|
+
/** R-squared value (fit quality); 0 when all scores are identical */
|
|
55
|
+
rSquared: number;
|
|
56
|
+
/** Percentage change from first to last score; 0 when the first score is 0 */
|
|
57
|
+
percentChange: number;
|
|
58
|
+
/** Moving average of last N evaluations (N is the window size, capped at the number of evaluations) */
|
|
59
|
+
movingAverage: number;
|
|
60
|
+
}
|
|
61
|
+
/**
|
|
62
|
+
* Dimension-specific analysis for RAGAS metrics: per-dimension statistics plus pairwise correlations.
|
|
63
|
+
*/
|
|
64
|
+
export interface DimensionAnalysis {
|
|
65
|
+
/** Relevance score statistics */
|
|
66
|
+
relevance: ScoreStatistics;
|
|
67
|
+
/** Accuracy score statistics */
|
|
68
|
+
accuracy: ScoreStatistics;
|
|
69
|
+
/** Completeness score statistics */
|
|
70
|
+
completeness: ScoreStatistics;
|
|
71
|
+
/** Overall score statistics */
|
|
72
|
+
overall: ScoreStatistics;
|
|
73
|
+
/** Pairwise correlation coefficients between the relevance, accuracy and completeness score series */
|
|
74
|
+
correlations: {
|
|
75
|
+
relevanceAccuracy: number;
|
|
76
|
+
relevanceCompleteness: number;
|
|
77
|
+
accuracyCompleteness: number;
|
|
78
|
+
};
|
|
79
|
+
}
|
|
80
|
+
/**
|
|
81
|
+
* Quality alerts summary across the aggregated evaluations.
|
|
82
|
+
*/
|
|
83
|
+
export interface AlertSummary {
|
|
84
|
+
/** Total number of alerts (high + medium severity) */
|
|
85
|
+
total: number;
|
|
86
|
+
/** Number of evaluations with high alert severity */
|
|
87
|
+
high: number;
|
|
88
|
+
/** Number of evaluations with medium alert severity */
|
|
89
|
+
medium: number;
|
|
90
|
+
/** Number of items marked as off-topic */
|
|
91
|
+
offTopic: number;
|
|
92
|
+
/** Alert rate as percentage */
|
|
93
|
+
alertRate: number;
|
|
94
|
+
}
|
|
95
|
+
/**
|
|
96
|
+
* Comprehensive aggregation result returned by EvaluationAggregator.aggregate().
|
|
97
|
+
*/
|
|
98
|
+
export interface AggregationResult {
|
|
99
|
+
/** Number of evaluations aggregated */
|
|
100
|
+
count: number;
|
|
101
|
+
/** Statistics for overall scores */
|
|
102
|
+
statistics: ScoreStatistics;
|
|
103
|
+
/** Distribution of overall scores across quality ranges */
|
|
104
|
+
distribution: ScoreDistribution;
|
|
105
|
+
/** Dimension-specific analysis */
|
|
106
|
+
dimensions: DimensionAnalysis;
|
|
107
|
+
/** Sequence trend analysis based on insertion order (not time-based); undefined when fewer than 3 evaluations were aggregated */
|
|
108
|
+
sequenceTrend?: TrendAnalysis;
|
|
109
|
+
/** Alert summary */
|
|
110
|
+
alerts: AlertSummary;
|
|
111
|
+
/** Percentage (0-100) of evaluations whose overall score is at least the threshold */
|
|
112
|
+
passingRate: number;
|
|
113
|
+
/** Mean of the evaluationTime values of the aggregated evaluations */
|
|
114
|
+
avgEvaluationTime: number;
|
|
115
|
+
/** Aggregation metadata: ISO-8601 timestamp, the threshold applied, and the distinct evaluation models seen */
|
|
116
|
+
metadata: {
|
|
117
|
+
aggregatedAt: string;
|
|
118
|
+
threshold: number;
|
|
119
|
+
evaluationModels: string[];
|
|
120
|
+
};
|
|
121
|
+
}
|
|
122
|
+
/**
|
|
123
|
+
* EvaluationAggregator - Aggregates evaluation results and provides analytics.
|
|
124
|
+
* Supports statistical analysis, trend detection, and quality monitoring.
|
|
125
|
+
*
|
|
126
|
+
* @example
|
|
127
|
+
* ```typescript
|
|
128
|
+
* const aggregator = new EvaluationAggregator();
|
|
129
|
+
*
|
|
130
|
+
* // Add evaluations
|
|
131
|
+
* aggregator.addEvaluation(evaluation1);
|
|
132
|
+
* aggregator.addEvaluation(evaluation2);
|
|
133
|
+
*
|
|
134
|
+
* // Get aggregation
|
|
135
|
+
* const result = aggregator.aggregate({ threshold: 7 });
|
|
136
|
+
* console.log(`Average score: ${result.statistics.mean}`);
|
|
137
|
+
* console.log(`Passing rate: ${result.passingRate}%`);
|
|
138
|
+
*
|
|
139
|
+
* // Get trend analysis
|
|
140
|
+
* const trend = aggregator.analyzeSequenceTrend();
|
|
141
|
+
* console.log(`Quality is ${trend.direction}`);
|
|
142
|
+
* ```
|
|
143
|
+
*/
|
|
144
|
+
export declare class EvaluationAggregator {
|
|
145
|
+
private evaluations;
|
|
146
|
+
/**
|
|
147
|
+
* Adds an evaluation to the end of the aggregator's list (insertion order drives sequence-trend analysis).
|
|
148
|
+
*
|
|
149
|
+
* @param evaluation - The evaluation data to add
|
|
150
|
+
*/
|
|
151
|
+
addEvaluation(evaluation: EvaluationData): void;
|
|
152
|
+
/**
|
|
153
|
+
* Adds multiple evaluations to the aggregator, in array order.
|
|
154
|
+
*
|
|
155
|
+
* @param evaluations - Array of evaluation data to add
|
|
156
|
+
*/
|
|
157
|
+
addEvaluations(evaluations: EvaluationData[]): void;
|
|
158
|
+
/**
|
|
159
|
+
* Clears all evaluations from the aggregator.
|
|
160
|
+
*/
|
|
161
|
+
clear(): void;
|
|
162
|
+
/**
|
|
163
|
+
* Gets the current number of evaluations.
|
|
164
|
+
*/
|
|
165
|
+
getCount(): number;
|
|
166
|
+
/**
|
|
167
|
+
* Gets all evaluations as a new array (a shallow copy of the internal list).
|
|
168
|
+
*/
|
|
169
|
+
getEvaluations(): EvaluationData[];
|
|
170
|
+
/**
|
|
171
|
+
* Aggregates all evaluations and returns comprehensive statistics; throws an aggregation error when none have been added.
|
|
172
|
+
*
|
|
173
|
+
* @param options - Aggregation options; `threshold` is the passing overall score (default: 7)
|
|
174
|
+
* @returns Comprehensive aggregation result
|
|
175
|
+
*/
|
|
176
|
+
aggregate(options?: {
|
|
177
|
+
threshold?: number;
|
|
178
|
+
}): AggregationResult;
|
|
179
|
+
/**
|
|
180
|
+
* Calculates statistical summary for a set of scores (all fields are 0 for an empty array).
|
|
181
|
+
*
|
|
182
|
+
* @param scores - Array of scores
|
|
183
|
+
* @returns Statistical summary
|
|
184
|
+
*/
|
|
185
|
+
calculateStatistics(scores: number[]): ScoreStatistics;
|
|
186
|
+
/**
|
|
187
|
+
* Counts how many scores fall into each quality range (poor through excellent).
|
|
188
|
+
*
|
|
189
|
+
* @param scores - Array of scores
|
|
190
|
+
* @returns Score distribution
|
|
191
|
+
*/
|
|
192
|
+
calculateDistribution(scores: number[]): ScoreDistribution;
|
|
193
|
+
/**
|
|
194
|
+
* Analyzes sequence-based trends in evaluation scores (based on insertion order, not time).
|
|
195
|
+
*
|
|
196
|
+
* @param windowSize - Moving average window size (default: 5)
|
|
197
|
+
* @returns Trend analysis; with fewer than two evaluations, a "stable" result with zero slope
|
|
198
|
+
*/
|
|
199
|
+
analyzeSequenceTrend(windowSize?: number): TrendAnalysis;
|
|
200
|
+
/**
|
|
201
|
+
* Analyzes each evaluation dimension separately.
|
|
202
|
+
*
|
|
203
|
+
* @param relevance - Relevance scores
|
|
204
|
+
* @param accuracy - Accuracy scores
|
|
205
|
+
* @param completeness - Completeness scores
|
|
206
|
+
* @param overall - Overall scores
|
|
207
|
+
* @returns Dimension analysis
|
|
208
|
+
*/
|
|
209
|
+
private analyzeDimensions;
|
|
210
|
+
/**
|
|
211
|
+
* Summarizes alert information from evaluations.
|
|
212
|
+
*
|
|
213
|
+
* @returns Alert summary
|
|
214
|
+
*/
|
|
215
|
+
private summarizeAlerts;
|
|
216
|
+
/**
|
|
217
|
+
* Calculates a specific percentile from sorted data.
|
|
218
|
+
*
|
|
219
|
+
* @param sorted - Sorted array of numbers
|
|
220
|
+
* @param p - Percentile (0-100)
|
|
221
|
+
* @returns The value at the percentile
|
|
222
|
+
*/
|
|
223
|
+
private percentile;
|
|
224
|
+
/**
|
|
225
|
+
* Calculates Pearson correlation between two arrays.
|
|
226
|
+
*
|
|
227
|
+
* @param x - First array
|
|
228
|
+
* @param y - Second array
|
|
229
|
+
* @returns Correlation coefficient (-1 to 1)
|
|
230
|
+
*/
|
|
231
|
+
private correlation;
|
|
232
|
+
/**
|
|
233
|
+
* Gets evaluations that failed to meet the threshold.
|
|
234
|
+
*
|
|
235
|
+
* @param threshold - The passing threshold
|
|
236
|
+
* @returns Array of failing evaluations
|
|
237
|
+
*/
|
|
238
|
+
getFailingEvaluations(threshold?: number): EvaluationData[];
|
|
239
|
+
/**
|
|
240
|
+
* Gets evaluations with high severity alerts.
|
|
241
|
+
*
|
|
242
|
+
* @returns Array of high-alert evaluations
|
|
243
|
+
*/
|
|
244
|
+
getHighAlertEvaluations(): EvaluationData[];
|
|
245
|
+
/**
|
|
246
|
+
* Gets evaluations marked as off-topic.
|
|
247
|
+
*
|
|
248
|
+
* @returns Array of off-topic evaluations
|
|
249
|
+
*/
|
|
250
|
+
getOffTopicEvaluations(): EvaluationData[];
|
|
251
|
+
/**
|
|
252
|
+
* Gets the top N performing evaluations.
|
|
253
|
+
*
|
|
254
|
+
* @param n - Number of evaluations to return
|
|
255
|
+
* @returns Array of top evaluations
|
|
256
|
+
*/
|
|
257
|
+
getTopEvaluations(n?: number): EvaluationData[];
|
|
258
|
+
/**
|
|
259
|
+
* Gets the bottom N performing evaluations.
|
|
260
|
+
*
|
|
261
|
+
* @param n - Number of evaluations to return
|
|
262
|
+
* @returns Array of bottom evaluations
|
|
263
|
+
*/
|
|
264
|
+
getBottomEvaluations(n?: number): EvaluationData[];
|
|
265
|
+
/**
|
|
266
|
+
* Generates a text summary of the aggregation.
|
|
267
|
+
*
|
|
268
|
+
* @param threshold - The passing threshold
|
|
269
|
+
* @returns Human-readable summary
|
|
270
|
+
*/
|
|
271
|
+
generateSummary(threshold?: number): string;
|
|
272
|
+
}
|
|
@@ -0,0 +1,377 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file EvaluationAggregator - Aggregates and analyzes evaluation results.
|
|
3
|
+
* Provides statistical analysis, trend detection, and summary generation.
|
|
4
|
+
*/
|
|
5
|
+
import { evaluationErrors } from "./errors/EvaluationError.js";
|
|
6
|
+
/**
|
|
7
|
+
* EvaluationAggregator - Aggregates evaluation results and provides analytics.
|
|
8
|
+
* Supports statistical analysis, trend detection, and quality monitoring.
|
|
9
|
+
*
|
|
10
|
+
* @example
|
|
11
|
+
* ```typescript
|
|
12
|
+
* const aggregator = new EvaluationAggregator();
|
|
13
|
+
*
|
|
14
|
+
* // Add evaluations
|
|
15
|
+
* aggregator.addEvaluation(evaluation1);
|
|
16
|
+
* aggregator.addEvaluation(evaluation2);
|
|
17
|
+
*
|
|
18
|
+
* // Get aggregation
|
|
19
|
+
* const result = aggregator.aggregate({ threshold: 7 });
|
|
20
|
+
* console.log(`Average score: ${result.statistics.mean}`);
|
|
21
|
+
* console.log(`Passing rate: ${result.passingRate}%`);
|
|
22
|
+
*
|
|
23
|
+
* // Get trend analysis
|
|
24
|
+
* const trend = aggregator.analyzeSequenceTrend();
|
|
25
|
+
* console.log(`Quality is ${trend.direction}`);
|
|
26
|
+
* ```
|
|
27
|
+
*/
|
|
28
|
+
export class EvaluationAggregator {
|
|
29
|
+
evaluations = [];
|
|
30
|
+
/**
|
|
31
|
+
* Adds an evaluation to the aggregator.
|
|
32
|
+
*
|
|
33
|
+
* @param evaluation - The evaluation data to add
|
|
34
|
+
*/
|
|
35
|
+
addEvaluation(evaluation) {
|
|
36
|
+
this.evaluations.push(evaluation);
|
|
37
|
+
}
|
|
38
|
+
/**
|
|
39
|
+
* Adds multiple evaluations to the aggregator.
|
|
40
|
+
*
|
|
41
|
+
* @param evaluations - Array of evaluation data to add
|
|
42
|
+
*/
|
|
43
|
+
addEvaluations(evaluations) {
|
|
44
|
+
this.evaluations.push(...evaluations);
|
|
45
|
+
}
|
|
46
|
+
/**
|
|
47
|
+
* Clears all evaluations from the aggregator.
|
|
48
|
+
*/
|
|
49
|
+
clear() {
|
|
50
|
+
this.evaluations = [];
|
|
51
|
+
}
|
|
52
|
+
/**
|
|
53
|
+
* Gets the current number of evaluations.
|
|
54
|
+
*/
|
|
55
|
+
getCount() {
|
|
56
|
+
return this.evaluations.length;
|
|
57
|
+
}
|
|
58
|
+
/**
|
|
59
|
+
* Gets all evaluations.
|
|
60
|
+
*/
|
|
61
|
+
getEvaluations() {
|
|
62
|
+
return [...this.evaluations];
|
|
63
|
+
}
|
|
64
|
+
/**
|
|
65
|
+
* Aggregates all evaluations and returns comprehensive statistics.
|
|
66
|
+
*
|
|
67
|
+
* @param options - Aggregation options
|
|
68
|
+
* @returns Comprehensive aggregation result
|
|
69
|
+
*/
|
|
70
|
+
aggregate(options = {}) {
|
|
71
|
+
const threshold = options.threshold || 7;
|
|
72
|
+
if (this.evaluations.length === 0) {
|
|
73
|
+
throw evaluationErrors.create("AGGREGATION_ERROR", "Cannot aggregate: no evaluations available", { retryable: false });
|
|
74
|
+
}
|
|
75
|
+
const overallScores = this.evaluations.map((e) => e.overall);
|
|
76
|
+
const relevanceScores = this.evaluations.map((e) => e.relevance);
|
|
77
|
+
const accuracyScores = this.evaluations.map((e) => e.accuracy);
|
|
78
|
+
const completenessScores = this.evaluations.map((e) => e.completeness);
|
|
79
|
+
const statistics = this.calculateStatistics(overallScores);
|
|
80
|
+
const distribution = this.calculateDistribution(overallScores);
|
|
81
|
+
const dimensions = this.analyzeDimensions(relevanceScores, accuracyScores, completenessScores, overallScores);
|
|
82
|
+
const alerts = this.summarizeAlerts();
|
|
83
|
+
const passingCount = this.evaluations.filter((e) => e.overall >= threshold).length;
|
|
84
|
+
const passingRate = (passingCount / this.evaluations.length) * 100;
|
|
85
|
+
const avgEvaluationTime = this.evaluations.reduce((sum, e) => sum + e.evaluationTime, 0) /
|
|
86
|
+
this.evaluations.length;
|
|
87
|
+
const evaluationModels = Array.from(new Set(this.evaluations.map((e) => e.evaluationModel)));
|
|
88
|
+
return {
|
|
89
|
+
count: this.evaluations.length,
|
|
90
|
+
statistics,
|
|
91
|
+
distribution,
|
|
92
|
+
dimensions,
|
|
93
|
+
sequenceTrend: this.evaluations.length >= 3 ? this.analyzeSequenceTrend() : undefined,
|
|
94
|
+
alerts,
|
|
95
|
+
passingRate,
|
|
96
|
+
avgEvaluationTime,
|
|
97
|
+
metadata: {
|
|
98
|
+
aggregatedAt: new Date().toISOString(),
|
|
99
|
+
threshold,
|
|
100
|
+
evaluationModels,
|
|
101
|
+
},
|
|
102
|
+
};
|
|
103
|
+
}
|
|
104
|
+
/**
|
|
105
|
+
* Calculates statistical summary for a set of scores.
|
|
106
|
+
*
|
|
107
|
+
* @param scores - Array of scores
|
|
108
|
+
* @returns Statistical summary
|
|
109
|
+
*/
|
|
110
|
+
calculateStatistics(scores) {
|
|
111
|
+
if (scores.length === 0) {
|
|
112
|
+
return {
|
|
113
|
+
min: 0,
|
|
114
|
+
max: 0,
|
|
115
|
+
mean: 0,
|
|
116
|
+
median: 0,
|
|
117
|
+
stdDev: 0,
|
|
118
|
+
variance: 0,
|
|
119
|
+
p25: 0,
|
|
120
|
+
p75: 0,
|
|
121
|
+
p90: 0,
|
|
122
|
+
p95: 0,
|
|
123
|
+
};
|
|
124
|
+
}
|
|
125
|
+
const sorted = [...scores].sort((a, b) => a - b);
|
|
126
|
+
const n = sorted.length;
|
|
127
|
+
const sum = scores.reduce((a, b) => a + b, 0);
|
|
128
|
+
const mean = sum / n;
|
|
129
|
+
const squaredDiffs = scores.map((s) => Math.pow(s - mean, 2));
|
|
130
|
+
const variance = squaredDiffs.reduce((a, b) => a + b, 0) / n;
|
|
131
|
+
const stdDev = Math.sqrt(variance);
|
|
132
|
+
return {
|
|
133
|
+
min: sorted[0],
|
|
134
|
+
max: sorted[n - 1],
|
|
135
|
+
mean,
|
|
136
|
+
median: this.percentile(sorted, 50),
|
|
137
|
+
stdDev,
|
|
138
|
+
variance,
|
|
139
|
+
p25: this.percentile(sorted, 25),
|
|
140
|
+
p75: this.percentile(sorted, 75),
|
|
141
|
+
p90: this.percentile(sorted, 90),
|
|
142
|
+
p95: this.percentile(sorted, 95),
|
|
143
|
+
};
|
|
144
|
+
}
|
|
145
|
+
/**
|
|
146
|
+
* Calculates the distribution of scores across quality ranges.
|
|
147
|
+
*
|
|
148
|
+
* @param scores - Array of scores
|
|
149
|
+
* @returns Score distribution
|
|
150
|
+
*/
|
|
151
|
+
calculateDistribution(scores) {
|
|
152
|
+
return {
|
|
153
|
+
poor: scores.filter((s) => s >= 1 && s <= 3).length,
|
|
154
|
+
belowAverage: scores.filter((s) => s >= 4 && s <= 5).length,
|
|
155
|
+
average: scores.filter((s) => s >= 6 && s <= 7).length,
|
|
156
|
+
good: scores.filter((s) => s >= 8 && s <= 9).length,
|
|
157
|
+
excellent: scores.filter((s) => s >= 10).length,
|
|
158
|
+
};
|
|
159
|
+
}
|
|
160
|
+
/**
|
|
161
|
+
* Analyzes sequence-based trends in evaluation scores (based on insertion order, not time).
|
|
162
|
+
*
|
|
163
|
+
* @param windowSize - Moving average window size (default: 5)
|
|
164
|
+
* @returns Trend analysis
|
|
165
|
+
*/
|
|
166
|
+
analyzeSequenceTrend(windowSize = 5) {
|
|
167
|
+
const scores = this.evaluations.map((e) => e.overall);
|
|
168
|
+
if (scores.length < 2) {
|
|
169
|
+
return {
|
|
170
|
+
direction: "stable",
|
|
171
|
+
slope: 0,
|
|
172
|
+
rSquared: 0,
|
|
173
|
+
percentChange: 0,
|
|
174
|
+
movingAverage: scores[0] || 0,
|
|
175
|
+
};
|
|
176
|
+
}
|
|
177
|
+
// Calculate linear regression
|
|
178
|
+
const n = scores.length;
|
|
179
|
+
const indices = scores.map((_, i) => i);
|
|
180
|
+
const sumX = indices.reduce((a, b) => a + b, 0);
|
|
181
|
+
const sumY = scores.reduce((a, b) => a + b, 0);
|
|
182
|
+
const sumXY = indices.reduce((sum, x, i) => sum + x * scores[i], 0);
|
|
183
|
+
const sumXX = indices.reduce((sum, x) => sum + x * x, 0);
|
|
184
|
+
const slope = (n * sumXY - sumX * sumY) / (n * sumXX - sumX * sumX);
|
|
185
|
+
const intercept = (sumY - slope * sumX) / n;
|
|
186
|
+
// Calculate R-squared
|
|
187
|
+
const yMean = sumY / n;
|
|
188
|
+
const ssTotal = scores.reduce((sum, y) => sum + Math.pow(y - yMean, 2), 0);
|
|
189
|
+
const ssResidual = scores.reduce((sum, y, i) => {
|
|
190
|
+
const predicted = slope * i + intercept;
|
|
191
|
+
return sum + Math.pow(y - predicted, 2);
|
|
192
|
+
}, 0);
|
|
193
|
+
const rSquared = ssTotal === 0 ? 0 : 1 - ssResidual / ssTotal;
|
|
194
|
+
// Calculate moving average
|
|
195
|
+
const window = Math.min(windowSize, scores.length);
|
|
196
|
+
const recentScores = scores.slice(-window);
|
|
197
|
+
const movingAverage = recentScores.reduce((a, b) => a + b, 0) / recentScores.length;
|
|
198
|
+
// Calculate percent change
|
|
199
|
+
const percentChange = scores[0] !== 0
|
|
200
|
+
? ((scores[scores.length - 1] - scores[0]) / scores[0]) * 100
|
|
201
|
+
: 0;
|
|
202
|
+
// Determine direction
|
|
203
|
+
let direction;
|
|
204
|
+
if (Math.abs(slope) < 0.05) {
|
|
205
|
+
direction = "stable";
|
|
206
|
+
}
|
|
207
|
+
else if (slope > 0) {
|
|
208
|
+
direction = "improving";
|
|
209
|
+
}
|
|
210
|
+
else {
|
|
211
|
+
direction = "declining";
|
|
212
|
+
}
|
|
213
|
+
return {
|
|
214
|
+
direction,
|
|
215
|
+
slope,
|
|
216
|
+
rSquared,
|
|
217
|
+
percentChange,
|
|
218
|
+
movingAverage,
|
|
219
|
+
};
|
|
220
|
+
}
|
|
221
|
+
/**
|
|
222
|
+
* Analyzes each evaluation dimension separately.
|
|
223
|
+
*
|
|
224
|
+
* @param relevance - Relevance scores
|
|
225
|
+
* @param accuracy - Accuracy scores
|
|
226
|
+
* @param completeness - Completeness scores
|
|
227
|
+
* @param overall - Overall scores
|
|
228
|
+
* @returns Dimension analysis
|
|
229
|
+
*/
|
|
230
|
+
analyzeDimensions(relevance, accuracy, completeness, overall) {
|
|
231
|
+
return {
|
|
232
|
+
relevance: this.calculateStatistics(relevance),
|
|
233
|
+
accuracy: this.calculateStatistics(accuracy),
|
|
234
|
+
completeness: this.calculateStatistics(completeness),
|
|
235
|
+
overall: this.calculateStatistics(overall),
|
|
236
|
+
correlations: {
|
|
237
|
+
relevanceAccuracy: this.correlation(relevance, accuracy),
|
|
238
|
+
relevanceCompleteness: this.correlation(relevance, completeness),
|
|
239
|
+
accuracyCompleteness: this.correlation(accuracy, completeness),
|
|
240
|
+
},
|
|
241
|
+
};
|
|
242
|
+
}
|
|
243
|
+
/**
|
|
244
|
+
* Summarizes alert information from evaluations.
|
|
245
|
+
*
|
|
246
|
+
* @returns Alert summary
|
|
247
|
+
*/
|
|
248
|
+
summarizeAlerts() {
|
|
249
|
+
const highAlerts = this.evaluations.filter((e) => e.alertSeverity === "high").length;
|
|
250
|
+
const mediumAlerts = this.evaluations.filter((e) => e.alertSeverity === "medium").length;
|
|
251
|
+
const offTopicCount = this.evaluations.filter((e) => e.isOffTopic).length;
|
|
252
|
+
const total = highAlerts + mediumAlerts;
|
|
253
|
+
return {
|
|
254
|
+
total,
|
|
255
|
+
high: highAlerts,
|
|
256
|
+
medium: mediumAlerts,
|
|
257
|
+
offTopic: offTopicCount,
|
|
258
|
+
alertRate: (total / this.evaluations.length) * 100,
|
|
259
|
+
};
|
|
260
|
+
}
|
|
261
|
+
/**
|
|
262
|
+
* Calculates a specific percentile from sorted data.
|
|
263
|
+
*
|
|
264
|
+
* @param sorted - Sorted array of numbers
|
|
265
|
+
* @param p - Percentile (0-100)
|
|
266
|
+
* @returns The value at the percentile
|
|
267
|
+
*/
|
|
268
|
+
percentile(sorted, p) {
|
|
269
|
+
if (sorted.length === 0) {
|
|
270
|
+
return 0;
|
|
271
|
+
}
|
|
272
|
+
if (sorted.length === 1) {
|
|
273
|
+
return sorted[0];
|
|
274
|
+
}
|
|
275
|
+
const index = (p / 100) * (sorted.length - 1);
|
|
276
|
+
const lower = Math.floor(index);
|
|
277
|
+
const upper = Math.ceil(index);
|
|
278
|
+
if (lower === upper) {
|
|
279
|
+
return sorted[lower];
|
|
280
|
+
}
|
|
281
|
+
const fraction = index - lower;
|
|
282
|
+
return sorted[lower] * (1 - fraction) + sorted[upper] * fraction;
|
|
283
|
+
}
|
|
284
|
+
/**
|
|
285
|
+
* Calculates Pearson correlation between two arrays.
|
|
286
|
+
*
|
|
287
|
+
* @param x - First array
|
|
288
|
+
* @param y - Second array
|
|
289
|
+
* @returns Correlation coefficient (-1 to 1)
|
|
290
|
+
*/
|
|
291
|
+
correlation(x, y) {
|
|
292
|
+
if (x.length !== y.length || x.length === 0) {
|
|
293
|
+
return 0;
|
|
294
|
+
}
|
|
295
|
+
const n = x.length;
|
|
296
|
+
const sumX = x.reduce((a, b) => a + b, 0);
|
|
297
|
+
const sumY = y.reduce((a, b) => a + b, 0);
|
|
298
|
+
const sumXY = x.reduce((sum, xi, i) => sum + xi * y[i], 0);
|
|
299
|
+
const sumXX = x.reduce((sum, xi) => sum + xi * xi, 0);
|
|
300
|
+
const sumYY = y.reduce((sum, yi) => sum + yi * yi, 0);
|
|
301
|
+
const numerator = n * sumXY - sumX * sumY;
|
|
302
|
+
const denominator = Math.sqrt((n * sumXX - sumX * sumX) * (n * sumYY - sumY * sumY));
|
|
303
|
+
return denominator === 0 ? 0 : numerator / denominator;
|
|
304
|
+
}
|
|
305
|
+
/**
|
|
306
|
+
* Gets evaluations that failed to meet the threshold.
|
|
307
|
+
*
|
|
308
|
+
* @param threshold - The passing threshold
|
|
309
|
+
* @returns Array of failing evaluations
|
|
310
|
+
*/
|
|
311
|
+
getFailingEvaluations(threshold = 7) {
|
|
312
|
+
return this.evaluations.filter((e) => e.overall < threshold);
|
|
313
|
+
}
|
|
314
|
+
/**
|
|
315
|
+
* Gets evaluations with high severity alerts.
|
|
316
|
+
*
|
|
317
|
+
* @returns Array of high-alert evaluations
|
|
318
|
+
*/
|
|
319
|
+
getHighAlertEvaluations() {
|
|
320
|
+
return this.evaluations.filter((e) => e.alertSeverity === "high");
|
|
321
|
+
}
|
|
322
|
+
/**
|
|
323
|
+
* Gets evaluations marked as off-topic.
|
|
324
|
+
*
|
|
325
|
+
* @returns Array of off-topic evaluations
|
|
326
|
+
*/
|
|
327
|
+
getOffTopicEvaluations() {
|
|
328
|
+
return this.evaluations.filter((e) => e.isOffTopic);
|
|
329
|
+
}
|
|
330
|
+
/**
|
|
331
|
+
* Gets the top N performing evaluations.
|
|
332
|
+
*
|
|
333
|
+
* @param n - Number of evaluations to return
|
|
334
|
+
* @returns Array of top evaluations
|
|
335
|
+
*/
|
|
336
|
+
getTopEvaluations(n = 5) {
|
|
337
|
+
return [...this.evaluations]
|
|
338
|
+
.sort((a, b) => b.overall - a.overall)
|
|
339
|
+
.slice(0, n);
|
|
340
|
+
}
|
|
341
|
+
/**
|
|
342
|
+
* Gets the bottom N performing evaluations.
|
|
343
|
+
*
|
|
344
|
+
* @param n - Number of evaluations to return
|
|
345
|
+
* @returns Array of bottom evaluations
|
|
346
|
+
*/
|
|
347
|
+
getBottomEvaluations(n = 5) {
|
|
348
|
+
return [...this.evaluations]
|
|
349
|
+
.sort((a, b) => a.overall - b.overall)
|
|
350
|
+
.slice(0, n);
|
|
351
|
+
}
|
|
352
|
+
/**
|
|
353
|
+
* Generates a text summary of the aggregation.
|
|
354
|
+
*
|
|
355
|
+
* @param threshold - The passing threshold
|
|
356
|
+
* @returns Human-readable summary
|
|
357
|
+
*/
|
|
358
|
+
generateSummary(threshold = 7) {
|
|
359
|
+
if (this.evaluations.length === 0) {
|
|
360
|
+
return "No evaluations to summarize.";
|
|
361
|
+
}
|
|
362
|
+
const result = this.aggregate({ threshold });
|
|
363
|
+
const trend = result.sequenceTrend;
|
|
364
|
+
let summary = `Evaluation Summary (${result.count} evaluations):\n`;
|
|
365
|
+
summary += `- Average Score: ${result.statistics.mean.toFixed(2)}/10\n`;
|
|
366
|
+
summary += `- Passing Rate: ${result.passingRate.toFixed(1)}%\n`;
|
|
367
|
+
summary += `- Score Range: ${result.statistics.min} - ${result.statistics.max}\n`;
|
|
368
|
+
summary += `- Alert Rate: ${result.alerts.alertRate.toFixed(1)}%\n`;
|
|
369
|
+
if (trend) {
|
|
370
|
+
summary += `- Quality Trend: ${trend.direction} (slope: ${trend.slope.toFixed(3)})\n`;
|
|
371
|
+
}
|
|
372
|
+
if (result.alerts.high > 0) {
|
|
373
|
+
summary += `\nWarning: ${result.alerts.high} high-severity alerts detected.\n`;
|
|
374
|
+
}
|
|
375
|
+
return summary;
|
|
376
|
+
}
|
|
377
|
+
}
|