@juspay/neurolink 9.36.1 → 9.38.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/auth/errors.d.ts +1 -1
- package/dist/auth/middleware/AuthMiddleware.d.ts +1 -1
- package/dist/auth/providers/BaseAuthProvider.d.ts +1 -1
- package/dist/browser/neurolink.min.js +1105 -556
- package/dist/cli/commands/evaluate.d.ts +48 -0
- package/dist/cli/commands/evaluate.js +955 -0
- package/dist/cli/parser.js +4 -1
- package/dist/evaluation/BatchEvaluator.d.ts +163 -0
- package/dist/evaluation/BatchEvaluator.js +267 -0
- package/dist/evaluation/EvaluationAggregator.d.ts +272 -0
- package/dist/evaluation/EvaluationAggregator.js +377 -0
- package/dist/evaluation/EvaluatorFactory.d.ts +113 -0
- package/dist/evaluation/EvaluatorFactory.js +280 -0
- package/dist/evaluation/EvaluatorRegistry.d.ts +160 -0
- package/dist/evaluation/EvaluatorRegistry.js +184 -0
- package/dist/evaluation/errors/EvaluationError.d.ts +189 -0
- package/dist/evaluation/errors/EvaluationError.js +206 -0
- package/dist/evaluation/errors/index.d.ts +4 -0
- package/dist/evaluation/errors/index.js +4 -0
- package/dist/evaluation/hooks/index.d.ts +6 -0
- package/dist/evaluation/hooks/index.js +6 -0
- package/dist/evaluation/hooks/langfuseAdapter.d.ts +99 -0
- package/dist/evaluation/hooks/langfuseAdapter.js +172 -0
- package/dist/evaluation/hooks/observabilityHooks.d.ts +129 -0
- package/dist/evaluation/hooks/observabilityHooks.js +181 -0
- package/dist/evaluation/index.d.ts +11 -2
- package/dist/evaluation/index.js +15 -0
- package/dist/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
- package/dist/evaluation/pipeline/evaluationPipeline.js +381 -0
- package/dist/evaluation/pipeline/index.d.ts +8 -0
- package/dist/evaluation/pipeline/index.js +8 -0
- package/dist/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
- package/dist/evaluation/pipeline/pipelineBuilder.js +260 -0
- package/dist/evaluation/pipeline/presets.d.ts +66 -0
- package/dist/evaluation/pipeline/presets.js +224 -0
- package/dist/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
- package/dist/evaluation/pipeline/strategies/batchStrategy.js +238 -0
- package/dist/evaluation/pipeline/strategies/index.d.ts +6 -0
- package/dist/evaluation/pipeline/strategies/index.js +6 -0
- package/dist/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
- package/dist/evaluation/pipeline/strategies/samplingStrategy.js +238 -0
- package/dist/evaluation/reporting/index.d.ts +6 -0
- package/dist/evaluation/reporting/index.js +6 -0
- package/dist/evaluation/reporting/metricsCollector.d.ts +147 -0
- package/dist/evaluation/reporting/metricsCollector.js +285 -0
- package/dist/evaluation/reporting/reportGenerator.d.ts +90 -0
- package/dist/evaluation/reporting/reportGenerator.js +374 -0
- package/dist/evaluation/scorers/baseScorer.d.ts +83 -0
- package/dist/evaluation/scorers/baseScorer.js +232 -0
- package/dist/evaluation/scorers/customScorerUtils.d.ts +95 -0
- package/dist/evaluation/scorers/customScorerUtils.js +381 -0
- package/dist/evaluation/scorers/index.d.ts +10 -0
- package/dist/evaluation/scorers/index.js +16 -0
- package/dist/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/answerRelevancyScorer.js +99 -0
- package/dist/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
- package/dist/evaluation/scorers/llm/baseLLMScorer.js +281 -0
- package/dist/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/biasDetectionScorer.js +127 -0
- package/dist/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/contextPrecisionScorer.js +92 -0
- package/dist/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/contextRelevancyScorer.js +107 -0
- package/dist/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/faithfulnessScorer.js +121 -0
- package/dist/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/hallucinationScorer.js +140 -0
- package/dist/evaluation/scorers/llm/index.d.ts +15 -0
- package/dist/evaluation/scorers/llm/index.js +16 -0
- package/dist/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/promptAlignmentScorer.js +106 -0
- package/dist/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/summarizationScorer.js +114 -0
- package/dist/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/toneConsistencyScorer.js +106 -0
- package/dist/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/toxicityScorer.js +121 -0
- package/dist/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
- package/dist/evaluation/scorers/rule/baseRuleScorer.js +233 -0
- package/dist/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
- package/dist/evaluation/scorers/rule/contentSimilarityScorer.js +350 -0
- package/dist/evaluation/scorers/rule/formatScorer.d.ts +147 -0
- package/dist/evaluation/scorers/rule/formatScorer.js +470 -0
- package/dist/evaluation/scorers/rule/index.d.ts +9 -0
- package/dist/evaluation/scorers/rule/index.js +10 -0
- package/dist/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
- package/dist/evaluation/scorers/rule/keywordCoverageScorer.js +347 -0
- package/dist/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
- package/dist/evaluation/scorers/rule/lengthScorer.js +351 -0
- package/dist/evaluation/scorers/scorerBuilder.d.ts +161 -0
- package/dist/evaluation/scorers/scorerBuilder.js +420 -0
- package/dist/evaluation/scorers/scorerRegistry.d.ts +62 -0
- package/dist/evaluation/scorers/scorerRegistry.js +467 -0
- package/dist/index.d.ts +37 -25
- package/dist/index.js +65 -26
- package/dist/lib/auth/providers/BaseAuthProvider.d.ts +1 -1
- package/dist/lib/evaluation/BatchEvaluator.d.ts +163 -0
- package/dist/lib/evaluation/BatchEvaluator.js +268 -0
- package/dist/lib/evaluation/EvaluationAggregator.d.ts +272 -0
- package/dist/lib/evaluation/EvaluationAggregator.js +378 -0
- package/dist/lib/evaluation/EvaluatorFactory.d.ts +113 -0
- package/dist/lib/evaluation/EvaluatorFactory.js +281 -0
- package/dist/lib/evaluation/EvaluatorRegistry.d.ts +160 -0
- package/dist/lib/evaluation/EvaluatorRegistry.js +185 -0
- package/dist/lib/evaluation/errors/EvaluationError.d.ts +189 -0
- package/dist/lib/evaluation/errors/EvaluationError.js +207 -0
- package/dist/lib/evaluation/errors/index.d.ts +4 -0
- package/dist/lib/evaluation/errors/index.js +5 -0
- package/dist/lib/evaluation/hooks/index.d.ts +6 -0
- package/dist/lib/evaluation/hooks/index.js +7 -0
- package/dist/lib/evaluation/hooks/langfuseAdapter.d.ts +99 -0
- package/dist/lib/evaluation/hooks/langfuseAdapter.js +173 -0
- package/dist/lib/evaluation/hooks/observabilityHooks.d.ts +129 -0
- package/dist/lib/evaluation/hooks/observabilityHooks.js +182 -0
- package/dist/lib/evaluation/index.d.ts +11 -2
- package/dist/lib/evaluation/index.js +15 -0
- package/dist/lib/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
- package/dist/lib/evaluation/pipeline/evaluationPipeline.js +382 -0
- package/dist/lib/evaluation/pipeline/index.d.ts +8 -0
- package/dist/lib/evaluation/pipeline/index.js +9 -0
- package/dist/lib/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
- package/dist/lib/evaluation/pipeline/pipelineBuilder.js +261 -0
- package/dist/lib/evaluation/pipeline/presets.d.ts +66 -0
- package/dist/lib/evaluation/pipeline/presets.js +225 -0
- package/dist/lib/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
- package/dist/lib/evaluation/pipeline/strategies/batchStrategy.js +239 -0
- package/dist/lib/evaluation/pipeline/strategies/index.d.ts +6 -0
- package/dist/lib/evaluation/pipeline/strategies/index.js +7 -0
- package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
- package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.js +239 -0
- package/dist/lib/evaluation/reporting/index.d.ts +6 -0
- package/dist/lib/evaluation/reporting/index.js +7 -0
- package/dist/lib/evaluation/reporting/metricsCollector.d.ts +147 -0
- package/dist/lib/evaluation/reporting/metricsCollector.js +286 -0
- package/dist/lib/evaluation/reporting/reportGenerator.d.ts +90 -0
- package/dist/lib/evaluation/reporting/reportGenerator.js +375 -0
- package/dist/lib/evaluation/scorers/baseScorer.d.ts +83 -0
- package/dist/lib/evaluation/scorers/baseScorer.js +233 -0
- package/dist/lib/evaluation/scorers/customScorerUtils.d.ts +95 -0
- package/dist/lib/evaluation/scorers/customScorerUtils.js +382 -0
- package/dist/lib/evaluation/scorers/index.d.ts +10 -0
- package/dist/lib/evaluation/scorers/index.js +17 -0
- package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.js +100 -0
- package/dist/lib/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
- package/dist/lib/evaluation/scorers/llm/baseLLMScorer.js +282 -0
- package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.js +128 -0
- package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.js +93 -0
- package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.js +108 -0
- package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.js +122 -0
- package/dist/lib/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/hallucinationScorer.js +141 -0
- package/dist/lib/evaluation/scorers/llm/index.d.ts +15 -0
- package/dist/lib/evaluation/scorers/llm/index.js +17 -0
- package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.js +107 -0
- package/dist/lib/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/summarizationScorer.js +115 -0
- package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.js +107 -0
- package/dist/lib/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/toxicityScorer.js +122 -0
- package/dist/lib/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
- package/dist/lib/evaluation/scorers/rule/baseRuleScorer.js +234 -0
- package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
- package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.js +351 -0
- package/dist/lib/evaluation/scorers/rule/formatScorer.d.ts +147 -0
- package/dist/lib/evaluation/scorers/rule/formatScorer.js +471 -0
- package/dist/lib/evaluation/scorers/rule/index.d.ts +9 -0
- package/dist/lib/evaluation/scorers/rule/index.js +11 -0
- package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
- package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.js +348 -0
- package/dist/lib/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
- package/dist/lib/evaluation/scorers/rule/lengthScorer.js +352 -0
- package/dist/lib/evaluation/scorers/scorerBuilder.d.ts +161 -0
- package/dist/lib/evaluation/scorers/scorerBuilder.js +421 -0
- package/dist/lib/evaluation/scorers/scorerRegistry.d.ts +62 -0
- package/dist/lib/evaluation/scorers/scorerRegistry.js +468 -0
- package/dist/lib/index.d.ts +37 -25
- package/dist/lib/index.js +65 -26
- package/dist/lib/neurolink.d.ts +204 -0
- package/dist/lib/neurolink.js +296 -0
- package/dist/lib/processors/media/VideoProcessor.d.ts +8 -2
- package/dist/lib/processors/media/VideoProcessor.js +90 -41
- package/dist/lib/telemetry/telemetryService.d.ts +1 -1
- package/dist/lib/telemetry/telemetryService.js +27 -13
- package/dist/lib/types/index.d.ts +3 -1
- package/dist/lib/types/index.js +3 -2
- package/dist/lib/types/scorerTypes.d.ts +423 -0
- package/dist/lib/types/scorerTypes.js +6 -0
- package/dist/lib/utils/errorHandling.d.ts +20 -0
- package/dist/lib/utils/errorHandling.js +60 -0
- package/dist/neurolink.d.ts +204 -0
- package/dist/neurolink.js +296 -0
- package/dist/processors/media/VideoProcessor.d.ts +8 -2
- package/dist/processors/media/VideoProcessor.js +90 -41
- package/dist/telemetry/telemetryService.d.ts +1 -1
- package/dist/telemetry/telemetryService.js +27 -13
- package/dist/types/index.d.ts +3 -1
- package/dist/types/index.js +3 -2
- package/dist/types/scorerTypes.d.ts +423 -0
- package/dist/types/scorerTypes.js +5 -0
- package/dist/utils/errorHandling.d.ts +20 -0
- package/dist/utils/errorHandling.js +60 -0
- package/package.json +7 -7
- package/dist/processors/media/ffprobe-static.d.ts +0 -4
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Metrics Collector
|
|
3
|
+
* Collect and aggregate evaluation metrics
|
|
4
|
+
*/
|
|
5
|
+
import type { ScoreResult } from "../../types/scorerTypes.js";
|
|
6
|
+
import type { PipelineResult } from "../pipeline/evaluationPipeline.js";
|
|
7
|
+
/**
 * Running totals and derived statistics kept for a single scorer.
 */
export type ScorerMetrics = {
    /** Identifier the record is stored under. */
    scorerId: string;
    /** Display name supplied when the record was created. */
    scorerName: string;
    /** Number of results recorded for this scorer. */
    totalExecutions: number;
    /** Results recorded without an `error` value. */
    successfulExecutions: number;
    /** Results recorded with an `error` value. */
    failedExecutions: number;
    /** Results whose `passed` flag was truthy. */
    passedCount: number;
    /** Results whose `passed` flag was falsy. */
    failedCount: number;
    /** Sum of every recorded score. */
    totalScore: number;
    /** Lowest recorded score; `Infinity` until a result is recorded. */
    minScore: number;
    /** Highest recorded score; `-Infinity` until a result is recorded. */
    maxScore: number;
    /** Sum of every recorded compute time. */
    totalDuration: number;
    /** `totalDuration / totalExecutions`. */
    averageDuration: number;
    /** `totalScore / totalExecutions`. */
    averageScore: number;
    /** `passedCount / totalExecutions`. */
    passRate: number;
    /** `Date.now()` at the most recent update; `0` before any update. */
    lastExecutionTime: number;
};
|
|
27
|
+
/**
 * Running totals and derived statistics kept for a single pipeline,
 * including a per-scorer breakdown for the scores that pipeline produced.
 */
export type PipelineMetrics = {
    /** Name the record is stored under. */
    pipelineName: string;
    /** Number of pipeline results recorded. */
    totalExecutions: number;
    /** Results whose `passed` flag was truthy. */
    passedCount: number;
    /** Results whose `passed` flag was falsy. */
    failedCount: number;
    /** Sum of every recorded overall score. */
    totalScore: number;
    /** Lowest recorded overall score; `Infinity` until a result is recorded. */
    minScore: number;
    /** Highest recorded overall score; `-Infinity` until a result is recorded. */
    maxScore: number;
    /** Sum of every recorded total compute time. */
    totalDuration: number;
    /** `totalDuration / totalExecutions`. */
    averageDuration: number;
    /** `totalScore / totalExecutions`. */
    averageScore: number;
    /** `passedCount / totalExecutions`. */
    passRate: number;
    /** `Date.now()` at the most recent update; `0` before any update. */
    lastExecutionTime: number;
    /** Metrics for each scorer seen in this pipeline's results, keyed by scorer id. */
    scorerMetrics: Map<string, ScorerMetrics>;
};
|
|
45
|
+
/**
 * Collector-wide view returned by `MetricsCollector.getMetrics()`.
 */
export type AggregatedMetrics = {
    /** Number of pipeline results recorded. */
    totalEvaluations: number;
    /** Passed pipeline results divided by `totalEvaluations` (0 when nothing was recorded). */
    overallPassRate: number;
    /** Mean overall pipeline score (0 when nothing was recorded). */
    averageScore: number;
    /** Mean pipeline compute time (0 when nothing was recorded). */
    averageDuration: number;
    /** Count of pipeline overall scores per quality bucket. */
    scoreDistribution: {
        /** Scores of 9 or more. */
        excellent: number;
        /** Scores of 7 or more, below 9. */
        good: number;
        /** Scores of 5 or more, below 7. */
        fair: number;
        /** Scores of 3 or more, below 5. */
        poor: number;
        /** Everything else. */
        failing: number;
    };
    /** Per-pipeline metrics keyed by pipeline name. */
    pipelineMetrics: Map<string, PipelineMetrics>;
    /** Per-scorer metrics keyed by scorer id. */
    scorerMetrics: Map<string, ScorerMetrics>;
    /** `Date.now()` when collection started or was last reset. */
    collectionStartTime: number;
    /** `Date.now()` of the most recent record or reset. */
    lastUpdateTime: number;
};
|
|
65
|
+
/**
 * Collects evaluation metrics per scorer and per pipeline and exposes
 * aggregated, summarised, and JSON views of them.
 */
export declare class MetricsCollector {
    private _pipelineMetrics;
    private _scorerMetrics;
    private _collectionStartTime;
    private _lastUpdateTime;
    private _totalEvaluations;
    private _scoreDistribution;
    /**
     * Fold one scorer result into the record stored under `scorerId`,
     * creating the record on first use.
     *
     * @param scorerId - Key of the scorer record.
     * @param scorerName - Name stored on a newly created record.
     * @param result - Score result to accumulate.
     */
    recordScorer(scorerId: string, scorerName: string, result: ScoreResult): void;
    /**
     * Fold one pipeline result into the record for its pipeline and into
     * the collector-wide score distribution and evaluation count.
     *
     * @param result - Pipeline result to accumulate.
     */
    recordPipeline(result: PipelineResult): void;
    /**
     * Build the collector-wide aggregate view.
     *
     * @returns Totals, averages, score distribution, and per-pipeline /
     * per-scorer metrics.
     */
    getMetrics(): AggregatedMetrics;
    /**
     * Look up the record of one scorer.
     *
     * @param scorerId - Key of the scorer record.
     * @returns The record, or `undefined` when none exists for that id.
     */
    getScorerMetrics(scorerId: string): ScorerMetrics | undefined;
    /**
     * Look up the record of one pipeline.
     *
     * @param pipelineName - Name of the pipeline record.
     * @returns The record, or `undefined` when none exists for that name.
     */
    getPipelineMetrics(pipelineName: string): PipelineMetrics | undefined;
    /**
     * Build a compact summary.
     *
     * @returns Overall counts and rates plus up to five scorers with the
     * highest pass rate (`topScorers`, best first) and up to five with the
     * lowest (`bottomScorers`, worst first).
     */
    getSummary(): {
        totalEvaluations: number;
        passRate: number;
        averageScore: number;
        topScorers: {
            id: string;
            passRate: number;
        }[];
        bottomScorers: {
            id: string;
            passRate: number;
        }[];
    };
    /**
     * Serialise the aggregate metrics.
     *
     * @returns A JSON string indented with two spaces.
     */
    exportJson(): string;
    /**
     * Discard every record and restart the collection clock.
     */
    reset(): void;
    /**
     * Build a zeroed scorer record.
     */
    private _createEmptyScorerMetrics;
    /**
     * Build a zeroed pipeline record.
     */
    private _createEmptyPipelineMetrics;
    /**
     * Accumulate one score result into a scorer record.
     */
    private _updateScorerMetrics;
    /**
     * Accumulate one pipeline result into a pipeline record.
     */
    private _updatePipelineMetrics;
    /**
     * Increment the distribution bucket matching a score.
     */
    private _updateScoreDistribution;
}
|
|
140
|
+
/**
 * Factory for a fresh, empty collector.
 *
 * @returns A new `MetricsCollector` instance.
 */
export declare function createMetricsCollector(): MetricsCollector;
|
|
144
|
+
/**
 * Module-level collector instance exported for shared use.
 */
export declare const globalMetricsCollector: MetricsCollector;
|
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Metrics Collector
|
|
3
|
+
* Collect and aggregate evaluation metrics
|
|
4
|
+
*/
|
|
5
|
+
/**
 * Metrics collector for evaluation data.
 *
 * Keeps running totals per scorer and per pipeline, a count of recorded
 * pipeline evaluations, and a bucketed distribution of overall pipeline
 * scores.
 */
export class MetricsCollector {
    /** Pipeline name -> pipeline metrics record. */
    _pipelineMetrics = new Map();
    /** Scorer id -> scorer metrics record (filled by recordScorer). */
    _scorerMetrics = new Map();
    /** Date.now() when collection started or was last reset. */
    _collectionStartTime = Date.now();
    /** Date.now() of the most recent record or reset call. */
    _lastUpdateTime = Date.now();
    /** Number of pipeline results recorded. */
    _totalEvaluations = 0;
    /** Count of pipeline overall scores per quality bucket. */
    _scoreDistribution = {
        excellent: 0,
        good: 0,
        fair: 0,
        poor: 0,
        failing: 0,
    };
    /**
     * Record a scorer execution.
     *
     * @param scorerId - Key of the scorer record.
     * @param scorerName - Name stored on a newly created record.
     * @param result - Result object; `error`, `passed`, `score` and
     * `computeTime` are read from it.
     */
    recordScorer(scorerId, scorerName, result) {
        let metrics = this._scorerMetrics.get(scorerId);
        if (!metrics) {
            metrics = this._createEmptyScorerMetrics(scorerId, scorerName);
            this._scorerMetrics.set(scorerId, metrics);
        }
        this._updateScorerMetrics(metrics, result);
        this._lastUpdateTime = Date.now();
    }
    /**
     * Record a pipeline execution.
     *
     * The record is keyed by `result.pipelineConfig.name`, falling back to
     * "unnamed" when the name is null or undefined.
     *
     * @param result - Pipeline result; `pipelineConfig`, `passed`,
     * `overallScore`, `totalComputeTime` and `scores` are read from it.
     */
    recordPipeline(result) {
        const pipelineName = result.pipelineConfig.name ?? "unnamed";
        let metrics = this._pipelineMetrics.get(pipelineName);
        if (!metrics) {
            metrics = this._createEmptyPipelineMetrics(pipelineName);
            this._pipelineMetrics.set(pipelineName, metrics);
        }
        this._updatePipelineMetrics(metrics, result);
        this._updateScoreDistribution(result.overallScore);
        this._totalEvaluations++;
        this._lastUpdateTime = Date.now();
    }
    /**
     * Get aggregated metrics.
     *
     * The returned object is a detached snapshot: every metrics record and
     * every nested per-pipeline scorer map is copied, so later recordings
     * do not change it and mutating it does not affect the collector.
     *
     * @returns Totals, averages, distribution and per-pipeline / per-scorer
     * records. Averages and the pass rate are 0 when no pipeline result has
     * been recorded.
     */
    getMetrics() {
        let totalScore = 0;
        let totalDuration = 0;
        let totalPassed = 0;
        for (const metrics of this._pipelineMetrics.values()) {
            totalScore += metrics.totalScore;
            totalDuration += metrics.totalDuration;
            totalPassed += metrics.passedCount;
        }
        // Scorer records hold only primitives, so a one-level spread is a
        // full copy of each record.
        const copyScorerMap = (source) => {
            const copy = new Map();
            for (const [id, record] of source) {
                copy.set(id, { ...record });
            }
            return copy;
        };
        // Pipeline records additionally own a nested scorer map that must
        // be copied too, otherwise the snapshot would still share it.
        const pipelineSnapshot = new Map();
        for (const [name, record] of this._pipelineMetrics) {
            pipelineSnapshot.set(name, {
                ...record,
                scorerMetrics: copyScorerMap(record.scorerMetrics),
            });
        }
        return {
            totalEvaluations: this._totalEvaluations,
            overallPassRate: this._totalEvaluations > 0 ? totalPassed / this._totalEvaluations : 0,
            averageScore: this._totalEvaluations > 0 ? totalScore / this._totalEvaluations : 0,
            averageDuration: this._totalEvaluations > 0 ? totalDuration / this._totalEvaluations : 0,
            scoreDistribution: { ...this._scoreDistribution },
            pipelineMetrics: pipelineSnapshot,
            scorerMetrics: copyScorerMap(this._scorerMetrics),
            collectionStartTime: this._collectionStartTime,
            lastUpdateTime: this._lastUpdateTime,
        };
    }
    /**
     * Get metrics for a specific scorer.
     *
     * @param scorerId - Key of the scorer record.
     * @returns The stored record, or undefined when none exists.
     */
    getScorerMetrics(scorerId) {
        return this._scorerMetrics.get(scorerId);
    }
    /**
     * Get metrics for a specific pipeline.
     *
     * @param pipelineName - Name of the pipeline record.
     * @returns The stored record, or undefined when none exists.
     */
    getPipelineMetrics(pipelineName) {
        return this._pipelineMetrics.get(pipelineName);
    }
    /**
     * Get summary statistics.
     *
     * @returns Overall counts and rates, the (up to) five scorers with the
     * highest pass rate, and the (up to) five with the lowest pass rate
     * listed worst first.
     */
    getSummary() {
        // Sorted by pass rate, highest first.
        const scorersList = Array.from(this._scorerMetrics.values())
            .filter((m) => m.totalExecutions > 0)
            .sort((a, b) => b.passRate - a.passRate);
        const metrics = this.getMetrics();
        return {
            totalEvaluations: metrics.totalEvaluations,
            passRate: metrics.overallPassRate,
            averageScore: metrics.averageScore,
            topScorers: scorersList.slice(0, 5).map((m) => ({
                id: m.scorerId,
                passRate: m.passRate,
            })),
            // Tail of the descending list, reversed so the worst comes first.
            bottomScorers: scorersList
                .slice(-5)
                .reverse()
                .map((m) => ({
                id: m.scorerId,
                passRate: m.passRate,
            })),
        };
    }
    /**
     * Export metrics as JSON.
     *
     * @returns A JSON string indented with two spaces. Scorer min/max
     * scores that are not finite are written as null.
     */
    exportJson() {
        const metrics = this.getMetrics();
        return JSON.stringify({
            totalEvaluations: metrics.totalEvaluations,
            overallPassRate: metrics.overallPassRate,
            averageScore: metrics.averageScore,
            averageDuration: metrics.averageDuration,
            scoreDistribution: metrics.scoreDistribution,
            collectionStartTime: metrics.collectionStartTime,
            lastUpdateTime: metrics.lastUpdateTime,
            pipelines: Array.from(metrics.pipelineMetrics.entries()).map(([name, pm]) => ({
                name,
                totalExecutions: pm.totalExecutions,
                passRate: pm.passRate,
                averageScore: pm.averageScore,
                averageDuration: pm.averageDuration,
            })),
            scorers: Array.from(metrics.scorerMetrics.entries()).map(([id, sm]) => ({
                id,
                name: sm.scorerName,
                totalExecutions: sm.totalExecutions,
                passRate: sm.passRate,
                averageScore: sm.averageScore,
                averageDuration: sm.averageDuration,
                minScore: Number.isFinite(sm.minScore) ? sm.minScore : null,
                maxScore: Number.isFinite(sm.maxScore) ? sm.maxScore : null,
            })),
        }, null, 2);
    }
    /**
     * Reset all metrics and restart the collection clock.
     */
    reset() {
        this._pipelineMetrics.clear();
        this._scorerMetrics.clear();
        this._collectionStartTime = Date.now();
        this._lastUpdateTime = Date.now();
        this._totalEvaluations = 0;
        this._scoreDistribution = {
            excellent: 0,
            good: 0,
            fair: 0,
            poor: 0,
            failing: 0,
        };
    }
    /**
     * Create empty scorer metrics.
     *
     * minScore / maxScore start at +/-Infinity so the first recorded score
     * always replaces them.
     */
    _createEmptyScorerMetrics(scorerId, scorerName) {
        return {
            scorerId,
            scorerName,
            totalExecutions: 0,
            successfulExecutions: 0,
            failedExecutions: 0,
            passedCount: 0,
            failedCount: 0,
            totalScore: 0,
            minScore: Infinity,
            maxScore: -Infinity,
            totalDuration: 0,
            averageDuration: 0,
            averageScore: 0,
            passRate: 0,
            lastExecutionTime: 0,
        };
    }
    /**
     * Create empty pipeline metrics.
     *
     * minScore / maxScore start at +/-Infinity so the first recorded score
     * always replaces them.
     */
    _createEmptyPipelineMetrics(pipelineName) {
        return {
            pipelineName,
            totalExecutions: 0,
            passedCount: 0,
            failedCount: 0,
            totalScore: 0,
            minScore: Infinity,
            maxScore: -Infinity,
            totalDuration: 0,
            averageDuration: 0,
            averageScore: 0,
            passRate: 0,
            lastExecutionTime: 0,
            scorerMetrics: new Map(),
        };
    }
    /**
     * Update scorer metrics with new result.
     *
     * Counters are bumped first so the derived averages and pass rate
     * divide by the updated execution count.
     */
    _updateScorerMetrics(metrics, result) {
        metrics.totalExecutions++;
        if (result.error) {
            metrics.failedExecutions++;
        }
        else {
            metrics.successfulExecutions++;
        }
        if (result.passed) {
            metrics.passedCount++;
        }
        else {
            metrics.failedCount++;
        }
        metrics.totalScore += result.score;
        metrics.minScore = Math.min(metrics.minScore, result.score);
        metrics.maxScore = Math.max(metrics.maxScore, result.score);
        metrics.totalDuration += result.computeTime;
        metrics.averageScore = metrics.totalScore / metrics.totalExecutions;
        metrics.averageDuration = metrics.totalDuration / metrics.totalExecutions;
        metrics.passRate = metrics.passedCount / metrics.totalExecutions;
        metrics.lastExecutionTime = Date.now();
    }
    /**
     * Update pipeline metrics with new result, including the pipeline's
     * own per-scorer breakdown.
     */
    _updatePipelineMetrics(metrics, result) {
        metrics.totalExecutions++;
        if (result.passed) {
            metrics.passedCount++;
        }
        else {
            metrics.failedCount++;
        }
        metrics.totalScore += result.overallScore;
        metrics.minScore = Math.min(metrics.minScore, result.overallScore);
        metrics.maxScore = Math.max(metrics.maxScore, result.overallScore);
        metrics.totalDuration += result.totalComputeTime;
        metrics.averageScore = metrics.totalScore / metrics.totalExecutions;
        metrics.averageDuration = metrics.totalDuration / metrics.totalExecutions;
        metrics.passRate = metrics.passedCount / metrics.totalExecutions;
        metrics.lastExecutionTime = Date.now();
        // Update individual scorer metrics within pipeline
        for (const scoreResult of result.scores) {
            let scorerMetrics = metrics.scorerMetrics.get(scoreResult.scorerId);
            if (!scorerMetrics) {
                scorerMetrics = this._createEmptyScorerMetrics(scoreResult.scorerId, scoreResult.scorerName);
                metrics.scorerMetrics.set(scoreResult.scorerId, scorerMetrics);
            }
            this._updateScorerMetrics(scorerMetrics, scoreResult);
        }
    }
    /**
     * Update score distribution.
     *
     * Buckets: >= 9 excellent, >= 7 good, >= 5 fair, >= 3 poor, anything
     * else failing.
     */
    _updateScoreDistribution(score) {
        if (score >= 9) {
            this._scoreDistribution.excellent++;
        }
        else if (score >= 7) {
            this._scoreDistribution.good++;
        }
        else if (score >= 5) {
            this._scoreDistribution.fair++;
        }
        else if (score >= 3) {
            this._scoreDistribution.poor++;
        }
        else {
            this._scoreDistribution.failing++;
        }
    }
}
|
|
276
|
+
/**
 * Factory for a fresh, empty metrics collector.
 *
 * @returns A newly constructed MetricsCollector.
 */
export function createMetricsCollector() {
    const collector = new MetricsCollector();
    return collector;
}
|
|
282
|
+
/**
 * Module-level collector instance, created once when this module is loaded.
 */
export const globalMetricsCollector = createMetricsCollector();
|
|
286
|
+
//# sourceMappingURL=metricsCollector.js.map
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Report Generator
|
|
3
|
+
* Generate evaluation reports in various formats
|
|
4
|
+
*/
|
|
5
|
+
import type { JsonObject } from "../../types/common.js";
|
|
6
|
+
import type { AggregatedScores, ReportConfig, ReportFormat } from "../../types/scorerTypes.js";
|
|
7
|
+
import type { PipelineResult } from "../pipeline/evaluationPipeline.js";
|
|
8
|
+
/**
 * Input handed to the report generator.
 */
export type ReportData = {
    /** Title of the report. */
    title: string;
    /** Numeric timestamp associated with the report. */
    timestamp: number;
    /** Evaluation outcome to report on: a single pipeline result or aggregated scores. */
    result: PipelineResult | AggregatedScores;
    /** Extra sections supplied by the caller; each has a title and text or JSON content. */
    customSections?: {
        title: string;
        content: string | JsonObject;
    }[];
};
|
|
24
|
+
/**
 * Output of a report generation call.
 */
export type GeneratedReport = {
    /** Format of `content`. */
    format: ReportFormat;
    /** The report itself, as a string. */
    content: string;
    /** Information about how the report was produced. */
    metadata: {
        /** Numeric timestamp of generation. */
        generatedAt: number;
        /** Format recorded alongside the report. */
        format: ReportFormat;
        /** Report configuration recorded alongside the report. */
        config: ReportConfig;
    };
};
|
|
39
|
+
/**
 * Produces evaluation reports from `ReportData`.
 *
 * The private members indicate text, JSON, Markdown and HTML output paths
 * plus an HTML-escaping helper; their implementation is not part of this
 * declaration.
 */
export declare class ReportGenerator {
    private _config;
    /**
     * @param config - Optional partial report configuration.
     */
    constructor(config?: Partial<ReportConfig>);
    /**
     * Produce a report.
     *
     * @param data - Title, timestamp, result and optional custom sections.
     * @returns The generated report with its format and metadata.
     */
    generate(data: ReportData): GeneratedReport;
    /**
     * Text output path.
     */
    private _generateTextReport;
    /**
     * JSON output path.
     */
    private _generateJsonReport;
    /**
     * Markdown output path.
     */
    private _generateMarkdownReport;
    /**
     * HTML output path.
     */
    private _generateHtmlReport;
    /**
     * Escapes HTML special characters.
     */
    private _escapeHtml;
    /**
     * Change the generator's configuration.
     *
     * @param config - Partial report configuration to apply.
     */
    configure(config: Partial<ReportConfig>): void;
}
|
|
74
|
+
/**
 * Factory for a report generator.
 *
 * @param config - Optional partial report configuration.
 * @returns A `ReportGenerator` instance.
 */
export declare function createReportGenerator(config?: Partial<ReportConfig>): ReportGenerator;
|
|
78
|
+
/**
 * Shortcut functions for one-off report generation, one per output format.
 * Each takes the report data and returns the generated report.
 */
export declare const Reports: {
    /** Text report. */
    text: (data: ReportData) => GeneratedReport;
    /** JSON report. */
    json: (data: ReportData) => GeneratedReport;
    /** Markdown report. */
    markdown: (data: ReportData) => GeneratedReport;
    /** HTML report. */
    html: (data: ReportData) => GeneratedReport;
};
|