@juspay/neurolink 9.36.1 → 9.38.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211) hide show
  1. package/CHANGELOG.md +12 -0
  2. package/dist/auth/errors.d.ts +1 -1
  3. package/dist/auth/middleware/AuthMiddleware.d.ts +1 -1
  4. package/dist/auth/providers/BaseAuthProvider.d.ts +1 -1
  5. package/dist/browser/neurolink.min.js +1105 -556
  6. package/dist/cli/commands/evaluate.d.ts +48 -0
  7. package/dist/cli/commands/evaluate.js +955 -0
  8. package/dist/cli/parser.js +4 -1
  9. package/dist/evaluation/BatchEvaluator.d.ts +163 -0
  10. package/dist/evaluation/BatchEvaluator.js +267 -0
  11. package/dist/evaluation/EvaluationAggregator.d.ts +272 -0
  12. package/dist/evaluation/EvaluationAggregator.js +377 -0
  13. package/dist/evaluation/EvaluatorFactory.d.ts +113 -0
  14. package/dist/evaluation/EvaluatorFactory.js +280 -0
  15. package/dist/evaluation/EvaluatorRegistry.d.ts +160 -0
  16. package/dist/evaluation/EvaluatorRegistry.js +184 -0
  17. package/dist/evaluation/errors/EvaluationError.d.ts +189 -0
  18. package/dist/evaluation/errors/EvaluationError.js +206 -0
  19. package/dist/evaluation/errors/index.d.ts +4 -0
  20. package/dist/evaluation/errors/index.js +4 -0
  21. package/dist/evaluation/hooks/index.d.ts +6 -0
  22. package/dist/evaluation/hooks/index.js +6 -0
  23. package/dist/evaluation/hooks/langfuseAdapter.d.ts +99 -0
  24. package/dist/evaluation/hooks/langfuseAdapter.js +172 -0
  25. package/dist/evaluation/hooks/observabilityHooks.d.ts +129 -0
  26. package/dist/evaluation/hooks/observabilityHooks.js +181 -0
  27. package/dist/evaluation/index.d.ts +11 -2
  28. package/dist/evaluation/index.js +15 -0
  29. package/dist/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
  30. package/dist/evaluation/pipeline/evaluationPipeline.js +381 -0
  31. package/dist/evaluation/pipeline/index.d.ts +8 -0
  32. package/dist/evaluation/pipeline/index.js +8 -0
  33. package/dist/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
  34. package/dist/evaluation/pipeline/pipelineBuilder.js +260 -0
  35. package/dist/evaluation/pipeline/presets.d.ts +66 -0
  36. package/dist/evaluation/pipeline/presets.js +224 -0
  37. package/dist/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
  38. package/dist/evaluation/pipeline/strategies/batchStrategy.js +238 -0
  39. package/dist/evaluation/pipeline/strategies/index.d.ts +6 -0
  40. package/dist/evaluation/pipeline/strategies/index.js +6 -0
  41. package/dist/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
  42. package/dist/evaluation/pipeline/strategies/samplingStrategy.js +238 -0
  43. package/dist/evaluation/reporting/index.d.ts +6 -0
  44. package/dist/evaluation/reporting/index.js +6 -0
  45. package/dist/evaluation/reporting/metricsCollector.d.ts +147 -0
  46. package/dist/evaluation/reporting/metricsCollector.js +285 -0
  47. package/dist/evaluation/reporting/reportGenerator.d.ts +90 -0
  48. package/dist/evaluation/reporting/reportGenerator.js +374 -0
  49. package/dist/evaluation/scorers/baseScorer.d.ts +83 -0
  50. package/dist/evaluation/scorers/baseScorer.js +232 -0
  51. package/dist/evaluation/scorers/customScorerUtils.d.ts +95 -0
  52. package/dist/evaluation/scorers/customScorerUtils.js +381 -0
  53. package/dist/evaluation/scorers/index.d.ts +10 -0
  54. package/dist/evaluation/scorers/index.js +16 -0
  55. package/dist/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
  56. package/dist/evaluation/scorers/llm/answerRelevancyScorer.js +99 -0
  57. package/dist/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
  58. package/dist/evaluation/scorers/llm/baseLLMScorer.js +281 -0
  59. package/dist/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
  60. package/dist/evaluation/scorers/llm/biasDetectionScorer.js +127 -0
  61. package/dist/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
  62. package/dist/evaluation/scorers/llm/contextPrecisionScorer.js +92 -0
  63. package/dist/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
  64. package/dist/evaluation/scorers/llm/contextRelevancyScorer.js +107 -0
  65. package/dist/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
  66. package/dist/evaluation/scorers/llm/faithfulnessScorer.js +121 -0
  67. package/dist/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
  68. package/dist/evaluation/scorers/llm/hallucinationScorer.js +140 -0
  69. package/dist/evaluation/scorers/llm/index.d.ts +15 -0
  70. package/dist/evaluation/scorers/llm/index.js +16 -0
  71. package/dist/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
  72. package/dist/evaluation/scorers/llm/promptAlignmentScorer.js +106 -0
  73. package/dist/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
  74. package/dist/evaluation/scorers/llm/summarizationScorer.js +114 -0
  75. package/dist/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
  76. package/dist/evaluation/scorers/llm/toneConsistencyScorer.js +106 -0
  77. package/dist/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
  78. package/dist/evaluation/scorers/llm/toxicityScorer.js +121 -0
  79. package/dist/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
  80. package/dist/evaluation/scorers/rule/baseRuleScorer.js +233 -0
  81. package/dist/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
  82. package/dist/evaluation/scorers/rule/contentSimilarityScorer.js +350 -0
  83. package/dist/evaluation/scorers/rule/formatScorer.d.ts +147 -0
  84. package/dist/evaluation/scorers/rule/formatScorer.js +470 -0
  85. package/dist/evaluation/scorers/rule/index.d.ts +9 -0
  86. package/dist/evaluation/scorers/rule/index.js +10 -0
  87. package/dist/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
  88. package/dist/evaluation/scorers/rule/keywordCoverageScorer.js +347 -0
  89. package/dist/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
  90. package/dist/evaluation/scorers/rule/lengthScorer.js +351 -0
  91. package/dist/evaluation/scorers/scorerBuilder.d.ts +161 -0
  92. package/dist/evaluation/scorers/scorerBuilder.js +420 -0
  93. package/dist/evaluation/scorers/scorerRegistry.d.ts +62 -0
  94. package/dist/evaluation/scorers/scorerRegistry.js +467 -0
  95. package/dist/index.d.ts +37 -25
  96. package/dist/index.js +65 -26
  97. package/dist/lib/auth/providers/BaseAuthProvider.d.ts +1 -1
  98. package/dist/lib/evaluation/BatchEvaluator.d.ts +163 -0
  99. package/dist/lib/evaluation/BatchEvaluator.js +268 -0
  100. package/dist/lib/evaluation/EvaluationAggregator.d.ts +272 -0
  101. package/dist/lib/evaluation/EvaluationAggregator.js +378 -0
  102. package/dist/lib/evaluation/EvaluatorFactory.d.ts +113 -0
  103. package/dist/lib/evaluation/EvaluatorFactory.js +281 -0
  104. package/dist/lib/evaluation/EvaluatorRegistry.d.ts +160 -0
  105. package/dist/lib/evaluation/EvaluatorRegistry.js +185 -0
  106. package/dist/lib/evaluation/errors/EvaluationError.d.ts +189 -0
  107. package/dist/lib/evaluation/errors/EvaluationError.js +207 -0
  108. package/dist/lib/evaluation/errors/index.d.ts +4 -0
  109. package/dist/lib/evaluation/errors/index.js +5 -0
  110. package/dist/lib/evaluation/hooks/index.d.ts +6 -0
  111. package/dist/lib/evaluation/hooks/index.js +7 -0
  112. package/dist/lib/evaluation/hooks/langfuseAdapter.d.ts +99 -0
  113. package/dist/lib/evaluation/hooks/langfuseAdapter.js +173 -0
  114. package/dist/lib/evaluation/hooks/observabilityHooks.d.ts +129 -0
  115. package/dist/lib/evaluation/hooks/observabilityHooks.js +182 -0
  116. package/dist/lib/evaluation/index.d.ts +11 -2
  117. package/dist/lib/evaluation/index.js +15 -0
  118. package/dist/lib/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
  119. package/dist/lib/evaluation/pipeline/evaluationPipeline.js +382 -0
  120. package/dist/lib/evaluation/pipeline/index.d.ts +8 -0
  121. package/dist/lib/evaluation/pipeline/index.js +9 -0
  122. package/dist/lib/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
  123. package/dist/lib/evaluation/pipeline/pipelineBuilder.js +261 -0
  124. package/dist/lib/evaluation/pipeline/presets.d.ts +66 -0
  125. package/dist/lib/evaluation/pipeline/presets.js +225 -0
  126. package/dist/lib/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
  127. package/dist/lib/evaluation/pipeline/strategies/batchStrategy.js +239 -0
  128. package/dist/lib/evaluation/pipeline/strategies/index.d.ts +6 -0
  129. package/dist/lib/evaluation/pipeline/strategies/index.js +7 -0
  130. package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
  131. package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.js +239 -0
  132. package/dist/lib/evaluation/reporting/index.d.ts +6 -0
  133. package/dist/lib/evaluation/reporting/index.js +7 -0
  134. package/dist/lib/evaluation/reporting/metricsCollector.d.ts +147 -0
  135. package/dist/lib/evaluation/reporting/metricsCollector.js +286 -0
  136. package/dist/lib/evaluation/reporting/reportGenerator.d.ts +90 -0
  137. package/dist/lib/evaluation/reporting/reportGenerator.js +375 -0
  138. package/dist/lib/evaluation/scorers/baseScorer.d.ts +83 -0
  139. package/dist/lib/evaluation/scorers/baseScorer.js +233 -0
  140. package/dist/lib/evaluation/scorers/customScorerUtils.d.ts +95 -0
  141. package/dist/lib/evaluation/scorers/customScorerUtils.js +382 -0
  142. package/dist/lib/evaluation/scorers/index.d.ts +10 -0
  143. package/dist/lib/evaluation/scorers/index.js +17 -0
  144. package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
  145. package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.js +100 -0
  146. package/dist/lib/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
  147. package/dist/lib/evaluation/scorers/llm/baseLLMScorer.js +282 -0
  148. package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
  149. package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.js +128 -0
  150. package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
  151. package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.js +93 -0
  152. package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
  153. package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.js +108 -0
  154. package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
  155. package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.js +122 -0
  156. package/dist/lib/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
  157. package/dist/lib/evaluation/scorers/llm/hallucinationScorer.js +141 -0
  158. package/dist/lib/evaluation/scorers/llm/index.d.ts +15 -0
  159. package/dist/lib/evaluation/scorers/llm/index.js +17 -0
  160. package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
  161. package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.js +107 -0
  162. package/dist/lib/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
  163. package/dist/lib/evaluation/scorers/llm/summarizationScorer.js +115 -0
  164. package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
  165. package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.js +107 -0
  166. package/dist/lib/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
  167. package/dist/lib/evaluation/scorers/llm/toxicityScorer.js +122 -0
  168. package/dist/lib/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
  169. package/dist/lib/evaluation/scorers/rule/baseRuleScorer.js +234 -0
  170. package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
  171. package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.js +351 -0
  172. package/dist/lib/evaluation/scorers/rule/formatScorer.d.ts +147 -0
  173. package/dist/lib/evaluation/scorers/rule/formatScorer.js +471 -0
  174. package/dist/lib/evaluation/scorers/rule/index.d.ts +9 -0
  175. package/dist/lib/evaluation/scorers/rule/index.js +11 -0
  176. package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
  177. package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.js +348 -0
  178. package/dist/lib/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
  179. package/dist/lib/evaluation/scorers/rule/lengthScorer.js +352 -0
  180. package/dist/lib/evaluation/scorers/scorerBuilder.d.ts +161 -0
  181. package/dist/lib/evaluation/scorers/scorerBuilder.js +421 -0
  182. package/dist/lib/evaluation/scorers/scorerRegistry.d.ts +62 -0
  183. package/dist/lib/evaluation/scorers/scorerRegistry.js +468 -0
  184. package/dist/lib/index.d.ts +37 -25
  185. package/dist/lib/index.js +65 -26
  186. package/dist/lib/neurolink.d.ts +204 -0
  187. package/dist/lib/neurolink.js +296 -0
  188. package/dist/lib/processors/media/VideoProcessor.d.ts +8 -2
  189. package/dist/lib/processors/media/VideoProcessor.js +90 -41
  190. package/dist/lib/telemetry/telemetryService.d.ts +1 -1
  191. package/dist/lib/telemetry/telemetryService.js +27 -13
  192. package/dist/lib/types/index.d.ts +3 -1
  193. package/dist/lib/types/index.js +3 -2
  194. package/dist/lib/types/scorerTypes.d.ts +423 -0
  195. package/dist/lib/types/scorerTypes.js +6 -0
  196. package/dist/lib/utils/errorHandling.d.ts +20 -0
  197. package/dist/lib/utils/errorHandling.js +60 -0
  198. package/dist/neurolink.d.ts +204 -0
  199. package/dist/neurolink.js +296 -0
  200. package/dist/processors/media/VideoProcessor.d.ts +8 -2
  201. package/dist/processors/media/VideoProcessor.js +90 -41
  202. package/dist/telemetry/telemetryService.d.ts +1 -1
  203. package/dist/telemetry/telemetryService.js +27 -13
  204. package/dist/types/index.d.ts +3 -1
  205. package/dist/types/index.js +3 -2
  206. package/dist/types/scorerTypes.d.ts +423 -0
  207. package/dist/types/scorerTypes.js +5 -0
  208. package/dist/utils/errorHandling.d.ts +20 -0
  209. package/dist/utils/errorHandling.js +60 -0
  210. package/package.json +7 -7
  211. package/dist/processors/media/ffprobe-static.d.ts +0 -4
@@ -0,0 +1,272 @@
1
+ /**
2
+ * @file EvaluationAggregator - Aggregates and analyzes evaluation results.
3
+ * Provides statistical analysis, trend detection, and summary generation.
4
+ */
5
+ import type { EvaluationData } from "../types/evaluation.js";
6
+ /**
7
+ * Statistical summary of evaluation scores.
8
+ */
9
+ export interface ScoreStatistics {
10
+ /** Smallest score in the sample (0 when the sample is empty) */
11
+ min: number;
12
+ /** Largest score in the sample (0 when the sample is empty) */
13
+ max: number;
14
+ /** Arithmetic mean (average) of the scores */
15
+ mean: number;
16
+ /** Median score (the 50th percentile, linearly interpolated between neighbouring ranks) */
17
+ median: number;
18
+ /** Population standard deviation (square root of `variance`) */
19
+ stdDev: number;
20
+ /** Population variance (sum of squared deviations divided by the sample size n, not n - 1) */
21
+ variance: number;
22
+ /** 25th percentile (linearly interpolated between neighbouring ranks) */
23
+ p25: number;
24
+ /** 75th percentile (linearly interpolated between neighbouring ranks) */
25
+ p75: number;
26
+ /** 90th percentile (linearly interpolated between neighbouring ranks) */
27
+ p90: number;
28
+ /** 95th percentile (linearly interpolated between neighbouring ranks) */
29
+ p95: number;
30
+ }
31
+ /**
32
+ * Score distribution across ranges.
33
+ */
34
+ export interface ScoreDistribution {
35
+ /** Count of items scoring 1-3 (poor) */
36
+ poor: number;
37
+ /** Count of items scoring 4-5 (below average) */
38
+ belowAverage: number;
39
+ /** Count of items scoring 6-7 (average) */
40
+ average: number;
41
+ /** Count of items scoring 8-9 (good) */
42
+ good: number;
43
+ /** Count of items scoring 10 or more (excellent) */
44
+ excellent: number;
45
+ }
46
+ /**
47
+ * Trend analysis results.
48
+ */
49
+ export interface TrendAnalysis {
50
+ /** Direction of the trend: "stable" when the absolute slope is below 0.05, otherwise "improving" (positive slope) or "declining" (negative slope) */
51
+ direction: "improving" | "declining" | "stable";
52
+ /** Slope of the least-squares linear regression of overall score against insertion index */
53
+ slope: number;
54
+ /** R-squared value of that regression (fit quality); 0 when all scores are identical */
55
+ rSquared: number;
56
+ /** Percentage change from the first score to the last; 0 when the first score is 0 */
57
+ percentChange: number;
58
+ /** Mean of the last N overall scores, where N is the requested window size capped at the number of evaluations */
59
+ movingAverage: number;
60
+ }
61
+ /**
62
+ * Dimension-specific analysis for RAGAS metrics.
63
+ */
64
+ export interface DimensionAnalysis {
65
+ /** Statistics over the `relevance` score of every evaluation */
66
+ relevance: ScoreStatistics;
67
+ /** Statistics over the `accuracy` score of every evaluation */
68
+ accuracy: ScoreStatistics;
69
+ /** Statistics over the `completeness` score of every evaluation */
70
+ completeness: ScoreStatistics;
71
+ /** Statistics over the `overall` score of every evaluation */
72
+ overall: ScoreStatistics;
73
+ /** Pairwise Pearson correlation coefficients between dimensions; a pair reports 0 when either dimension has no variance */
74
+ correlations: {
75
+ relevanceAccuracy: number;
76
+ relevanceCompleteness: number;
77
+ accuracyCompleteness: number;
78
+ };
79
+ }
80
+ /**
81
+ * Quality alerts summary.
82
+ */
83
+ export interface AlertSummary {
84
+ /** Total number of alerts (high + medium; off-topic items are counted separately in `offTopic`) */
85
+ total: number;
86
+ /** Number of evaluations whose `alertSeverity` is "high" */
87
+ high: number;
88
+ /** Number of evaluations whose `alertSeverity` is "medium" */
89
+ medium: number;
90
+ /** Number of evaluations whose `isOffTopic` flag is set */
91
+ offTopic: number;
92
+ /** Alert rate as a percentage of all evaluations (`total` divided by the evaluation count, times 100) */
93
+ alertRate: number;
94
+ }
95
+ /**
96
+ * Comprehensive aggregation result.
97
+ */
98
+ export interface AggregationResult {
99
+ /** Number of evaluations aggregated */
100
+ count: number;
101
+ /** Statistics for the overall scores */
102
+ statistics: ScoreStatistics;
103
+ /** Distribution of the overall scores across quality bands */
104
+ distribution: ScoreDistribution;
105
+ /** Dimension-specific analysis (relevance, accuracy, completeness, overall) */
106
+ dimensions: DimensionAnalysis;
107
+ /** Sequence trend analysis based on insertion order (not time-based); left undefined when fewer than 3 evaluations were aggregated */
108
+ sequenceTrend?: TrendAnalysis;
109
+ /** Alert summary */
110
+ alerts: AlertSummary;
111
+ /** Percentage (0-100) of evaluations whose overall score is at or above the threshold */
112
+ passingRate: number;
113
+ /** Mean of the evaluations' `evaluationTime` values (unit not visible here; presumably milliseconds, confirm against EvaluationData) */
114
+ avgEvaluationTime: number;
115
+ /** Aggregation metadata: ISO-8601 timestamp of the aggregation, the passing threshold applied, and the distinct evaluation model identifiers seen */
116
+ metadata: {
117
+ aggregatedAt: string;
118
+ threshold: number;
119
+ evaluationModels: string[];
120
+ };
121
+ }
122
+ /**
123
+ * EvaluationAggregator - Aggregates evaluation results and provides analytics.
124
+ * Supports statistical analysis, trend detection, and quality monitoring.
125
+ *
126
+ * @example
127
+ * ```typescript
128
+ * const aggregator = new EvaluationAggregator();
129
+ *
130
+ * // Add evaluations
131
+ * aggregator.addEvaluation(evaluation1);
132
+ * aggregator.addEvaluation(evaluation2);
133
+ *
134
+ * // Get aggregation
135
+ * const result = aggregator.aggregate({ threshold: 7 });
136
+ * console.log(`Average score: ${result.statistics.mean}`);
137
+ * console.log(`Passing rate: ${result.passingRate}%`);
138
+ *
139
+ * // Get trend analysis
140
+ * const trend = aggregator.analyzeSequenceTrend();
141
+ * console.log(`Quality is ${trend.direction}`);
142
+ * ```
143
+ */
144
+ export declare class EvaluationAggregator {
145
+ private evaluations;
146
+ /**
147
+ * Appends a single evaluation; insertion order is preserved and drives sequence-trend analysis.
148
+ *
149
+ * @param evaluation - The evaluation data to add
150
+ */
151
+ addEvaluation(evaluation: EvaluationData): void;
152
+ /**
153
+ * Appends every evaluation in the given array, in order.
154
+ *
155
+ * @param evaluations - Array of evaluation data to add
156
+ */
157
+ addEvaluations(evaluations: EvaluationData[]): void;
158
+ /**
159
+ * Removes all stored evaluations from the aggregator.
160
+ */
161
+ clear(): void;
162
+ /**
163
+ * Gets the number of evaluations currently stored.
164
+ */
165
+ getCount(): number;
166
+ /**
167
+ * Gets a shallow copy of the stored evaluations; mutating the returned array does not affect the aggregator.
168
+ */
169
+ getEvaluations(): EvaluationData[];
170
+ /**
171
+ * Aggregates all evaluations and returns comprehensive statistics. Throws an AGGREGATION_ERROR evaluation error when no evaluations are stored.
172
+ *
173
+ * @param options - Aggregation options; `threshold` is the passing score and defaults to 7 when omitted
174
+ * @returns Comprehensive aggregation result
175
+ */
176
+ aggregate(options?: {
177
+ threshold?: number;
178
+ }): AggregationResult;
179
+ /**
180
+ * Calculates min, max, mean, median, population variance/standard deviation and percentiles for a set of scores.
181
+ *
182
+ * @param scores - Array of scores (not mutated)
183
+ * @returns Statistical summary; every field is 0 when the array is empty
184
+ */
185
+ calculateStatistics(scores: number[]): ScoreStatistics;
186
+ /**
187
+ * Counts scores per quality band (poor 1-3, below average 4-5, average 6-7, good 8-9, excellent 10 or more).
188
+ *
189
+ * @param scores - Array of scores
190
+ * @returns Score distribution
191
+ */
192
+ calculateDistribution(scores: number[]): ScoreDistribution;
193
+ /**
194
+ * Analyzes sequence-based trends in overall scores (based on insertion order, not time); fewer than two evaluations yield a "stable" trend with zero slope.
195
+ *
196
+ * @param windowSize - Moving average window size (default: 5), capped at the number of evaluations
197
+ * @returns Trend analysis
198
+ */
199
+ analyzeSequenceTrend(windowSize?: number): TrendAnalysis;
200
+ /**
201
+ * Analyzes each evaluation dimension separately and computes pairwise Pearson correlations between them.
202
+ *
203
+ * @param relevance - Relevance scores
204
+ * @param accuracy - Accuracy scores
205
+ * @param completeness - Completeness scores
206
+ * @param overall - Overall scores
207
+ * @returns Dimension analysis
208
+ */
209
+ private analyzeDimensions;
210
+ /**
211
+ * Counts high and medium severity alerts and off-topic flags across the stored evaluations.
212
+ *
213
+ * @returns Alert summary
214
+ */
215
+ private summarizeAlerts;
216
+ /**
217
+ * Calculates a specific percentile from sorted data, interpolating linearly between neighbouring ranks.
218
+ *
219
+ * @param sorted - Array of numbers sorted in ascending order
220
+ * @param p - Percentile (0-100)
221
+ * @returns The value at the percentile (0 for an empty array)
222
+ */
223
+ private percentile;
224
+ /**
225
+ * Calculates Pearson correlation between two arrays.
226
+ *
227
+ * @param x - First array
228
+ * @param y - Second array
229
+ * @returns Correlation coefficient (-1 to 1); 0 when the arrays are empty, differ in length, or have no variance
230
+ */
231
+ private correlation;
232
+ /**
233
+ * Gets evaluations whose overall score is strictly below the threshold.
234
+ *
235
+ * @param threshold - The passing threshold (default: 7)
236
+ * @returns Array of failing evaluations
237
+ */
238
+ getFailingEvaluations(threshold?: number): EvaluationData[];
239
+ /**
240
+ * Gets evaluations whose `alertSeverity` is "high".
241
+ *
242
+ * @returns Array of high-alert evaluations
243
+ */
244
+ getHighAlertEvaluations(): EvaluationData[];
245
+ /**
246
+ * Gets evaluations whose `isOffTopic` flag is set.
247
+ *
248
+ * @returns Array of off-topic evaluations
249
+ */
250
+ getOffTopicEvaluations(): EvaluationData[];
251
+ /**
252
+ * Gets the N evaluations with the highest overall score, best first.
253
+ *
254
+ * @param n - Number of evaluations to return (default: 5)
255
+ * @returns Array of top evaluations (a new array; the stored order is untouched)
256
+ */
257
+ getTopEvaluations(n?: number): EvaluationData[];
258
+ /**
259
+ * Gets the N evaluations with the lowest overall score, worst first.
260
+ *
261
+ * @param n - Number of evaluations to return (default: 5)
262
+ * @returns Array of bottom evaluations (a new array; the stored order is untouched)
263
+ */
264
+ getBottomEvaluations(n?: number): EvaluationData[];
265
+ /**
266
+ * Generates a multi-line text summary of the aggregation; returns "No evaluations to summarize." when nothing is stored.
267
+ *
268
+ * @param threshold - The passing threshold (default: 7)
269
+ * @returns Human-readable summary
270
+ */
271
+ generateSummary(threshold?: number): string;
272
+ }
@@ -0,0 +1,377 @@
1
+ /**
2
+ * @file EvaluationAggregator - Aggregates and analyzes evaluation results.
3
+ * Provides statistical analysis, trend detection, and summary generation.
4
+ */
5
+ import { evaluationErrors } from "./errors/EvaluationError.js";
6
+ /**
7
+ * EvaluationAggregator - Aggregates evaluation results and provides analytics.
8
+ * Supports statistical analysis, trend detection, and quality monitoring.
9
+ *
10
+ * @example
11
+ * ```typescript
12
+ * const aggregator = new EvaluationAggregator();
13
+ *
14
+ * // Add evaluations
15
+ * aggregator.addEvaluation(evaluation1);
16
+ * aggregator.addEvaluation(evaluation2);
17
+ *
18
+ * // Get aggregation
19
+ * const result = aggregator.aggregate({ threshold: 7 });
20
+ * console.log(`Average score: ${result.statistics.mean}`);
21
+ * console.log(`Passing rate: ${result.passingRate}%`);
22
+ *
23
+ * // Get trend analysis
24
+ * const trend = aggregator.analyzeSequenceTrend();
25
+ * console.log(`Quality is ${trend.direction}`);
26
+ * ```
27
+ */
28
export class EvaluationAggregator {
    /** Stored evaluations, kept in insertion order (sequence-trend analysis depends on it). */
    evaluations = [];
    /**
     * Adds an evaluation to the aggregator.
     *
     * @param evaluation - The evaluation data to add
     */
    addEvaluation(evaluation) {
        this.evaluations.push(evaluation);
    }
    /**
     * Adds multiple evaluations to the aggregator, preserving their order.
     *
     * @param evaluations - Array of evaluation data to add
     */
    addEvaluations(evaluations) {
        // Push one at a time: `push(...evaluations)` passes every element as a
        // call argument and can throw a RangeError for very large arrays.
        for (const evaluation of evaluations) {
            this.evaluations.push(evaluation);
        }
    }
    /**
     * Clears all evaluations from the aggregator.
     */
    clear() {
        this.evaluations = [];
    }
    /**
     * Gets the current number of evaluations.
     */
    getCount() {
        return this.evaluations.length;
    }
    /**
     * Gets all evaluations as a shallow copy, so callers cannot mutate the
     * aggregator's internal array.
     */
    getEvaluations() {
        return [...this.evaluations];
    }
    /**
     * Aggregates all evaluations and returns comprehensive statistics.
     *
     * @param options - Aggregation options; `threshold` is the passing score (default: 7)
     * @returns Comprehensive aggregation result
     * @throws An AGGREGATION_ERROR evaluation error when no evaluations are stored
     */
    aggregate(options = {}) {
        // `??` rather than `||` so an explicit threshold of 0 is honoured.
        const threshold = options.threshold ?? 7;
        const count = this.evaluations.length;
        if (count === 0) {
            throw evaluationErrors.create("AGGREGATION_ERROR", "Cannot aggregate: no evaluations available", { retryable: false });
        }
        const overallScores = this.evaluations.map((e) => e.overall);
        const relevanceScores = this.evaluations.map((e) => e.relevance);
        const accuracyScores = this.evaluations.map((e) => e.accuracy);
        const completenessScores = this.evaluations.map((e) => e.completeness);
        const passingCount = overallScores.filter((score) => score >= threshold).length;
        const totalEvaluationTime = this.evaluations.reduce((sum, e) => sum + e.evaluationTime, 0);
        return {
            count,
            statistics: this.calculateStatistics(overallScores),
            distribution: this.calculateDistribution(overallScores),
            dimensions: this.analyzeDimensions(relevanceScores, accuracyScores, completenessScores, overallScores),
            // A trend over fewer than three points is not reported.
            sequenceTrend: count >= 3 ? this.analyzeSequenceTrend() : undefined,
            alerts: this.summarizeAlerts(),
            passingRate: (passingCount / count) * 100,
            avgEvaluationTime: totalEvaluationTime / count,
            metadata: {
                aggregatedAt: new Date().toISOString(),
                threshold,
                evaluationModels: Array.from(new Set(this.evaluations.map((e) => e.evaluationModel))),
            },
        };
    }
    /**
     * Calculates statistical summary for a set of scores.
     * Variance and standard deviation are population values (divided by n).
     *
     * @param scores - Array of scores (not mutated)
     * @returns Statistical summary; every field is 0 for an empty array
     */
    calculateStatistics(scores) {
        if (scores.length === 0) {
            return {
                min: 0,
                max: 0,
                mean: 0,
                median: 0,
                stdDev: 0,
                variance: 0,
                p25: 0,
                p75: 0,
                p90: 0,
                p95: 0,
            };
        }
        // Sort a copy so the caller's array keeps its order.
        const sorted = [...scores].sort((a, b) => a - b);
        const n = sorted.length;
        const sum = scores.reduce((a, b) => a + b, 0);
        const mean = sum / n;
        const variance = scores.reduce((acc, s) => acc + Math.pow(s - mean, 2), 0) / n;
        return {
            min: sorted[0],
            max: sorted[n - 1],
            mean,
            median: this.percentile(sorted, 50),
            stdDev: Math.sqrt(variance),
            variance,
            p25: this.percentile(sorted, 25),
            p75: this.percentile(sorted, 75),
            p90: this.percentile(sorted, 90),
            p95: this.percentile(sorted, 95),
        };
    }
    /**
     * Calculates the distribution of scores across quality ranges.
     *
     * Bands are contiguous so fractional scores are not dropped:
     * poor [1, 4), below average [4, 6), average [6, 8), good [8, 10),
     * excellent 10 and above. Integer scores 1-10 land in the documented
     * 1-3 / 4-5 / 6-7 / 8-9 / 10 bands. Scores below 1 (and NaN) are not counted.
     *
     * @param scores - Array of scores
     * @returns Score distribution
     */
    calculateDistribution(scores) {
        const distribution = {
            poor: 0,
            belowAverage: 0,
            average: 0,
            good: 0,
            excellent: 0,
        };
        for (const score of scores) {
            if (!(score >= 1)) {
                // Below the scale, or NaN: belongs to no band.
                continue;
            }
            if (score < 4) {
                distribution.poor += 1;
            }
            else if (score < 6) {
                distribution.belowAverage += 1;
            }
            else if (score < 8) {
                distribution.average += 1;
            }
            else if (score < 10) {
                distribution.good += 1;
            }
            else {
                distribution.excellent += 1;
            }
        }
        return distribution;
    }
    /**
     * Analyzes sequence-based trends in evaluation scores (based on insertion order, not time).
     *
     * @param windowSize - Moving average window size (default: 5); values below 1
     *   are treated as 1 and the window never exceeds the number of evaluations
     * @returns Trend analysis; "stable" with zero slope when fewer than two evaluations exist
     */
    analyzeSequenceTrend(windowSize = 5) {
        const scores = this.evaluations.map((e) => e.overall);
        if (scores.length < 2) {
            return {
                direction: "stable",
                slope: 0,
                rSquared: 0,
                percentChange: 0,
                movingAverage: scores[0] ?? 0,
            };
        }
        // Least-squares linear regression of score against insertion index.
        const n = scores.length;
        const indices = scores.map((_, i) => i);
        const sumX = indices.reduce((a, b) => a + b, 0);
        const sumY = scores.reduce((a, b) => a + b, 0);
        const sumXY = indices.reduce((sum, x, i) => sum + x * scores[i], 0);
        const sumXX = indices.reduce((sum, x) => sum + x * x, 0);
        // With n >= 2 distinct indices the denominator is never zero.
        const slope = (n * sumXY - sumX * sumY) / (n * sumXX - sumX * sumX);
        const intercept = (sumY - slope * sumX) / n;
        // R-squared: share of the variance explained by the fitted line.
        const yMean = sumY / n;
        const ssTotal = scores.reduce((sum, y) => sum + Math.pow(y - yMean, 2), 0);
        const ssResidual = scores.reduce((sum, y, i) => {
            const predicted = slope * i + intercept;
            return sum + Math.pow(y - predicted, 2);
        }, 0);
        const rSquared = ssTotal === 0 ? 0 : 1 - ssResidual / ssTotal;
        // Moving average over the most recent scores. Clamp the window to at
        // least 1: a zero or negative size would otherwise make `slice` return
        // the wrong span (or nothing, yielding NaN).
        const requestedWindow = Number.isNaN(windowSize) ? 5 : Math.trunc(windowSize);
        const window = Math.min(Math.max(requestedWindow, 1), n);
        const recentScores = scores.slice(-window);
        const movingAverage = recentScores.reduce((a, b) => a + b, 0) / recentScores.length;
        // Percent change from first to last; undefined for a zero baseline, so report 0.
        const percentChange = scores[0] !== 0
            ? ((scores[n - 1] - scores[0]) / scores[0]) * 100
            : 0;
        // Slopes smaller than 0.05 per evaluation are treated as noise.
        let direction;
        if (Math.abs(slope) < 0.05) {
            direction = "stable";
        }
        else if (slope > 0) {
            direction = "improving";
        }
        else {
            direction = "declining";
        }
        return {
            direction,
            slope,
            rSquared,
            percentChange,
            movingAverage,
        };
    }
    /**
     * Analyzes each evaluation dimension separately.
     *
     * @param relevance - Relevance scores
     * @param accuracy - Accuracy scores
     * @param completeness - Completeness scores
     * @param overall - Overall scores
     * @returns Dimension analysis, including pairwise Pearson correlations
     */
    analyzeDimensions(relevance, accuracy, completeness, overall) {
        return {
            relevance: this.calculateStatistics(relevance),
            accuracy: this.calculateStatistics(accuracy),
            completeness: this.calculateStatistics(completeness),
            overall: this.calculateStatistics(overall),
            correlations: {
                relevanceAccuracy: this.correlation(relevance, accuracy),
                relevanceCompleteness: this.correlation(relevance, completeness),
                accuracyCompleteness: this.correlation(accuracy, completeness),
            },
        };
    }
    /**
     * Summarizes alert information from evaluations.
     *
     * @returns Alert summary; `total` counts high and medium alerts only, and
     *   `alertRate` is 0 when there are no evaluations
     */
    summarizeAlerts() {
        let high = 0;
        let medium = 0;
        let offTopic = 0;
        for (const evaluation of this.evaluations) {
            if (evaluation.alertSeverity === "high") {
                high += 1;
            }
            else if (evaluation.alertSeverity === "medium") {
                medium += 1;
            }
            if (evaluation.isOffTopic) {
                offTopic += 1;
            }
        }
        const total = high + medium;
        const count = this.evaluations.length;
        return {
            total,
            high,
            medium,
            offTopic,
            // Guard the division so an empty aggregator reports 0 instead of NaN.
            alertRate: count === 0 ? 0 : (total / count) * 100,
        };
    }
    /**
     * Calculates a specific percentile from sorted data, interpolating
     * linearly between the two neighbouring ranks.
     *
     * @param sorted - Array of numbers sorted in ascending order
     * @param p - Percentile (0-100)
     * @returns The value at the percentile (0 for an empty array)
     */
    percentile(sorted, p) {
        if (sorted.length === 0) {
            return 0;
        }
        if (sorted.length === 1) {
            return sorted[0];
        }
        const index = (p / 100) * (sorted.length - 1);
        const lower = Math.floor(index);
        const upper = Math.ceil(index);
        if (lower === upper) {
            return sorted[lower];
        }
        const fraction = index - lower;
        return sorted[lower] * (1 - fraction) + sorted[upper] * fraction;
    }
    /**
     * Calculates Pearson correlation between two arrays.
     *
     * @param x - First array
     * @param y - Second array
     * @returns Correlation coefficient (-1 to 1); 0 when the arrays are empty,
     *   differ in length, or either has no variance
     */
    correlation(x, y) {
        const n = x.length;
        if (n === 0 || n !== y.length) {
            return 0;
        }
        let sumX = 0;
        let sumY = 0;
        let sumXY = 0;
        let sumXX = 0;
        let sumYY = 0;
        for (let i = 0; i < n; i += 1) {
            sumX += x[i];
            sumY += y[i];
            sumXY += x[i] * y[i];
            sumXX += x[i] * x[i];
            sumYY += y[i] * y[i];
        }
        const spreadX = n * sumXX - sumX * sumX;
        const spreadY = n * sumYY - sumY * sumY;
        // Both spreads are mathematically non-negative, but rounding can push a
        // zero spread slightly below zero, which would make the square root NaN.
        if (spreadX <= 0 || spreadY <= 0) {
            return 0;
        }
        const denominator = Math.sqrt(spreadX * spreadY);
        if (denominator === 0) {
            return 0;
        }
        const coefficient = (n * sumXY - sumX * sumY) / denominator;
        // Rounding can overshoot the valid range by a few ulps.
        return Math.max(-1, Math.min(1, coefficient));
    }
    /**
     * Gets evaluations that failed to meet the threshold (overall score strictly below it).
     *
     * @param threshold - The passing threshold (default: 7)
     * @returns Array of failing evaluations
     */
    getFailingEvaluations(threshold = 7) {
        return this.evaluations.filter((e) => e.overall < threshold);
    }
    /**
     * Gets evaluations with high severity alerts.
     *
     * @returns Array of high-alert evaluations
     */
    getHighAlertEvaluations() {
        return this.evaluations.filter((e) => e.alertSeverity === "high");
    }
    /**
     * Gets evaluations marked as off-topic.
     *
     * @returns Array of off-topic evaluations
     */
    getOffTopicEvaluations() {
        return this.evaluations.filter((e) => e.isOffTopic);
    }
    /**
     * Gets the top N performing evaluations, best first.
     *
     * @param n - Number of evaluations to return (default: 5); negative values yield an empty array
     * @returns Array of top evaluations (a new array; stored order is untouched)
     */
    getTopEvaluations(n = 5) {
        // Clamp: a negative end index would make `slice` count from the end.
        return [...this.evaluations]
            .sort((a, b) => b.overall - a.overall)
            .slice(0, Math.max(0, n));
    }
    /**
     * Gets the bottom N performing evaluations, worst first.
     *
     * @param n - Number of evaluations to return (default: 5); negative values yield an empty array
     * @returns Array of bottom evaluations (a new array; stored order is untouched)
     */
    getBottomEvaluations(n = 5) {
        // Clamp: a negative end index would make `slice` count from the end.
        return [...this.evaluations]
            .sort((a, b) => a.overall - b.overall)
            .slice(0, Math.max(0, n));
    }
    /**
     * Generates a text summary of the aggregation.
     *
     * @param threshold - The passing threshold (default: 7)
     * @returns Human-readable summary, or a fixed message when nothing is stored
     */
    generateSummary(threshold = 7) {
        if (this.evaluations.length === 0) {
            return "No evaluations to summarize.";
        }
        const result = this.aggregate({ threshold });
        const trend = result.sequenceTrend;
        let summary = `Evaluation Summary (${result.count} evaluations):\n`;
        summary += `- Average Score: ${result.statistics.mean.toFixed(2)}/10\n`;
        summary += `- Passing Rate: ${result.passingRate.toFixed(1)}%\n`;
        summary += `- Score Range: ${result.statistics.min} - ${result.statistics.max}\n`;
        summary += `- Alert Rate: ${result.alerts.alertRate.toFixed(1)}%\n`;
        if (trend) {
            summary += `- Quality Trend: ${trend.direction} (slope: ${trend.slope.toFixed(3)})\n`;
        }
        if (result.alerts.high > 0) {
            summary += `\nWarning: ${result.alerts.high} high-severity alerts detected.\n`;
        }
        return summary;
    }
}