ppef 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171) hide show
  1. package/README.md +76 -125
  2. package/dist/__tests__/cli/evaluate-command.integration.test.d.ts +8 -0
  3. package/dist/__tests__/cli/evaluate-command.integration.test.d.ts.map +1 -0
  4. package/dist/__tests__/cli/evaluate-command.integration.test.js +308 -0
  5. package/dist/__tests__/cli/evaluate-command.integration.test.js.map +1 -0
  6. package/dist/__tests__/evaluators/claims-evaluator.unit.test.d.ts +8 -0
  7. package/dist/__tests__/evaluators/claims-evaluator.unit.test.d.ts.map +1 -0
  8. package/dist/__tests__/evaluators/claims-evaluator.unit.test.js +405 -0
  9. package/dist/__tests__/evaluators/claims-evaluator.unit.test.js.map +1 -0
  10. package/dist/__tests__/evaluators/metrics-evaluator.unit.test.d.ts +8 -0
  11. package/dist/__tests__/evaluators/metrics-evaluator.unit.test.d.ts.map +1 -0
  12. package/dist/__tests__/evaluators/metrics-evaluator.unit.test.js +424 -0
  13. package/dist/__tests__/evaluators/metrics-evaluator.unit.test.js.map +1 -0
  14. package/dist/__tests__/evaluators/registry.unit.test.d.ts +7 -0
  15. package/dist/__tests__/evaluators/registry.unit.test.d.ts.map +1 -0
  16. package/dist/__tests__/evaluators/registry.unit.test.js +173 -0
  17. package/dist/__tests__/evaluators/registry.unit.test.js.map +1 -0
  18. package/dist/__tests__/evaluators/robustness-evaluator.unit.test.d.ts +8 -0
  19. package/dist/__tests__/evaluators/robustness-evaluator.unit.test.d.ts.map +1 -0
  20. package/dist/__tests__/evaluators/robustness-evaluator.unit.test.js +260 -0
  21. package/dist/__tests__/evaluators/robustness-evaluator.unit.test.js.map +1 -0
  22. package/dist/__tests__/framework-pipeline.integration.test.js +36 -9
  23. package/dist/__tests__/framework-pipeline.integration.test.js.map +1 -1
  24. package/dist/__tests__/index-exports.unit.test.js +9 -12
  25. package/dist/__tests__/index-exports.unit.test.js.map +1 -1
  26. package/dist/aggregation/pipeline.d.ts.map +1 -1
  27. package/dist/aggregation/pipeline.js +40 -3
  28. package/dist/aggregation/pipeline.js.map +1 -1
  29. package/dist/claims/index.d.ts +6 -3
  30. package/dist/claims/index.d.ts.map +1 -1
  31. package/dist/claims/index.js +6 -3
  32. package/dist/claims/index.js.map +1 -1
  33. package/dist/cli/__tests__/aggregate.command.unit.test.js +3 -0
  34. package/dist/cli/__tests__/aggregate.command.unit.test.js.map +1 -1
  35. package/dist/cli/__tests__/binary-sut.integration.test.d.ts +8 -0
  36. package/dist/cli/__tests__/binary-sut.integration.test.d.ts.map +1 -0
  37. package/dist/cli/__tests__/binary-sut.integration.test.js +165 -0
  38. package/dist/cli/__tests__/binary-sut.integration.test.js.map +1 -0
  39. package/dist/cli/__tests__/config-loader.unit.test.d.ts +7 -0
  40. package/dist/cli/__tests__/config-loader.unit.test.d.ts.map +1 -0
  41. package/dist/cli/__tests__/config-loader.unit.test.js +611 -0
  42. package/dist/cli/__tests__/config-loader.unit.test.js.map +1 -0
  43. package/dist/cli/command-deps.d.ts +13 -1
  44. package/dist/cli/command-deps.d.ts.map +1 -1
  45. package/dist/cli/commands/aggregate.d.ts.map +1 -1
  46. package/dist/cli/commands/aggregate.js +3 -0
  47. package/dist/cli/commands/aggregate.js.map +1 -1
  48. package/dist/cli/commands/evaluate.d.ts +41 -0
  49. package/dist/cli/commands/evaluate.d.ts.map +1 -0
  50. package/dist/cli/commands/evaluate.js +287 -0
  51. package/dist/cli/commands/evaluate.js.map +1 -0
  52. package/dist/cli/commands/run.d.ts.map +1 -1
  53. package/dist/cli/commands/run.js +93 -1
  54. package/dist/cli/commands/run.js.map +1 -1
  55. package/dist/cli/index.d.ts +2 -1
  56. package/dist/cli/index.d.ts.map +1 -1
  57. package/dist/cli/index.js +3 -1
  58. package/dist/cli/index.js.map +1 -1
  59. package/dist/cli/module-loader.d.ts +23 -1
  60. package/dist/cli/module-loader.d.ts.map +1 -1
  61. package/dist/cli/module-loader.js +19 -1
  62. package/dist/cli/module-loader.js.map +1 -1
  63. package/dist/cli/types.d.ts +19 -0
  64. package/dist/cli/types.d.ts.map +1 -1
  65. package/dist/evaluators/claims-evaluator.d.ts +87 -0
  66. package/dist/evaluators/claims-evaluator.d.ts.map +1 -0
  67. package/dist/evaluators/claims-evaluator.js +289 -0
  68. package/dist/evaluators/claims-evaluator.js.map +1 -0
  69. package/dist/evaluators/exploratory-evaluator.d.ts +136 -0
  70. package/dist/evaluators/exploratory-evaluator.d.ts.map +1 -0
  71. package/dist/evaluators/exploratory-evaluator.js +545 -0
  72. package/dist/evaluators/exploratory-evaluator.js.map +1 -0
  73. package/dist/evaluators/index.d.ts +13 -0
  74. package/dist/evaluators/index.d.ts.map +1 -0
  75. package/dist/evaluators/index.js +14 -0
  76. package/dist/evaluators/index.js.map +1 -0
  77. package/dist/evaluators/metrics-evaluator.d.ts +114 -0
  78. package/dist/evaluators/metrics-evaluator.d.ts.map +1 -0
  79. package/dist/evaluators/metrics-evaluator.js +433 -0
  80. package/dist/evaluators/metrics-evaluator.js.map +1 -0
  81. package/dist/evaluators/registry.d.ts +106 -0
  82. package/dist/evaluators/registry.d.ts.map +1 -0
  83. package/dist/evaluators/registry.js +148 -0
  84. package/dist/evaluators/registry.js.map +1 -0
  85. package/dist/evaluators/robustness-evaluator.d.ts +57 -0
  86. package/dist/evaluators/robustness-evaluator.d.ts.map +1 -0
  87. package/dist/evaluators/robustness-evaluator.js +186 -0
  88. package/dist/evaluators/robustness-evaluator.js.map +1 -0
  89. package/dist/executor/__tests__/binary-sut.unit.test.d.ts +8 -0
  90. package/dist/executor/__tests__/binary-sut.unit.test.d.ts.map +1 -0
  91. package/dist/executor/__tests__/binary-sut.unit.test.js +313 -0
  92. package/dist/executor/__tests__/binary-sut.unit.test.js.map +1 -0
  93. package/dist/executor/__tests__/checkpoint-storage.unit.test.js +43 -0
  94. package/dist/executor/__tests__/checkpoint-storage.unit.test.js.map +1 -1
  95. package/dist/executor/__tests__/executor.unit.test.js +56 -9
  96. package/dist/executor/__tests__/executor.unit.test.js.map +1 -1
  97. package/dist/executor/__tests__/resource-calculator.unit.test.d.ts +10 -0
  98. package/dist/executor/__tests__/resource-calculator.unit.test.d.ts.map +1 -0
  99. package/dist/executor/__tests__/resource-calculator.unit.test.js +104 -0
  100. package/dist/executor/__tests__/resource-calculator.unit.test.js.map +1 -0
  101. package/dist/executor/__tests__/worker-threads-executor.unit.test.d.ts +8 -0
  102. package/dist/executor/__tests__/worker-threads-executor.unit.test.d.ts.map +1 -0
  103. package/dist/executor/__tests__/worker-threads-executor.unit.test.js +276 -0
  104. package/dist/executor/__tests__/worker-threads-executor.unit.test.js.map +1 -0
  105. package/dist/executor/binary-sut.d.ts +105 -0
  106. package/dist/executor/binary-sut.d.ts.map +1 -0
  107. package/dist/executor/binary-sut.js +174 -0
  108. package/dist/executor/binary-sut.js.map +1 -0
  109. package/dist/executor/checkpoint-storage.d.ts.map +1 -1
  110. package/dist/executor/checkpoint-storage.js +6 -4
  111. package/dist/executor/checkpoint-storage.js.map +1 -1
  112. package/dist/executor/executor.d.ts +28 -0
  113. package/dist/executor/executor.d.ts.map +1 -1
  114. package/dist/executor/executor.js +85 -24
  115. package/dist/executor/executor.js.map +1 -1
  116. package/dist/executor/index.d.ts +4 -0
  117. package/dist/executor/index.d.ts.map +1 -1
  118. package/dist/executor/index.js +4 -0
  119. package/dist/executor/index.js.map +1 -1
  120. package/dist/executor/resource-calculator.d.ts +49 -0
  121. package/dist/executor/resource-calculator.d.ts.map +1 -0
  122. package/dist/executor/resource-calculator.js +129 -0
  123. package/dist/executor/resource-calculator.js.map +1 -0
  124. package/dist/executor/worker-entry.js +26 -10
  125. package/dist/executor/worker-entry.js.map +1 -1
  126. package/dist/executor/worker-executor.d.ts +104 -3
  127. package/dist/executor/worker-executor.d.ts.map +1 -1
  128. package/dist/executor/worker-executor.js +224 -4
  129. package/dist/executor/worker-executor.js.map +1 -1
  130. package/dist/executor/worker-threads-executor.d.ts +245 -0
  131. package/dist/executor/worker-threads-executor.d.ts.map +1 -0
  132. package/dist/executor/worker-threads-executor.js +332 -0
  133. package/dist/executor/worker-threads-executor.js.map +1 -0
  134. package/dist/index.d.ts +1 -0
  135. package/dist/index.d.ts.map +1 -1
  136. package/dist/index.js +4 -2
  137. package/dist/index.js.map +1 -1
  138. package/dist/renderers/latex-renderer.d.ts +60 -0
  139. package/dist/renderers/latex-renderer.d.ts.map +1 -1
  140. package/dist/renderers/latex-renderer.js +299 -0
  141. package/dist/renderers/latex-renderer.js.map +1 -1
  142. package/dist/renderers/types.d.ts +9 -0
  143. package/dist/renderers/types.d.ts.map +1 -1
  144. package/dist/renderers/types.js.map +1 -1
  145. package/dist/robustness/index.d.ts +5 -2
  146. package/dist/robustness/index.d.ts.map +1 -1
  147. package/dist/robustness/index.js +4 -2
  148. package/dist/robustness/index.js.map +1 -1
  149. package/dist/types/evaluator.d.ts +449 -0
  150. package/dist/types/evaluator.d.ts.map +1 -0
  151. package/dist/types/evaluator.js +9 -0
  152. package/dist/types/evaluator.js.map +1 -0
  153. package/dist/types/result.d.ts +2 -0
  154. package/dist/types/result.d.ts.map +1 -1
  155. package/package.json +1 -1
  156. package/dist/claims/__tests__/evaluator.unit.test.d.ts +0 -12
  157. package/dist/claims/__tests__/evaluator.unit.test.d.ts.map +0 -1
  158. package/dist/claims/__tests__/evaluator.unit.test.js +0 -801
  159. package/dist/claims/__tests__/evaluator.unit.test.js.map +0 -1
  160. package/dist/claims/evaluator.d.ts +0 -33
  161. package/dist/claims/evaluator.d.ts.map +0 -1
  162. package/dist/claims/evaluator.js +0 -174
  163. package/dist/claims/evaluator.js.map +0 -1
  164. package/dist/robustness/__tests__/analyzer.unit.test.d.ts +0 -11
  165. package/dist/robustness/__tests__/analyzer.unit.test.d.ts.map +0 -1
  166. package/dist/robustness/__tests__/analyzer.unit.test.js +0 -455
  167. package/dist/robustness/__tests__/analyzer.unit.test.js.map +0 -1
  168. package/dist/robustness/analyzer.d.ts +0 -61
  169. package/dist/robustness/analyzer.d.ts.map +0 -1
  170. package/dist/robustness/analyzer.js +0 -191
  171. package/dist/robustness/analyzer.js.map +0 -1
@@ -0,0 +1,136 @@
1
+ /**
2
+ * Exploratory Evaluator
3
+ *
4
+ * Hypothesis-free analysis for discovering patterns in evaluation data.
5
+ * Unlike ClaimsEvaluator which tests predefined hypotheses, this evaluator
6
+ * performs exploratory analysis including:
7
+ * - Ranking all SUTs by any metric (not just primary vs baseline)
8
+ * - Finding significant pairwise differences (N-way comparisons)
9
+ * - Discovering case-class effects
10
+ * - Computing metric correlations
11
+ */
12
+ import type { EvaluationContext, EvaluationOutput, EvaluationSummary, Evaluator, ExploratoryEvaluatorConfig, ExploratoryEvaluatorData, IEvaluator, ValidationResult } from "../types/evaluator.js";
13
+ /**
14
+ * Exploratory evaluator - hypothesis-free comparative analysis.
15
+ */
16
+ export declare class ExploratoryEvaluator implements Evaluator<ExploratoryEvaluatorConfig, EvaluationContext, ExploratoryEvaluatorData>, IEvaluator {
17
+ /** Type identifier; always the literal "exploratory" */
18
+ readonly type: "exploratory";
19
+ /** Schema version stamped on the evaluation output and on its data payload */
20
+ private static readonly VERSION;
21
+ /** Default significance level, used when the config does not set significanceLevel */
22
+ private static readonly DEFAULT_SIGNIFICANCE;
23
+ /**
24
+ * Validate exploratory evaluator configuration: checks significanceLevel, minEffectSize and metricDirections, and warns when metrics or suts are left unspecified.
25
+ *
26
+ * @param config - Configuration to validate
27
+ * @returns Validation result; errors and warnings are undefined when there are none
28
+ */
29
+ validateConfig(config: ExploratoryEvaluatorConfig): ValidationResult;
30
+ /**
31
+ * Perform exploratory evaluation: rankings per metric, pairwise SUT comparisons, and (unless disabled in the config) case-class effects and metric correlations.
32
+ *
33
+ * @param config - Exploratory evaluator configuration
34
+ * @param input - Evaluation context with aggregates
35
+ * @returns Evaluation output of type "exploratory" carrying the analysis data and the config used
36
+ */
37
+ evaluate(config: ExploratoryEvaluatorConfig, input: EvaluationContext): EvaluationOutput<ExploratoryEvaluatorData>;
38
+ /**
39
+ * Summarize evaluation output: total is the pairwise comparison count, passed is the number of significant differences.
40
+ *
41
+ * @param output - Evaluation output to summarize
42
+ * @returns Summary statistics
43
+ */
44
+ summarize(output: EvaluationOutput<ExploratoryEvaluatorData>): EvaluationSummary;
45
+ /**
46
+ * Determine which SUTs to analyze.
47
+ * @param aggregates - Aggregates whose distinct sut values are used when configSuts is empty or absent
48
+ * @param configSuts - Explicit SUT list from the config; returned as-is when non-empty
49
+ */
50
+ private determineSuts;
51
+ /**
52
+ * Determine which metrics to analyze.
53
+ * @param aggregates - Aggregates whose metric keys are collected when configMetrics is empty or absent
54
+ * @param configMetrics - Explicit metric list from the config; returned as-is when non-empty
55
+ */
56
+ private determineMetrics;
57
+ /**
58
+ * Compute rankings for a single metric (mean, median, std and 1-based rank per SUT).
59
+ * @param aggregates - Aggregates to rank; entries lacking the metric are skipped
60
+ * @param metric - Name of the metric whose per-aggregate mean values are ranked
61
+ * @param direction - "higher-better" sorts by descending mean; any other value sorts ascending
62
+ */
63
+ private computeRankings;
64
+ /**
65
+ * Compute all pairwise comparisons (every unordered SUT pair, for every metric).
66
+ * @param aggregates - Aggregates supplying the values to compare
67
+ * @param suts - SUT identifiers to pair up
68
+ * @param metrics - Metric names to compare on
69
+ * @param significanceLevel - p-value threshold below which a difference may count as significant
70
+ * @param minEffectSize - Optional minimum absolute effect size also required for significance
71
+ */
72
+ private computePairwiseComparisons;
73
+ /**
74
+ * Compare a single pair of SUTs for a metric; yields null when either SUT has no values for the metric.
75
+ * @param aggregates - Aggregates supplying the values to compare
76
+ * @param sutA - First SUT; delta and ratio are computed as A relative to B
77
+ * @param sutB - Second SUT
78
+ * @param metric - Metric name to compare on
79
+ * @param significanceLevel - p-value threshold below which the difference may count as significant
80
+ * @param minEffectSize - Optional minimum absolute effect size also required for significance
81
+ */
82
+ private compareSutPair;
83
+ /**
84
+ * Analyze case-class effects on SUT performance (deviation of each case class from the SUT's overall mean); empty when fewer than two case classes exist.
85
+ * @param aggregates - Aggregates supplying sut, caseClass and metric values
86
+ * @param metrics - Metric names to analyze
87
+ * @param significanceLevel - p-value threshold for flagging a deviation as significant
88
+ */
89
+ private analyzeCaseClassEffects;
90
+ /**
91
+ * Compute correlations between metrics (every unordered metric pair).
92
+ * @param aggregates - Aggregates supplying the paired metric values
93
+ * @param metrics - Metric names to pair up
94
+ */
95
+ private computeMetricCorrelations;
96
+ /**
97
+ * Compute Pearson and Spearman correlation between two metrics; yields null when fewer than 3 aggregates carry both metrics.
98
+ * @param aggregates - Aggregates supplying the paired metric values
99
+ * @param metricA - First metric name
100
+ * @param metricB - Second metric name
101
+ */
102
+ private computeCorrelation;
103
+ /**
104
+ * Compute Pearson correlation coefficient; 0 when either series has no variation.
105
+ * @param x - First series of numbers
106
+ * @param y - Second series, paired index-by-index with x
107
+ */
108
+ private pearsonCorrelation;
109
+ /**
110
+ * Compute Spearman rank correlation coefficient (Pearson correlation of the ranks).
111
+ * @param x - First series of numbers
112
+ * @param y - Second series, paired index-by-index with x
113
+ */
114
+ private spearmanCorrelation;
115
+ /**
116
+ * Compute ranks for an array of values (handling ties).
117
+ * @param values - Numbers to rank; ranks are 1-based and tied values share their average rank
118
+ */
119
+ private computeRanks;
120
+ /**
121
+ * Interpret correlation coefficient as a strength label based on its absolute value.
122
+ * @param r - Correlation coefficient
123
+ */
124
+ private interpretCorrelation;
125
+ /**
126
+ * Compute variance of an array (population form, dividing by the element count); 0 for an empty array.
127
+ * @param values - Numbers to measure
128
+ */
129
+ private variance;
130
+ /**
131
+ * Standard normal CDF approximation.
132
+ * @param z - Standard score at which to evaluate the CDF
133
+ */
134
+ private normalCdf;
135
+ }
136
+ //# sourceMappingURL=exploratory-evaluator.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"exploratory-evaluator.d.ts","sourceRoot":"","sources":["../../src/evaluators/exploratory-evaluator.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAGH,OAAO,KAAK,EAEX,iBAAiB,EACjB,gBAAgB,EAChB,iBAAiB,EACjB,SAAS,EACT,0BAA0B,EAC1B,wBAAwB,EAExB,UAAU,EAKV,gBAAgB,EAChB,MAAM,uBAAuB,CAAC;AAE/B;;GAEG;AACH,qBAAa,oBACZ,YACC,SAAS,CAAC,0BAA0B,EAAE,iBAAiB,EAAE,wBAAwB,CAAC,EAClF,UAAU;IAEX,sBAAsB;IACtB,QAAQ,CAAC,IAAI,EAAG,aAAa,CAAU;IAEvC,qBAAqB;IACrB,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAW;IAE1C,iCAAiC;IACjC,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,oBAAoB,CAAQ;IAEpD;;;;;OAKG;IACH,cAAc,CAAC,MAAM,EAAE,0BAA0B,GAAG,gBAAgB;IA2CpE;;;;;;OAMG;IACH,QAAQ,CACP,MAAM,EAAE,0BAA0B,EAClC,KAAK,EAAE,iBAAiB,GACtB,gBAAgB,CAAC,wBAAwB,CAAC;IAsF7C;;;;;OAKG;IACH,SAAS,CAAC,MAAM,EAAE,gBAAgB,CAAC,wBAAwB,CAAC,GAAG,iBAAiB;IAchF;;;;OAIG;IACH,OAAO,CAAC,aAAa;IAQrB;;;;OAIG;IACH,OAAO,CAAC,gBAAgB;IAcxB;;;;;OAKG;IACH,OAAO,CAAC,eAAe;IAuEvB;;;;;;;OAOG;IACH,OAAO,CAAC,0BAA0B;IAmClC;;;;;;;;OAQG;IACH,OAAO,CAAC,cAAc;IAuFtB;;;;;OAKG;IACH,OAAO,CAAC,uBAAuB;IA8D/B;;;;OAIG;IACH,OAAO,CAAC,yBAAyB;IAqBjC;;;;;OAKG;IACH,OAAO,CAAC,kBAAkB;IA2C1B;;;;OAIG;IACH,OAAO,CAAC,kBAAkB;IAqB1B;;;;OAIG;IACH,OAAO,CAAC,mBAAmB;IAM3B;;;OAGG;IACH,OAAO,CAAC,YAAY;IA0BpB;;;OAGG;IACH,OAAO,CAAC,oBAAoB;IAS5B;;;OAGG;IACH,OAAO,CAAC,QAAQ;IAMhB;;;OAGG;IACH,OAAO,CAAC,SAAS;CAiBjB"}
@@ -0,0 +1,545 @@
1
+ /**
2
+ * Exploratory Evaluator
3
+ *
4
+ * Hypothesis-free analysis for discovering patterns in evaluation data.
5
+ * Unlike ClaimsEvaluator which tests predefined hypotheses, this evaluator
6
+ * performs exploratory analysis including:
7
+ * - Ranking all SUTs by any metric (not just primary vs baseline)
8
+ * - Finding significant pairwise differences (N-way comparisons)
9
+ * - Discovering case-class effects
10
+ * - Computing metric correlations
11
+ */
12
+ /**
13
+ * Exploratory evaluator - hypothesis-free comparative analysis.
14
+ */
15
export class ExploratoryEvaluator {
    /** Type identifier reported on every evaluation output. */
    type = "exploratory";
    /** Schema version stamped on evaluation output. */
    static VERSION = "1.0.0";
    /** Significance level applied when the config omits `significanceLevel`. */
    static DEFAULT_SIGNIFICANCE = 0.05;

    /**
     * Validate exploratory evaluator configuration.
     *
     * Errors are reported for an out-of-range (or NaN) `significanceLevel`, a
     * negative (or NaN) `minEffectSize`, and unknown metric directions.
     * Warnings are reported when `metrics` or `suts` are empty or absent.
     *
     * @param config - Configuration to validate
     * @returns Validation result; `errors` / `warnings` are `undefined` when empty
     */
    validateConfig(config) {
        const errors = [];
        const warnings = [];
        const { significanceLevel, minEffectSize, metricDirections } = config;

        // Written as negated positive checks so that NaN (for which every
        // comparison is false) is rejected instead of slipping through.
        if (significanceLevel !== undefined && !(significanceLevel > 0 && significanceLevel < 1)) {
            errors.push("significanceLevel must be between 0 and 1 (exclusive)");
        }
        if (minEffectSize !== undefined && !(minEffectSize >= 0)) {
            errors.push("minEffectSize must be non-negative");
        }

        if (metricDirections) {
            const validDirections = ["higher-better", "lower-better"];
            for (const [metric, direction] of Object.entries(metricDirections)) {
                if (!validDirections.includes(direction)) {
                    errors.push(`Invalid direction for metric "${metric}": must be "higher-better" or "lower-better"`);
                }
            }
        }

        if (!config.metrics || config.metrics.length === 0) {
            warnings.push("No metrics specified - will analyze all available metrics");
        }
        if (!config.suts || config.suts.length === 0) {
            warnings.push("No SUTs specified - will analyze all available SUTs");
        }

        return {
            valid: errors.length === 0,
            errors: errors.length > 0 ? errors : undefined,
            warnings: warnings.length > 0 ? warnings : undefined,
        };
    }

    /**
     * Perform exploratory evaluation.
     *
     * Produces per-metric rankings, all pairwise SUT comparisons, and — unless
     * disabled via `analyzeCaseClassEffects: false` / `computeCorrelations: false`
     * — case-class effects and metric correlations (the latter only when at
     * least two metrics are analyzed).
     *
     * @param config - Exploratory evaluator configuration
     * @param input - Evaluation context with aggregates
     * @returns Evaluation output
     */
    evaluate(config, input) {
        const { aggregates } = input;
        const significanceLevel = config.significanceLevel ?? ExploratoryEvaluator.DEFAULT_SIGNIFICANCE;
        // One timestamp for both the envelope and the payload so they always agree.
        const timestamp = new Date().toISOString();

        const sutsToAnalyze = this.determineSuts(aggregates, config.suts);
        const metricsToAnalyze = this.determineMetrics(aggregates, config.metrics);

        // Set lookup keeps the filter linear in the number of aggregates.
        const selectedSuts = new Set(sutsToAnalyze);
        const filteredAggregates = aggregates.filter((agg) => selectedSuts.has(agg.sut));

        const rankings = {};
        for (const metric of metricsToAnalyze) {
            const direction = config.metricDirections?.[metric] ?? "higher-better";
            rankings[metric] = this.computeRankings(filteredAggregates, metric, direction);
        }

        const pairwiseComparisons = this.computePairwiseComparisons(filteredAggregates, sutsToAnalyze, metricsToAnalyze, significanceLevel, config.minEffectSize);

        let caseClassEffects;
        if (config.analyzeCaseClassEffects !== false) {
            caseClassEffects = this.analyzeCaseClassEffects(filteredAggregates, metricsToAnalyze, significanceLevel);
        }

        let metricCorrelations;
        if (config.computeCorrelations !== false && metricsToAnalyze.length >= 2) {
            metricCorrelations = this.computeMetricCorrelations(filteredAggregates, metricsToAnalyze);
        }

        // Rankings are sorted best-first, so the head of each list is the winner.
        const bestSutPerMetric = {};
        for (const [metric, ranking] of Object.entries(rankings)) {
            if (ranking.length > 0) {
                bestSutPerMetric[metric] = ranking[0].sut;
            }
        }

        const caseClasses = new Set(filteredAggregates.map((agg) => agg.caseClass));

        const data = {
            version: ExploratoryEvaluator.VERSION,
            timestamp,
            rankings,
            pairwiseComparisons,
            caseClassEffects,
            metricCorrelations,
            summary: {
                sutsAnalyzed: sutsToAnalyze.length,
                metricsAnalyzed: metricsToAnalyze.length,
                pairwiseComparisonsCount: pairwiseComparisons.length,
                significantDifferences: pairwiseComparisons.filter((c) => c.significant).length,
                caseClassesAnalyzed: caseClasses.size,
                bestSutPerMetric,
            },
        };

        return {
            type: "exploratory",
            version: ExploratoryEvaluator.VERSION,
            timestamp,
            data,
            metadata: {
                inputSource: input.metadata?.source,
                config,
            },
        };
    }

    /**
     * Summarize evaluation output.
     *
     * @param output - Evaluation output to summarize
     * @returns Summary with `total` = pairwise comparisons and `passed` =
     *   significant differences, plus the analyzed SUT/metric counts
     */
    summarize(output) {
        const { summary } = output.data;
        return {
            total: summary.pairwiseComparisonsCount,
            passed: summary.significantDifferences,
            additional: {
                sutsAnalyzed: summary.sutsAnalyzed,
                metricsAnalyzed: summary.metricsAnalyzed,
                significantDifferences: summary.significantDifferences,
            },
        };
    }

    /**
     * Determine which SUTs to analyze.
     *
     * @param aggregates - Aggregates to collect distinct `sut` values from
     * @param configSuts - Explicit SUT list; returned as-is when non-empty
     * @returns SUT identifiers to analyze
     */
    determineSuts(aggregates, configSuts) {
        if (configSuts && configSuts.length > 0) {
            return configSuts;
        }
        return [...new Set(aggregates.map((agg) => agg.sut))];
    }

    /**
     * Determine which metrics to analyze.
     *
     * @param aggregates - Aggregates to collect metric names from
     * @param configMetrics - Explicit metric list; returned as-is when non-empty
     * @returns Metric names to analyze
     */
    determineMetrics(aggregates, configMetrics) {
        if (configMetrics && configMetrics.length > 0) {
            return configMetrics;
        }
        return [...new Set(aggregates.flatMap((agg) => Object.keys(agg.metrics)))];
    }

    /**
     * Compute rankings for a single metric.
     *
     * Each SUT's score is the mean of the per-aggregate `mean` values for the
     * metric; `std` is the population standard deviation of those values and is
     * `undefined` when it is zero.
     *
     * @param aggregates - Aggregates to rank; entries lacking the metric are skipped
     * @param metric - Metric name
     * @param direction - "higher-better" ranks descending, anything else ascending
     * @returns Ranking entries sorted best-first with 1-based `rank`
     */
    computeRankings(aggregates, metric, direction) {
        const valuesBySut = new Map();
        for (const agg of aggregates) {
            if (!Object.hasOwn(agg.metrics, metric)) {
                continue;
            }
            const value = agg.metrics[metric].mean;
            const bucket = valuesBySut.get(agg.sut);
            if (bucket) {
                bucket.push(value);
            } else {
                valuesBySut.set(agg.sut, [value]);
            }
        }

        const rankings = [];
        for (const [sut, values] of valuesBySut) {
            const mean = meanOf(values);
            const sorted = [...values].sort((a, b) => a - b);
            const mid = Math.floor(sorted.length / 2);
            const median = sorted.length % 2 === 0 ? (sorted[mid - 1] + sorted[mid]) / 2 : sorted[mid];
            const std = Math.sqrt(sumSquaredDeviations(values, mean) / values.length);
            rankings.push({
                sut,
                mean,
                median,
                std: std > 0 ? std : undefined,
                rank: 0, // assigned after sorting
                n: values.length,
            });
        }

        const higherFirst = direction === "higher-better";
        rankings.sort((a, b) => (higherFirst ? b.mean - a.mean : a.mean - b.mean));
        rankings.forEach((entry, position) => {
            entry.rank = position + 1;
        });
        return rankings;
    }

    /**
     * Compute all pairwise comparisons.
     *
     * @param aggregates - Aggregates supplying the values
     * @param suts - SUT identifiers; every unordered pair is compared
     * @param metrics - Metric names to compare on
     * @param significanceLevel - p-value threshold for significance
     * @param minEffectSize - Optional minimum absolute effect size for significance
     * @returns Comparisons for every pair/metric where both SUTs have data
     */
    computePairwiseComparisons(aggregates, suts, metrics, significanceLevel, minEffectSize) {
        const comparisons = [];
        for (const metric of metrics) {
            for (let first = 0; first < suts.length; first++) {
                for (let second = first + 1; second < suts.length; second++) {
                    const comparison = this.compareSutPair(aggregates, suts[first], suts[second], metric, significanceLevel, minEffectSize);
                    if (comparison) {
                        comparisons.push(comparison);
                    }
                }
            }
        }
        return comparisons;
    }

    /**
     * Compare a single pair of SUTs for a metric.
     *
     * A pre-computed `pValue` / `effectSize` on the first aggregate of `sutA` is
     * used when present. Otherwise, with at least 3 values per SUT, the effect
     * size is Cohen's d over the pooled sample standard deviation, and a p-value
     * is estimated from the normal distribution only when df >= 30.
     *
     * @param aggregates - Aggregates supplying the values
     * @param sutA - First SUT (`delta` = meanA - meanB, `ratio` = meanA / meanB)
     * @param sutB - Second SUT
     * @param metric - Metric name
     * @param significanceLevel - p-value threshold for significance
     * @param minEffectSize - Optional minimum absolute effect size for significance
     * @returns Comparison record, or `null` when either SUT has no values
     */
    compareSutPair(aggregates, sutA, sutB, metric, significanceLevel, minEffectSize) {
        const valuesA = [];
        const valuesB = [];
        for (const agg of aggregates) {
            if (!Object.hasOwn(agg.metrics, metric)) {
                continue;
            }
            const value = agg.metrics[metric].mean;
            if (agg.sut === sutA) {
                valuesA.push(value);
            } else if (agg.sut === sutB) {
                valuesB.push(value);
            }
        }
        if (valuesA.length === 0 || valuesB.length === 0) {
            return null;
        }

        const meanA = meanOf(valuesA);
        const meanB = meanOf(valuesB);
        const delta = meanA - meanB;
        // A zero baseline yields Infinity rather than NaN / a division error.
        const ratio = meanB !== 0 ? meanA / meanB : Infinity;

        let pValue;
        let effectSize;

        // NOTE(review): the pre-computed comparison is taken from the first
        // aggregate of sutA and is not keyed by metric — confirm that is the
        // intended schema.
        const precomputed = aggregates.find((agg) => agg.sut === sutA)?.comparisons?.[sutB];
        if (precomputed) {
            pValue = precomputed.pValue;
            effectSize = precomputed.effectSize;
        }

        if (pValue === undefined && valuesA.length >= 3 && valuesB.length >= 3) {
            const df = valuesA.length + valuesB.length - 2;
            // Pooled *sample* variance: (SS_A + SS_B) / (nA + nB - 2). This equals
            // ((nA-1)·sA² + (nB-1)·sB²) / df with Bessel-corrected variances.
            const pooledStd = Math.sqrt((sumSquaredDeviations(valuesA, meanA) + sumSquaredDeviations(valuesB, meanB)) / df);
            if (pooledStd > 0) {
                effectSize = delta / pooledStd;
                const standardError = pooledStd * Math.sqrt(1 / valuesA.length + 1 / valuesB.length);
                const t = delta / standardError;
                // The normal approximation is only trusted for large df; with
                // fewer degrees of freedom no p-value is reported.
                pValue = df >= 30 ? 2 * (1 - this.normalCdf(Math.abs(t))) : undefined;
            }
        }

        const effectLargeEnough = minEffectSize === undefined ||
            (effectSize !== undefined && Math.abs(effectSize) >= minEffectSize);
        const significant = pValue !== undefined && pValue < significanceLevel && effectLargeEnough;

        return {
            sutA,
            sutB,
            metric,
            delta,
            ratio,
            pValue,
            effectSize,
            significant,
        };
    }

    /**
     * Analyze case-class effects on SUT performance.
     *
     * For each metric and SUT, reports how far each case class's mean deviates
     * from the SUT's mean across all case classes. Significance is only tested
     * (z-score against the normal distribution) when the case class has at
     * least two values and the overall spread is non-zero.
     *
     * @param aggregates - Aggregates supplying sut, caseClass and metric values
     * @param metrics - Metric names to analyze
     * @param significanceLevel - p-value threshold for significance
     * @returns Effect records; empty when fewer than two case classes exist
     */
    analyzeCaseClassEffects(aggregates, metrics, significanceLevel) {
        const effects = [];
        const suts = [...new Set(aggregates.map((agg) => agg.sut))];
        const caseClasses = [...new Set(aggregates.map((agg) => agg.caseClass))];
        if (caseClasses.length < 2) {
            return effects;
        }

        for (const metric of metrics) {
            for (const sut of suts) {
                // Own-property check (not `in`) so inherited keys such as
                // "toString" are never mistaken for metrics.
                const sutAggregates = aggregates.filter((agg) => agg.sut === sut && Object.hasOwn(agg.metrics, metric));
                if (sutAggregates.length === 0) {
                    continue;
                }
                const allValues = sutAggregates.map((agg) => agg.metrics[metric].mean);
                const overallMean = meanOf(allValues);
                const overallStd = Math.sqrt(this.variance(allValues));

                for (const caseClass of caseClasses) {
                    const caseValues = sutAggregates
                        .filter((agg) => agg.caseClass === caseClass)
                        .map((agg) => agg.metrics[metric].mean);
                    if (caseValues.length === 0) {
                        continue;
                    }
                    const deviation = meanOf(caseValues) - overallMean;
                    const percentageDeviation = overallMean !== 0 ? (deviation / overallMean) * 100 : 0;

                    let significant = false;
                    if (overallStd > 0 && caseValues.length >= 2) {
                        const zScore = Math.abs(deviation) / (overallStd / Math.sqrt(caseValues.length));
                        significant = 2 * (1 - this.normalCdf(zScore)) < significanceLevel;
                    }

                    effects.push({
                        caseClass: String(caseClass),
                        sut,
                        metric,
                        deviationFromMean: deviation,
                        percentageDeviation,
                        significant,
                    });
                }
            }
        }
        return effects;
    }

    /**
     * Compute correlations between metrics.
     *
     * @param aggregates - Aggregates supplying paired metric values
     * @param metrics - Metric names; every unordered pair is correlated
     * @returns Correlation records for pairs with enough data
     */
    computeMetricCorrelations(aggregates, metrics) {
        const correlations = [];
        for (let first = 0; first < metrics.length; first++) {
            for (let second = first + 1; second < metrics.length; second++) {
                const correlation = this.computeCorrelation(aggregates, metrics[first], metrics[second]);
                if (correlation) {
                    correlations.push(correlation);
                }
            }
        }
        return correlations;
    }

    /**
     * Compute Pearson and Spearman correlation between two metrics.
     *
     * @param aggregates - Aggregates; only those carrying both metrics contribute
     * @param metricA - First metric name
     * @param metricB - Second metric name
     * @returns Correlation record, or `null` with fewer than 3 paired values
     */
    computeCorrelation(aggregates, metricA, metricB) {
        const paired = aggregates.filter((agg) => Object.hasOwn(agg.metrics, metricA) && Object.hasOwn(agg.metrics, metricB));
        if (paired.length < 3) {
            return null;
        }
        const xValues = paired.map((agg) => agg.metrics[metricA].mean);
        const yValues = paired.map((agg) => agg.metrics[metricB].mean);
        const pearsonR = this.pearsonCorrelation(xValues, yValues);
        const spearmanRho = this.spearmanCorrelation(xValues, yValues);
        return {
            metricA,
            metricB,
            pearsonR,
            spearmanRho,
            // The strength label is derived from the Pearson coefficient.
            interpretation: this.interpretCorrelation(pearsonR),
        };
    }

    /**
     * Compute Pearson correlation coefficient.
     *
     * @param x - First series
     * @param y - Second series, paired index-by-index with `x`
     * @returns Coefficient, or 0 when either series has no variation
     */
    pearsonCorrelation(x, y) {
        const n = x.length;
        const meanX = meanOf(x);
        const meanY = meanOf(y);
        let covariance = 0;
        let sumSqX = 0;
        let sumSqY = 0;
        for (let i = 0; i < n; i++) {
            const dx = x[i] - meanX;
            const dy = y[i] - meanY;
            covariance += dx * dy;
            sumSqX += dx * dx;
            sumSqY += dy * dy;
        }
        const denominator = Math.sqrt(sumSqX * sumSqY);
        return denominator === 0 ? 0 : covariance / denominator;
    }

    /**
     * Compute Spearman rank correlation coefficient.
     *
     * @param x - First series
     * @param y - Second series, paired index-by-index with `x`
     * @returns Pearson correlation of the tie-averaged ranks
     */
    spearmanCorrelation(x, y) {
        return this.pearsonCorrelation(this.computeRanks(x), this.computeRanks(y));
    }

    /**
     * Compute ranks for an array of values (handling ties).
     *
     * @param values - Numbers to rank
     * @returns 1-based ranks aligned with `values`; ties share their average rank
     */
    computeRanks(values) {
        const ordered = values
            .map((value, index) => ({ value, index }))
            .sort((a, b) => a.value - b.value);
        const ranks = new Array(values.length);
        let start = 0;
        while (start < ordered.length) {
            // Extend `end` past the run of values equal to ordered[start].
            let end = start;
            while (end < ordered.length && ordered[end].value === ordered[start].value) {
                end++;
            }
            // Positions start..end-1 hold ranks start+1..end; average them.
            const averageRank = (start + end + 1) / 2;
            for (let k = start; k < end; k++) {
                ranks[ordered[k].index] = averageRank;
            }
            start = end;
        }
        return ranks;
    }

    /**
     * Interpret correlation coefficient.
     *
     * @param r - Correlation coefficient
     * @returns Strength label chosen from the absolute value of `r`
     */
    interpretCorrelation(r) {
        const strength = Math.abs(r);
        if (strength >= 0.9) {
            return "very strong";
        }
        if (strength >= 0.7) {
            return "strong";
        }
        if (strength >= 0.5) {
            return "moderate";
        }
        if (strength >= 0.3) {
            return "weak";
        }
        return "negligible";
    }

    /**
     * Compute variance of an array.
     *
     * @param values - Numbers to measure
     * @returns Population variance (divides by n); 0 for an empty array
     */
    variance(values) {
        if (values.length === 0) {
            return 0;
        }
        return sumSquaredDeviations(values, meanOf(values)) / values.length;
    }

    /**
     * Standard normal CDF approximation.
     *
     * Uses the Abramowitz and Stegun polynomial approximation of erf.
     *
     * @param z - Standard score
     * @returns Approximate P(Z <= z)
     */
    normalCdf(z) {
        const a1 = 0.254829592;
        const a2 = -0.284496736;
        const a3 = 1.421413741;
        const a4 = -1.453152027;
        const a5 = 1.061405429;
        const p = 0.3275911;
        const sign = z < 0 ? -1 : 1;
        const x = Math.abs(z) / Math.SQRT2;
        const t = 1.0 / (1.0 + p * x);
        const erf = 1.0 - ((((a5 * t + a4) * t + a3) * t + a2) * t + a1) * t * Math.exp(-x * x);
        return 0.5 * (1.0 + sign * erf);
    }
}

/** Arithmetic mean of a numeric array (NaN for an empty array). */
function meanOf(values) {
    return values.reduce((total, value) => total + value, 0) / values.length;
}

/** Sum of squared deviations of `values` from `center`. */
function sumSquaredDeviations(values, center) {
    return values.reduce((total, value) => total + (value - center) ** 2, 0);
}
545
+ //# sourceMappingURL=exploratory-evaluator.js.map