ppef 1.1.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +76 -125
- package/dist/__tests__/cli/evaluate-command.integration.test.d.ts +8 -0
- package/dist/__tests__/cli/evaluate-command.integration.test.d.ts.map +1 -0
- package/dist/__tests__/cli/evaluate-command.integration.test.js +308 -0
- package/dist/__tests__/cli/evaluate-command.integration.test.js.map +1 -0
- package/dist/__tests__/evaluators/claims-evaluator.unit.test.d.ts +8 -0
- package/dist/__tests__/evaluators/claims-evaluator.unit.test.d.ts.map +1 -0
- package/dist/__tests__/evaluators/claims-evaluator.unit.test.js +405 -0
- package/dist/__tests__/evaluators/claims-evaluator.unit.test.js.map +1 -0
- package/dist/__tests__/evaluators/metrics-evaluator.unit.test.d.ts +8 -0
- package/dist/__tests__/evaluators/metrics-evaluator.unit.test.d.ts.map +1 -0
- package/dist/__tests__/evaluators/metrics-evaluator.unit.test.js +424 -0
- package/dist/__tests__/evaluators/metrics-evaluator.unit.test.js.map +1 -0
- package/dist/__tests__/evaluators/registry.unit.test.d.ts +7 -0
- package/dist/__tests__/evaluators/registry.unit.test.d.ts.map +1 -0
- package/dist/__tests__/evaluators/registry.unit.test.js +173 -0
- package/dist/__tests__/evaluators/registry.unit.test.js.map +1 -0
- package/dist/__tests__/evaluators/robustness-evaluator.unit.test.d.ts +8 -0
- package/dist/__tests__/evaluators/robustness-evaluator.unit.test.d.ts.map +1 -0
- package/dist/__tests__/evaluators/robustness-evaluator.unit.test.js +260 -0
- package/dist/__tests__/evaluators/robustness-evaluator.unit.test.js.map +1 -0
- package/dist/__tests__/framework-pipeline.integration.test.js +36 -9
- package/dist/__tests__/framework-pipeline.integration.test.js.map +1 -1
- package/dist/__tests__/index-exports.unit.test.js +9 -12
- package/dist/__tests__/index-exports.unit.test.js.map +1 -1
- package/dist/aggregation/pipeline.d.ts.map +1 -1
- package/dist/aggregation/pipeline.js +40 -3
- package/dist/aggregation/pipeline.js.map +1 -1
- package/dist/claims/index.d.ts +6 -3
- package/dist/claims/index.d.ts.map +1 -1
- package/dist/claims/index.js +6 -3
- package/dist/claims/index.js.map +1 -1
- package/dist/cli/__tests__/aggregate.command.unit.test.js +3 -0
- package/dist/cli/__tests__/aggregate.command.unit.test.js.map +1 -1
- package/dist/cli/__tests__/binary-sut.integration.test.d.ts +8 -0
- package/dist/cli/__tests__/binary-sut.integration.test.d.ts.map +1 -0
- package/dist/cli/__tests__/binary-sut.integration.test.js +165 -0
- package/dist/cli/__tests__/binary-sut.integration.test.js.map +1 -0
- package/dist/cli/__tests__/config-loader.unit.test.d.ts +7 -0
- package/dist/cli/__tests__/config-loader.unit.test.d.ts.map +1 -0
- package/dist/cli/__tests__/config-loader.unit.test.js +611 -0
- package/dist/cli/__tests__/config-loader.unit.test.js.map +1 -0
- package/dist/cli/command-deps.d.ts +13 -1
- package/dist/cli/command-deps.d.ts.map +1 -1
- package/dist/cli/commands/aggregate.d.ts.map +1 -1
- package/dist/cli/commands/aggregate.js +3 -0
- package/dist/cli/commands/aggregate.js.map +1 -1
- package/dist/cli/commands/evaluate.d.ts +41 -0
- package/dist/cli/commands/evaluate.d.ts.map +1 -0
- package/dist/cli/commands/evaluate.js +287 -0
- package/dist/cli/commands/evaluate.js.map +1 -0
- package/dist/cli/commands/run.d.ts.map +1 -1
- package/dist/cli/commands/run.js +93 -1
- package/dist/cli/commands/run.js.map +1 -1
- package/dist/cli/index.d.ts +2 -1
- package/dist/cli/index.d.ts.map +1 -1
- package/dist/cli/index.js +3 -1
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/module-loader.d.ts +23 -1
- package/dist/cli/module-loader.d.ts.map +1 -1
- package/dist/cli/module-loader.js +19 -1
- package/dist/cli/module-loader.js.map +1 -1
- package/dist/cli/types.d.ts +19 -0
- package/dist/cli/types.d.ts.map +1 -1
- package/dist/evaluators/claims-evaluator.d.ts +87 -0
- package/dist/evaluators/claims-evaluator.d.ts.map +1 -0
- package/dist/evaluators/claims-evaluator.js +289 -0
- package/dist/evaluators/claims-evaluator.js.map +1 -0
- package/dist/evaluators/exploratory-evaluator.d.ts +136 -0
- package/dist/evaluators/exploratory-evaluator.d.ts.map +1 -0
- package/dist/evaluators/exploratory-evaluator.js +545 -0
- package/dist/evaluators/exploratory-evaluator.js.map +1 -0
- package/dist/evaluators/index.d.ts +13 -0
- package/dist/evaluators/index.d.ts.map +1 -0
- package/dist/evaluators/index.js +14 -0
- package/dist/evaluators/index.js.map +1 -0
- package/dist/evaluators/metrics-evaluator.d.ts +114 -0
- package/dist/evaluators/metrics-evaluator.d.ts.map +1 -0
- package/dist/evaluators/metrics-evaluator.js +433 -0
- package/dist/evaluators/metrics-evaluator.js.map +1 -0
- package/dist/evaluators/registry.d.ts +106 -0
- package/dist/evaluators/registry.d.ts.map +1 -0
- package/dist/evaluators/registry.js +148 -0
- package/dist/evaluators/registry.js.map +1 -0
- package/dist/evaluators/robustness-evaluator.d.ts +57 -0
- package/dist/evaluators/robustness-evaluator.d.ts.map +1 -0
- package/dist/evaluators/robustness-evaluator.js +186 -0
- package/dist/evaluators/robustness-evaluator.js.map +1 -0
- package/dist/executor/__tests__/binary-sut.unit.test.d.ts +8 -0
- package/dist/executor/__tests__/binary-sut.unit.test.d.ts.map +1 -0
- package/dist/executor/__tests__/binary-sut.unit.test.js +313 -0
- package/dist/executor/__tests__/binary-sut.unit.test.js.map +1 -0
- package/dist/executor/__tests__/checkpoint-storage.unit.test.js +43 -0
- package/dist/executor/__tests__/checkpoint-storage.unit.test.js.map +1 -1
- package/dist/executor/__tests__/executor.unit.test.js +56 -9
- package/dist/executor/__tests__/executor.unit.test.js.map +1 -1
- package/dist/executor/__tests__/resource-calculator.unit.test.d.ts +10 -0
- package/dist/executor/__tests__/resource-calculator.unit.test.d.ts.map +1 -0
- package/dist/executor/__tests__/resource-calculator.unit.test.js +104 -0
- package/dist/executor/__tests__/resource-calculator.unit.test.js.map +1 -0
- package/dist/executor/__tests__/worker-threads-executor.unit.test.d.ts +8 -0
- package/dist/executor/__tests__/worker-threads-executor.unit.test.d.ts.map +1 -0
- package/dist/executor/__tests__/worker-threads-executor.unit.test.js +276 -0
- package/dist/executor/__tests__/worker-threads-executor.unit.test.js.map +1 -0
- package/dist/executor/binary-sut.d.ts +105 -0
- package/dist/executor/binary-sut.d.ts.map +1 -0
- package/dist/executor/binary-sut.js +174 -0
- package/dist/executor/binary-sut.js.map +1 -0
- package/dist/executor/checkpoint-storage.d.ts.map +1 -1
- package/dist/executor/checkpoint-storage.js +6 -4
- package/dist/executor/checkpoint-storage.js.map +1 -1
- package/dist/executor/executor.d.ts +28 -0
- package/dist/executor/executor.d.ts.map +1 -1
- package/dist/executor/executor.js +85 -24
- package/dist/executor/executor.js.map +1 -1
- package/dist/executor/index.d.ts +4 -0
- package/dist/executor/index.d.ts.map +1 -1
- package/dist/executor/index.js +4 -0
- package/dist/executor/index.js.map +1 -1
- package/dist/executor/resource-calculator.d.ts +49 -0
- package/dist/executor/resource-calculator.d.ts.map +1 -0
- package/dist/executor/resource-calculator.js +129 -0
- package/dist/executor/resource-calculator.js.map +1 -0
- package/dist/executor/worker-entry.js +26 -10
- package/dist/executor/worker-entry.js.map +1 -1
- package/dist/executor/worker-executor.d.ts +104 -3
- package/dist/executor/worker-executor.d.ts.map +1 -1
- package/dist/executor/worker-executor.js +224 -4
- package/dist/executor/worker-executor.js.map +1 -1
- package/dist/executor/worker-threads-executor.d.ts +245 -0
- package/dist/executor/worker-threads-executor.d.ts.map +1 -0
- package/dist/executor/worker-threads-executor.js +332 -0
- package/dist/executor/worker-threads-executor.js.map +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +4 -2
- package/dist/index.js.map +1 -1
- package/dist/renderers/latex-renderer.d.ts +60 -0
- package/dist/renderers/latex-renderer.d.ts.map +1 -1
- package/dist/renderers/latex-renderer.js +299 -0
- package/dist/renderers/latex-renderer.js.map +1 -1
- package/dist/renderers/types.d.ts +9 -0
- package/dist/renderers/types.d.ts.map +1 -1
- package/dist/renderers/types.js.map +1 -1
- package/dist/robustness/index.d.ts +5 -2
- package/dist/robustness/index.d.ts.map +1 -1
- package/dist/robustness/index.js +4 -2
- package/dist/robustness/index.js.map +1 -1
- package/dist/types/evaluator.d.ts +449 -0
- package/dist/types/evaluator.d.ts.map +1 -0
- package/dist/types/evaluator.js +9 -0
- package/dist/types/evaluator.js.map +1 -0
- package/dist/types/result.d.ts +2 -0
- package/dist/types/result.d.ts.map +1 -1
- package/package.json +1 -1
- package/dist/claims/__tests__/evaluator.unit.test.d.ts +0 -12
- package/dist/claims/__tests__/evaluator.unit.test.d.ts.map +0 -1
- package/dist/claims/__tests__/evaluator.unit.test.js +0 -801
- package/dist/claims/__tests__/evaluator.unit.test.js.map +0 -1
- package/dist/claims/evaluator.d.ts +0 -33
- package/dist/claims/evaluator.d.ts.map +0 -1
- package/dist/claims/evaluator.js +0 -174
- package/dist/claims/evaluator.js.map +0 -1
- package/dist/robustness/__tests__/analyzer.unit.test.d.ts +0 -11
- package/dist/robustness/__tests__/analyzer.unit.test.d.ts.map +0 -1
- package/dist/robustness/__tests__/analyzer.unit.test.js +0 -455
- package/dist/robustness/__tests__/analyzer.unit.test.js.map +0 -1
- package/dist/robustness/analyzer.d.ts +0 -61
- package/dist/robustness/analyzer.d.ts.map +0 -1
- package/dist/robustness/analyzer.js +0 -191
- package/dist/robustness/analyzer.js.map +0 -1
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Exploratory Evaluator
|
|
3
|
+
*
|
|
4
|
+
* Hypothesis-free analysis for discovering patterns in evaluation data.
|
|
5
|
+
* Unlike ClaimsEvaluator which tests predefined hypotheses, this evaluator
|
|
6
|
+
* performs exploratory analysis including:
|
|
7
|
+
* - Ranking all SUTs by any metric (not just primary vs baseline)
|
|
8
|
+
* - Finding significant pairwise differences (N-way comparisons)
|
|
9
|
+
* - Discovering case-class effects
|
|
10
|
+
* - Computing metric correlations
|
|
11
|
+
*/
|
|
12
|
+
import type { EvaluationContext, EvaluationOutput, EvaluationSummary, Evaluator, ExploratoryEvaluatorConfig, ExploratoryEvaluatorData, IEvaluator, ValidationResult } from "../types/evaluator.js";
|
|
13
|
+
/**
 * Exploratory evaluator - hypothesis-free comparative analysis.
 *
 * Ranks SUTs per metric, compares every pair of SUTs, and optionally reports
 * case-class effects and correlations between metrics.
 */
export declare class ExploratoryEvaluator implements Evaluator<ExploratoryEvaluatorConfig, EvaluationContext, ExploratoryEvaluatorData>, IEvaluator {
    /** Discriminator identifying this evaluator kind */
    readonly type: "exploratory";
    /** Schema version written into the evaluation output */
    private static readonly VERSION;
    /** Significance level applied when the configuration does not set one */
    private static readonly DEFAULT_SIGNIFICANCE;
    /**
     * Validate exploratory evaluator configuration.
     *
     * @param config - Configuration to validate
     * @returns Validation result
     */
    validateConfig(config: ExploratoryEvaluatorConfig): ValidationResult;
    /**
     * Perform exploratory evaluation.
     *
     * @param config - Exploratory evaluator configuration
     * @param input - Evaluation context with aggregates
     * @returns Evaluation output
     */
    evaluate(config: ExploratoryEvaluatorConfig, input: EvaluationContext): EvaluationOutput<ExploratoryEvaluatorData>;
    /**
     * Summarize evaluation output.
     *
     * @param output - Evaluation output to summarize
     * @returns Summary statistics
     */
    summarize(output: EvaluationOutput<ExploratoryEvaluatorData>): EvaluationSummary;
    /**
     * Determine which SUTs to analyze.
     * @param aggregates - Aggregates whose `sut` values are used when no SUTs are configured
     * @param configSuts - SUT names from the configuration; takes precedence when non-empty
     */
    private determineSuts;
    /**
     * Determine which metrics to analyze.
     * @param aggregates - Aggregates whose metric keys are used when no metrics are configured
     * @param configMetrics - Metric names from the configuration; takes precedence when non-empty
     */
    private determineMetrics;
    /**
     * Compute rankings for a single metric.
     * @param aggregates - Aggregates to rank; entries lacking the metric are ignored
     * @param metric - Name of the metric to rank by
     * @param direction - "higher-better" ranks the largest mean first; otherwise the smallest
     */
    private computeRankings;
    /**
     * Compute all pairwise comparisons.
     * @param aggregates - Aggregates to read metric values from
     * @param suts - SUT names; every unordered pair is compared once
     * @param metrics - Metric names; each pair is compared per metric
     * @param significanceLevel - p-value threshold below which a difference is significant
     * @param minEffectSize - Optional minimum absolute effect size required for significance
     */
    private computePairwiseComparisons;
    /**
     * Compare a single pair of SUTs for a metric.
     * @param aggregates - Aggregates to read metric values from
     * @param sutA - First SUT of the pair
     * @param sutB - Second SUT of the pair
     * @param metric - Name of the metric to compare
     * @param significanceLevel - p-value threshold below which the difference is significant
     * @param minEffectSize - Optional minimum absolute effect size required for significance
     */
    private compareSutPair;
    /**
     * Analyze case-class effects on SUT performance.
     * @param aggregates - Aggregates to analyze
     * @param metrics - Metric names to analyze
     * @param significanceLevel - p-value threshold below which a deviation is significant
     */
    private analyzeCaseClassEffects;
    /**
     * Compute correlations between metrics.
     * @param aggregates - Aggregates to read metric values from
     * @param metrics - Metric names; every unordered pair is correlated once
     */
    private computeMetricCorrelations;
    /**
     * Compute Pearson and Spearman correlation between two metrics.
     * @param aggregates - Aggregates to read paired metric values from
     * @param metricA - First metric name
     * @param metricB - Second metric name
     */
    private computeCorrelation;
    /**
     * Compute Pearson correlation coefficient.
     * @param x - First series of values
     * @param y - Second series of values
     */
    private pearsonCorrelation;
    /**
     * Compute Spearman rank correlation coefficient.
     * @param x - First series of values
     * @param y - Second series of values
     */
    private spearmanCorrelation;
    /**
     * Compute ranks for an array of values (handling ties).
     * @param values - Values to rank
     */
    private computeRanks;
    /**
     * Interpret correlation coefficient.
     * @param r - Correlation coefficient to interpret
     */
    private interpretCorrelation;
    /**
     * Compute variance of an array.
     * @param values - Values to compute the variance of
     */
    private variance;
    /**
     * Standard normal CDF approximation.
     * @param z - Point at which to evaluate the CDF
     */
    private normalCdf;
}
|
|
136
|
+
//# sourceMappingURL=exploratory-evaluator.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"exploratory-evaluator.d.ts","sourceRoot":"","sources":["../../src/evaluators/exploratory-evaluator.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAGH,OAAO,KAAK,EAEX,iBAAiB,EACjB,gBAAgB,EAChB,iBAAiB,EACjB,SAAS,EACT,0BAA0B,EAC1B,wBAAwB,EAExB,UAAU,EAKV,gBAAgB,EAChB,MAAM,uBAAuB,CAAC;AAE/B;;GAEG;AACH,qBAAa,oBACZ,YACC,SAAS,CAAC,0BAA0B,EAAE,iBAAiB,EAAE,wBAAwB,CAAC,EAClF,UAAU;IAEX,sBAAsB;IACtB,QAAQ,CAAC,IAAI,EAAG,aAAa,CAAU;IAEvC,qBAAqB;IACrB,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAW;IAE1C,iCAAiC;IACjC,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,oBAAoB,CAAQ;IAEpD;;;;;OAKG;IACH,cAAc,CAAC,MAAM,EAAE,0BAA0B,GAAG,gBAAgB;IA2CpE;;;;;;OAMG;IACH,QAAQ,CACP,MAAM,EAAE,0BAA0B,EAClC,KAAK,EAAE,iBAAiB,GACtB,gBAAgB,CAAC,wBAAwB,CAAC;IAsF7C;;;;;OAKG;IACH,SAAS,CAAC,MAAM,EAAE,gBAAgB,CAAC,wBAAwB,CAAC,GAAG,iBAAiB;IAchF;;;;OAIG;IACH,OAAO,CAAC,aAAa;IAQrB;;;;OAIG;IACH,OAAO,CAAC,gBAAgB;IAcxB;;;;;OAKG;IACH,OAAO,CAAC,eAAe;IAuEvB;;;;;;;OAOG;IACH,OAAO,CAAC,0BAA0B;IAmClC;;;;;;;;OAQG;IACH,OAAO,CAAC,cAAc;IAuFtB;;;;;OAKG;IACH,OAAO,CAAC,uBAAuB;IA8D/B;;;;OAIG;IACH,OAAO,CAAC,yBAAyB;IAqBjC;;;;;OAKG;IACH,OAAO,CAAC,kBAAkB;IA2C1B;;;;OAIG;IACH,OAAO,CAAC,kBAAkB;IAqB1B;;;;OAIG;IACH,OAAO,CAAC,mBAAmB;IAM3B;;;OAGG;IACH,OAAO,CAAC,YAAY;IA0BpB;;;OAGG;IACH,OAAO,CAAC,oBAAoB;IAS5B;;;OAGG;IACH,OAAO,CAAC,QAAQ;IAMhB;;;OAGG;IACH,OAAO,CAAC,SAAS;CAiBjB"}
|
|
@@ -0,0 +1,545 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Exploratory Evaluator
|
|
3
|
+
*
|
|
4
|
+
* Hypothesis-free analysis for discovering patterns in evaluation data.
|
|
5
|
+
* Unlike ClaimsEvaluator which tests predefined hypotheses, this evaluator
|
|
6
|
+
* performs exploratory analysis including:
|
|
7
|
+
* - Ranking all SUTs by any metric (not just primary vs baseline)
|
|
8
|
+
* - Finding significant pairwise differences (N-way comparisons)
|
|
9
|
+
* - Discovering case-class effects
|
|
10
|
+
* - Computing metric correlations
|
|
11
|
+
*/
|
|
12
|
+
/**
|
|
13
|
+
* Exploratory evaluator - hypothesis-free comparative analysis.
|
|
14
|
+
*/
|
|
15
|
+
export class ExploratoryEvaluator {
|
|
16
|
+
/** Type identifier */
|
|
17
|
+
type = "exploratory";
|
|
18
|
+
/** Schema version */
|
|
19
|
+
static VERSION = "1.0.0";
|
|
20
|
+
/** Default significance level */
|
|
21
|
+
static DEFAULT_SIGNIFICANCE = 0.05;
|
|
22
|
+
/**
|
|
23
|
+
* Validate exploratory evaluator configuration.
|
|
24
|
+
*
|
|
25
|
+
* @param config - Configuration to validate
|
|
26
|
+
* @returns Validation result
|
|
27
|
+
*/
|
|
28
|
+
validateConfig(config) {
|
|
29
|
+
const errors = [];
|
|
30
|
+
const warnings = [];
|
|
31
|
+
// Validate significance level if provided
|
|
32
|
+
if (config.significanceLevel !== undefined) {
|
|
33
|
+
if (config.significanceLevel <= 0 || config.significanceLevel >= 1) {
|
|
34
|
+
errors.push("significanceLevel must be between 0 and 1 (exclusive)");
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
// Validate min effect size if provided
|
|
38
|
+
if (config.minEffectSize !== undefined && config.minEffectSize < 0) {
|
|
39
|
+
errors.push("minEffectSize must be non-negative");
|
|
40
|
+
}
|
|
41
|
+
// Validate metric directions
|
|
42
|
+
if (config.metricDirections) {
|
|
43
|
+
for (const [metric, direction] of Object.entries(config.metricDirections)) {
|
|
44
|
+
const validDirections = ["higher-better", "lower-better"];
|
|
45
|
+
if (!validDirections.includes(direction)) {
|
|
46
|
+
errors.push(`Invalid direction for metric "${metric}": must be "higher-better" or "lower-better"`);
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
// Warning if no metrics or SUTs specified
|
|
51
|
+
if (!config.metrics || config.metrics.length === 0) {
|
|
52
|
+
warnings.push("No metrics specified - will analyze all available metrics");
|
|
53
|
+
}
|
|
54
|
+
if (!config.suts || config.suts.length === 0) {
|
|
55
|
+
warnings.push("No SUTs specified - will analyze all available SUTs");
|
|
56
|
+
}
|
|
57
|
+
return {
|
|
58
|
+
valid: errors.length === 0,
|
|
59
|
+
errors: errors.length > 0 ? errors : undefined,
|
|
60
|
+
warnings: warnings.length > 0 ? warnings : undefined,
|
|
61
|
+
};
|
|
62
|
+
}
|
|
63
|
+
/**
|
|
64
|
+
* Perform exploratory evaluation.
|
|
65
|
+
*
|
|
66
|
+
* @param config - Exploratory evaluator configuration
|
|
67
|
+
* @param input - Evaluation context with aggregates
|
|
68
|
+
* @returns Evaluation output
|
|
69
|
+
*/
|
|
70
|
+
evaluate(config, input) {
|
|
71
|
+
const { aggregates } = input;
|
|
72
|
+
const significanceLevel = config.significanceLevel ?? ExploratoryEvaluator.DEFAULT_SIGNIFICANCE;
|
|
73
|
+
// Determine which SUTs and metrics to analyze
|
|
74
|
+
const sutsToAnalyze = this.determineSuts(aggregates, config.suts);
|
|
75
|
+
const metricsToAnalyze = this.determineMetrics(aggregates, config.metrics);
|
|
76
|
+
// Filter aggregates to only include specified SUTs
|
|
77
|
+
const filteredAggregates = aggregates.filter((agg) => sutsToAnalyze.includes(agg.sut));
|
|
78
|
+
// Compute rankings for each metric
|
|
79
|
+
const rankings = {};
|
|
80
|
+
for (const metric of metricsToAnalyze) {
|
|
81
|
+
rankings[metric] = this.computeRankings(filteredAggregates, metric, config.metricDirections?.[metric] ?? "higher-better");
|
|
82
|
+
}
|
|
83
|
+
// Compute all pairwise comparisons
|
|
84
|
+
const pairwiseComparisons = this.computePairwiseComparisons(filteredAggregates, sutsToAnalyze, metricsToAnalyze, significanceLevel, config.minEffectSize);
|
|
85
|
+
// Analyze case-class effects if requested
|
|
86
|
+
let caseClassEffects;
|
|
87
|
+
if (config.analyzeCaseClassEffects !== false) {
|
|
88
|
+
caseClassEffects = this.analyzeCaseClassEffects(filteredAggregates, metricsToAnalyze, significanceLevel);
|
|
89
|
+
}
|
|
90
|
+
// Compute metric correlations if requested
|
|
91
|
+
let metricCorrelations;
|
|
92
|
+
if (config.computeCorrelations !== false && metricsToAnalyze.length >= 2) {
|
|
93
|
+
metricCorrelations = this.computeMetricCorrelations(filteredAggregates, metricsToAnalyze);
|
|
94
|
+
}
|
|
95
|
+
// Determine best SUT per metric
|
|
96
|
+
const bestSutPerMetric = {};
|
|
97
|
+
for (const [metric, ranking] of Object.entries(rankings)) {
|
|
98
|
+
if (ranking.length > 0) {
|
|
99
|
+
bestSutPerMetric[metric] = ranking[0].sut;
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
// Count unique case classes
|
|
103
|
+
const caseClasses = new Set(filteredAggregates.map((agg) => agg.caseClass));
|
|
104
|
+
const summary = {
|
|
105
|
+
version: ExploratoryEvaluator.VERSION,
|
|
106
|
+
timestamp: new Date().toISOString(),
|
|
107
|
+
rankings,
|
|
108
|
+
pairwiseComparisons,
|
|
109
|
+
caseClassEffects,
|
|
110
|
+
metricCorrelations,
|
|
111
|
+
summary: {
|
|
112
|
+
sutsAnalyzed: sutsToAnalyze.length,
|
|
113
|
+
metricsAnalyzed: metricsToAnalyze.length,
|
|
114
|
+
pairwiseComparisonsCount: pairwiseComparisons.length,
|
|
115
|
+
significantDifferences: pairwiseComparisons.filter((c) => c.significant).length,
|
|
116
|
+
caseClassesAnalyzed: caseClasses.size,
|
|
117
|
+
bestSutPerMetric,
|
|
118
|
+
},
|
|
119
|
+
};
|
|
120
|
+
return {
|
|
121
|
+
type: "exploratory",
|
|
122
|
+
version: ExploratoryEvaluator.VERSION,
|
|
123
|
+
timestamp: new Date().toISOString(),
|
|
124
|
+
data: summary,
|
|
125
|
+
metadata: {
|
|
126
|
+
inputSource: input.metadata?.source,
|
|
127
|
+
config,
|
|
128
|
+
},
|
|
129
|
+
};
|
|
130
|
+
}
|
|
131
|
+
/**
|
|
132
|
+
* Summarize evaluation output.
|
|
133
|
+
*
|
|
134
|
+
* @param output - Evaluation output to summarize
|
|
135
|
+
* @returns Summary statistics
|
|
136
|
+
*/
|
|
137
|
+
summarize(output) {
|
|
138
|
+
const { summary } = output.data;
|
|
139
|
+
return {
|
|
140
|
+
total: summary.pairwiseComparisonsCount,
|
|
141
|
+
passed: summary.significantDifferences,
|
|
142
|
+
additional: {
|
|
143
|
+
sutsAnalyzed: summary.sutsAnalyzed,
|
|
144
|
+
metricsAnalyzed: summary.metricsAnalyzed,
|
|
145
|
+
significantDifferences: summary.significantDifferences,
|
|
146
|
+
},
|
|
147
|
+
};
|
|
148
|
+
}
|
|
149
|
+
/**
|
|
150
|
+
* Determine which SUTs to analyze.
|
|
151
|
+
* @param aggregates
|
|
152
|
+
* @param configSuts
|
|
153
|
+
*/
|
|
154
|
+
determineSuts(aggregates, configSuts) {
|
|
155
|
+
if (configSuts && configSuts.length > 0) {
|
|
156
|
+
return configSuts;
|
|
157
|
+
}
|
|
158
|
+
// Extract unique SUTs from aggregates
|
|
159
|
+
return [...new Set(aggregates.map((agg) => agg.sut))];
|
|
160
|
+
}
|
|
161
|
+
/**
|
|
162
|
+
* Determine which metrics to analyze.
|
|
163
|
+
* @param aggregates
|
|
164
|
+
* @param configMetrics
|
|
165
|
+
*/
|
|
166
|
+
determineMetrics(aggregates, configMetrics) {
|
|
167
|
+
if (configMetrics && configMetrics.length > 0) {
|
|
168
|
+
return configMetrics;
|
|
169
|
+
}
|
|
170
|
+
// Extract unique metrics from aggregates
|
|
171
|
+
const metrics = new Set();
|
|
172
|
+
for (const agg of aggregates) {
|
|
173
|
+
for (const metric of Object.keys(agg.metrics)) {
|
|
174
|
+
metrics.add(metric);
|
|
175
|
+
}
|
|
176
|
+
}
|
|
177
|
+
return [...metrics];
|
|
178
|
+
}
|
|
179
|
+
/**
|
|
180
|
+
* Compute rankings for a single metric.
|
|
181
|
+
* @param aggregates
|
|
182
|
+
* @param metric
|
|
183
|
+
* @param direction
|
|
184
|
+
*/
|
|
185
|
+
computeRankings(aggregates, metric, direction) {
|
|
186
|
+
// Group aggregates by SUT and compute mean/median across case classes
|
|
187
|
+
const sutStats = new Map();
|
|
188
|
+
for (const agg of aggregates) {
|
|
189
|
+
if (!Object.hasOwn(agg.metrics, metric))
|
|
190
|
+
continue;
|
|
191
|
+
const metricStats = agg.metrics[metric];
|
|
192
|
+
let existing = sutStats.get(agg.sut);
|
|
193
|
+
if (!existing) {
|
|
194
|
+
existing = { values: [], sum: 0, count: 0 };
|
|
195
|
+
sutStats.set(agg.sut, existing);
|
|
196
|
+
}
|
|
197
|
+
existing.values.push(metricStats.mean);
|
|
198
|
+
existing.sum += metricStats.mean;
|
|
199
|
+
existing.count++;
|
|
200
|
+
}
|
|
201
|
+
// Compute rankings
|
|
202
|
+
const rankings = [];
|
|
203
|
+
for (const [sut, stats] of sutStats) {
|
|
204
|
+
const mean = stats.sum / stats.count;
|
|
205
|
+
const sortedValues = [...stats.values].sort((a, b) => a - b);
|
|
206
|
+
const median = sortedValues.length % 2 === 0
|
|
207
|
+
? (sortedValues[sortedValues.length / 2 - 1] + sortedValues[sortedValues.length / 2]) / 2
|
|
208
|
+
: sortedValues[Math.floor(sortedValues.length / 2)];
|
|
209
|
+
// Compute standard deviation
|
|
210
|
+
const squaredDiffs = stats.values.map((v) => (v - mean) ** 2);
|
|
211
|
+
const variance = squaredDiffs.reduce((a, b) => a + b, 0) / stats.count;
|
|
212
|
+
const std = Math.sqrt(variance);
|
|
213
|
+
rankings.push({
|
|
214
|
+
sut,
|
|
215
|
+
mean,
|
|
216
|
+
median,
|
|
217
|
+
std: std > 0 ? std : undefined,
|
|
218
|
+
rank: 0, // Will be set after sorting
|
|
219
|
+
n: stats.count,
|
|
220
|
+
});
|
|
221
|
+
}
|
|
222
|
+
// Sort by mean (direction determines order)
|
|
223
|
+
rankings.sort((a, b) => {
|
|
224
|
+
if (direction === "higher-better") {
|
|
225
|
+
return b.mean - a.mean; // Higher first
|
|
226
|
+
}
|
|
227
|
+
return a.mean - b.mean; // Lower first
|
|
228
|
+
});
|
|
229
|
+
// Assign ranks (1-indexed)
|
|
230
|
+
for (let index = 0; index < rankings.length; index++) {
|
|
231
|
+
rankings[index].rank = index + 1;
|
|
232
|
+
}
|
|
233
|
+
return rankings;
|
|
234
|
+
}
|
|
235
|
+
/**
|
|
236
|
+
* Compute all pairwise comparisons.
|
|
237
|
+
* @param aggregates
|
|
238
|
+
* @param suts
|
|
239
|
+
* @param metrics
|
|
240
|
+
* @param significanceLevel
|
|
241
|
+
* @param minEffectSize
|
|
242
|
+
*/
|
|
243
|
+
computePairwiseComparisons(aggregates, suts, metrics, significanceLevel, minEffectSize) {
|
|
244
|
+
const comparisons = [];
|
|
245
|
+
// For each metric, compare all pairs of SUTs
|
|
246
|
+
for (const metric of metrics) {
|
|
247
|
+
for (let index = 0; index < suts.length; index++) {
|
|
248
|
+
for (let index_ = index + 1; index_ < suts.length; index_++) {
|
|
249
|
+
const sutA = suts[index];
|
|
250
|
+
const sutB = suts[index_];
|
|
251
|
+
const comparison = this.compareSutPair(aggregates, sutA, sutB, metric, significanceLevel, minEffectSize);
|
|
252
|
+
if (comparison) {
|
|
253
|
+
comparisons.push(comparison);
|
|
254
|
+
}
|
|
255
|
+
}
|
|
256
|
+
}
|
|
257
|
+
}
|
|
258
|
+
return comparisons;
|
|
259
|
+
}
|
|
260
|
+
/**
|
|
261
|
+
* Compare a single pair of SUTs for a metric.
|
|
262
|
+
* @param aggregates
|
|
263
|
+
* @param sutA
|
|
264
|
+
* @param sutB
|
|
265
|
+
* @param metric
|
|
266
|
+
* @param significanceLevel
|
|
267
|
+
* @param minEffectSize
|
|
268
|
+
*/
|
|
269
|
+
compareSutPair(aggregates, sutA, sutB, metric, significanceLevel, minEffectSize) {
|
|
270
|
+
// Get values for each SUT
|
|
271
|
+
const valuesA = [];
|
|
272
|
+
const valuesB = [];
|
|
273
|
+
for (const agg of aggregates) {
|
|
274
|
+
if (!Object.hasOwn(agg.metrics, metric))
|
|
275
|
+
continue;
|
|
276
|
+
const metricStats = agg.metrics[metric];
|
|
277
|
+
if (agg.sut === sutA) {
|
|
278
|
+
valuesA.push(metricStats.mean);
|
|
279
|
+
}
|
|
280
|
+
else if (agg.sut === sutB) {
|
|
281
|
+
valuesB.push(metricStats.mean);
|
|
282
|
+
}
|
|
283
|
+
}
|
|
284
|
+
if (valuesA.length === 0 || valuesB.length === 0) {
|
|
285
|
+
return null;
|
|
286
|
+
}
|
|
287
|
+
// Compute basic statistics
|
|
288
|
+
const meanA = valuesA.reduce((a, b) => a + b, 0) / valuesA.length;
|
|
289
|
+
const meanB = valuesB.reduce((a, b) => a + b, 0) / valuesB.length;
|
|
290
|
+
const delta = meanA - meanB;
|
|
291
|
+
const ratio = meanB !== 0 ? meanA / meanB : Infinity;
|
|
292
|
+
// Use existing comparison data if available
|
|
293
|
+
let pValue;
|
|
294
|
+
let effectSize;
|
|
295
|
+
// Look for pre-computed comparison in aggregates
|
|
296
|
+
const aggA = aggregates.find((agg) => agg.sut === sutA);
|
|
297
|
+
if (aggA?.comparisons?.[sutB]) {
|
|
298
|
+
const comparison = aggA.comparisons[sutB];
|
|
299
|
+
pValue = comparison.pValue;
|
|
300
|
+
effectSize = comparison.effectSize;
|
|
301
|
+
}
|
|
302
|
+
// If no pre-computed values, estimate significance
|
|
303
|
+
if (pValue === undefined && valuesA.length >= 3 && valuesB.length >= 3) {
|
|
304
|
+
// Compute pooled standard deviation for effect size
|
|
305
|
+
const varA = this.variance(valuesA);
|
|
306
|
+
const varB = this.variance(valuesB);
|
|
307
|
+
const pooledStd = Math.sqrt(((valuesA.length - 1) * varA + (valuesB.length - 1) * varB) /
|
|
308
|
+
(valuesA.length + valuesB.length - 2));
|
|
309
|
+
if (pooledStd > 0) {
|
|
310
|
+
effectSize = delta / pooledStd;
|
|
311
|
+
// Simple two-sample t-test approximation
|
|
312
|
+
const se = pooledStd * Math.sqrt(1 / valuesA.length + 1 / valuesB.length);
|
|
313
|
+
const t = delta / se;
|
|
314
|
+
const df = valuesA.length + valuesB.length - 2;
|
|
315
|
+
// Approximate p-value using normal distribution for large df
|
|
316
|
+
pValue = df >= 30 ? 2 * (1 - this.normalCdf(Math.abs(t))) : undefined;
|
|
317
|
+
}
|
|
318
|
+
}
|
|
319
|
+
// Determine significance
|
|
320
|
+
const significant = pValue !== undefined &&
|
|
321
|
+
pValue < significanceLevel &&
|
|
322
|
+
(minEffectSize === undefined ||
|
|
323
|
+
(effectSize !== undefined && Math.abs(effectSize) >= minEffectSize));
|
|
324
|
+
return {
|
|
325
|
+
sutA,
|
|
326
|
+
sutB,
|
|
327
|
+
metric,
|
|
328
|
+
delta,
|
|
329
|
+
ratio,
|
|
330
|
+
pValue,
|
|
331
|
+
effectSize,
|
|
332
|
+
significant,
|
|
333
|
+
};
|
|
334
|
+
}
|
|
335
|
+
/**
|
|
336
|
+
* Analyze case-class effects on SUT performance.
|
|
337
|
+
* @param aggregates
|
|
338
|
+
* @param metrics
|
|
339
|
+
* @param significanceLevel
|
|
340
|
+
*/
|
|
341
|
+
analyzeCaseClassEffects(aggregates, metrics, significanceLevel) {
|
|
342
|
+
const effects = [];
|
|
343
|
+
// Get unique SUTs and case classes
|
|
344
|
+
const suts = [...new Set(aggregates.map((agg) => agg.sut))];
|
|
345
|
+
const caseClasses = [...new Set(aggregates.map((agg) => agg.caseClass))];
|
|
346
|
+
// Skip if insufficient data
|
|
347
|
+
if (caseClasses.length < 2) {
|
|
348
|
+
return effects;
|
|
349
|
+
}
|
|
350
|
+
for (const metric of metrics) {
|
|
351
|
+
for (const sut of suts) {
|
|
352
|
+
// Get all values for this SUT across case classes
|
|
353
|
+
const sutAggregates = aggregates.filter((agg) => agg.sut === sut && metric in agg.metrics);
|
|
354
|
+
if (sutAggregates.length === 0)
|
|
355
|
+
continue;
|
|
356
|
+
// Compute overall mean for this SUT
|
|
357
|
+
const allValues = sutAggregates.map((agg) => agg.metrics[metric].mean);
|
|
358
|
+
const overallMean = allValues.reduce((a, b) => a + b, 0) / allValues.length;
|
|
359
|
+
const overallStd = Math.sqrt(this.variance(allValues));
|
|
360
|
+
// Compute effect for each case class
|
|
361
|
+
for (const caseClass of caseClasses) {
|
|
362
|
+
const caseAggregates = sutAggregates.filter((agg) => agg.caseClass === caseClass);
|
|
363
|
+
if (caseAggregates.length === 0)
|
|
364
|
+
continue;
|
|
365
|
+
const caseValues = caseAggregates.map((agg) => agg.metrics[metric].mean);
|
|
366
|
+
const caseMean = caseValues.reduce((a, b) => a + b, 0) / caseValues.length;
|
|
367
|
+
const deviation = caseMean - overallMean;
|
|
368
|
+
const percentageDeviation = overallMean !== 0 ? (deviation / overallMean) * 100 : 0;
|
|
369
|
+
// Determine significance using z-score if we have enough data
|
|
370
|
+
let significant = false;
|
|
371
|
+
if (overallStd > 0 && caseValues.length >= 2) {
|
|
372
|
+
const zScore = Math.abs(deviation) / (overallStd / Math.sqrt(caseValues.length));
|
|
373
|
+
const pValue = 2 * (1 - this.normalCdf(zScore));
|
|
374
|
+
significant = pValue < significanceLevel;
|
|
375
|
+
}
|
|
376
|
+
effects.push({
|
|
377
|
+
caseClass: String(caseClass),
|
|
378
|
+
sut,
|
|
379
|
+
metric,
|
|
380
|
+
deviationFromMean: deviation,
|
|
381
|
+
percentageDeviation,
|
|
382
|
+
significant,
|
|
383
|
+
});
|
|
384
|
+
}
|
|
385
|
+
}
|
|
386
|
+
}
|
|
387
|
+
return effects;
|
|
388
|
+
}
|
|
389
|
+
/**
|
|
390
|
+
* Compute correlations between metrics.
|
|
391
|
+
* @param aggregates
|
|
392
|
+
* @param metrics
|
|
393
|
+
*/
|
|
394
|
+
computeMetricCorrelations(aggregates, metrics) {
|
|
395
|
+
const correlations = [];
|
|
396
|
+
for (let index = 0; index < metrics.length; index++) {
|
|
397
|
+
for (let index_ = index + 1; index_ < metrics.length; index_++) {
|
|
398
|
+
const metricA = metrics[index];
|
|
399
|
+
const metricB = metrics[index_];
|
|
400
|
+
const correlation = this.computeCorrelation(aggregates, metricA, metricB);
|
|
401
|
+
if (correlation) {
|
|
402
|
+
correlations.push(correlation);
|
|
403
|
+
}
|
|
404
|
+
}
|
|
405
|
+
}
|
|
406
|
+
return correlations;
|
|
407
|
+
}
|
|
408
|
+
/**
|
|
409
|
+
* Compute Pearson and Spearman correlation between two metrics.
|
|
410
|
+
* @param aggregates
|
|
411
|
+
* @param metricA
|
|
412
|
+
* @param metricB
|
|
413
|
+
*/
|
|
414
|
+
computeCorrelation(aggregates, metricA, metricB) {
|
|
415
|
+
// Extract paired values
|
|
416
|
+
const pairs = [];
|
|
417
|
+
for (const agg of aggregates) {
|
|
418
|
+
if (!Object.hasOwn(agg.metrics, metricA) || !Object.hasOwn(agg.metrics, metricB)) {
|
|
419
|
+
continue;
|
|
420
|
+
}
|
|
421
|
+
const statsA = agg.metrics[metricA];
|
|
422
|
+
const statsB = agg.metrics[metricB];
|
|
423
|
+
pairs.push([statsA.mean, statsB.mean]);
|
|
424
|
+
}
|
|
425
|
+
if (pairs.length < 3) {
|
|
426
|
+
return null;
|
|
427
|
+
}
|
|
428
|
+
const xValues = pairs.map(([x]) => x);
|
|
429
|
+
const yValues = pairs.map(([, y]) => y);
|
|
430
|
+
// Pearson correlation
|
|
431
|
+
const pearsonR = this.pearsonCorrelation(xValues, yValues);
|
|
432
|
+
// Spearman rank correlation
|
|
433
|
+
const spearmanRho = this.spearmanCorrelation(xValues, yValues);
|
|
434
|
+
// Interpret correlation strength
|
|
435
|
+
const interpretation = this.interpretCorrelation(pearsonR);
|
|
436
|
+
return {
|
|
437
|
+
metricA,
|
|
438
|
+
metricB,
|
|
439
|
+
pearsonR,
|
|
440
|
+
spearmanRho,
|
|
441
|
+
interpretation,
|
|
442
|
+
};
|
|
443
|
+
}
|
|
444
|
+
/**
|
|
445
|
+
* Compute Pearson correlation coefficient.
|
|
446
|
+
* @param x
|
|
447
|
+
* @param y
|
|
448
|
+
*/
|
|
449
|
+
pearsonCorrelation(x, y) {
|
|
450
|
+
const n = x.length;
|
|
451
|
+
const meanX = x.reduce((a, b) => a + b, 0) / n;
|
|
452
|
+
const meanY = y.reduce((a, b) => a + b, 0) / n;
|
|
453
|
+
let numerator = 0;
|
|
454
|
+
let sumSqX = 0;
|
|
455
|
+
let sumSqY = 0;
|
|
456
|
+
for (let index = 0; index < n; index++) {
|
|
457
|
+
const dx = x[index] - meanX;
|
|
458
|
+
const dy = y[index] - meanY;
|
|
459
|
+
numerator += dx * dy;
|
|
460
|
+
sumSqX += dx * dx;
|
|
461
|
+
sumSqY += dy * dy;
|
|
462
|
+
}
|
|
463
|
+
const denominator = Math.sqrt(sumSqX * sumSqY);
|
|
464
|
+
return denominator === 0 ? 0 : numerator / denominator;
|
|
465
|
+
}
|
|
466
|
+
/**
|
|
467
|
+
* Compute Spearman rank correlation coefficient.
|
|
468
|
+
* @param x
|
|
469
|
+
* @param y
|
|
470
|
+
*/
|
|
471
|
+
spearmanCorrelation(x, y) {
|
|
472
|
+
const rankX = this.computeRanks(x);
|
|
473
|
+
const rankY = this.computeRanks(y);
|
|
474
|
+
return this.pearsonCorrelation(rankX, rankY);
|
|
475
|
+
}
|
|
476
|
+
/**
|
|
477
|
+
* Compute ranks for an array of values (handling ties).
|
|
478
|
+
* @param values
|
|
479
|
+
*/
|
|
480
|
+
computeRanks(values) {
|
|
481
|
+
const indexed = values.map((v, index) => ({ value: v, index }));
|
|
482
|
+
indexed.sort((a, b) => a.value - b.value);
|
|
483
|
+
const ranks = new Array(values.length);
|
|
484
|
+
let index = 0;
|
|
485
|
+
while (index < indexed.length) {
|
|
486
|
+
let index_ = index;
|
|
487
|
+
// Find all values equal to this one (for tie handling)
|
|
488
|
+
while (index_ < indexed.length && indexed[index_].value === indexed[index].value) {
|
|
489
|
+
index_++;
|
|
490
|
+
}
|
|
491
|
+
// Average rank for tied values
|
|
492
|
+
const avgRank = (index + index_ + 1) / 2;
|
|
493
|
+
for (let k = index; k < index_; k++) {
|
|
494
|
+
ranks[indexed[k].index] = avgRank;
|
|
495
|
+
}
|
|
496
|
+
index = index_;
|
|
497
|
+
}
|
|
498
|
+
return ranks;
|
|
499
|
+
}
|
|
500
|
+
/**
|
|
501
|
+
* Interpret correlation coefficient.
|
|
502
|
+
* @param r
|
|
503
|
+
*/
|
|
504
|
+
interpretCorrelation(r) {
|
|
505
|
+
const absR = Math.abs(r);
|
|
506
|
+
if (absR >= 0.9)
|
|
507
|
+
return "very strong";
|
|
508
|
+
if (absR >= 0.7)
|
|
509
|
+
return "strong";
|
|
510
|
+
if (absR >= 0.5)
|
|
511
|
+
return "moderate";
|
|
512
|
+
if (absR >= 0.3)
|
|
513
|
+
return "weak";
|
|
514
|
+
return "negligible";
|
|
515
|
+
}
|
|
516
|
+
/**
|
|
517
|
+
* Compute variance of an array.
|
|
518
|
+
* @param values
|
|
519
|
+
*/
|
|
520
|
+
variance(values) {
|
|
521
|
+
if (values.length === 0)
|
|
522
|
+
return 0;
|
|
523
|
+
const mean = values.reduce((a, b) => a + b, 0) / values.length;
|
|
524
|
+
return values.reduce((sum, v) => sum + (v - mean) ** 2, 0) / values.length;
|
|
525
|
+
}
|
|
526
|
+
/**
|
|
527
|
+
* Standard normal CDF approximation.
|
|
528
|
+
* @param z
|
|
529
|
+
*/
|
|
530
|
+
normalCdf(z) {
|
|
531
|
+
// Abramowitz and Stegun approximation
|
|
532
|
+
const a1 = 0.254829592;
|
|
533
|
+
const a2 = -0.284496736;
|
|
534
|
+
const a3 = 1.421413741;
|
|
535
|
+
const a4 = -1.453152027;
|
|
536
|
+
const a5 = 1.061405429;
|
|
537
|
+
const p = 0.3275911;
|
|
538
|
+
const sign = z < 0 ? -1 : 1;
|
|
539
|
+
z = Math.abs(z) / Math.SQRT2;
|
|
540
|
+
const t = 1.0 / (1.0 + p * z);
|
|
541
|
+
const y = 1.0 - ((((a5 * t + a4) * t + a3) * t + a2) * t + a1) * t * Math.exp(-z * z);
|
|
542
|
+
return 0.5 * (1.0 + sign * y);
|
|
543
|
+
}
|
|
544
|
+
}
|
|
545
|
+
//# sourceMappingURL=exploratory-evaluator.js.map
|