ppef 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +172 -0
- package/dist/__tests__/framework-pipeline.integration.test.d.ts +7 -0
- package/dist/__tests__/framework-pipeline.integration.test.d.ts.map +1 -0
- package/dist/__tests__/framework-pipeline.integration.test.js +413 -0
- package/dist/__tests__/framework-pipeline.integration.test.js.map +1 -0
- package/dist/__tests__/registry-executor.integration.test.d.ts +5 -0
- package/dist/__tests__/registry-executor.integration.test.d.ts.map +1 -0
- package/dist/__tests__/registry-executor.integration.test.js +349 -0
- package/dist/__tests__/registry-executor.integration.test.js.map +1 -0
- package/dist/__tests__/test-helpers.d.ts +94 -0
- package/dist/__tests__/test-helpers.d.ts.map +1 -0
- package/dist/__tests__/test-helpers.js +271 -0
- package/dist/__tests__/test-helpers.js.map +1 -0
- package/dist/aggregation/aggregators.d.ts +54 -0
- package/dist/aggregation/aggregators.d.ts.map +1 -0
- package/dist/aggregation/aggregators.js +228 -0
- package/dist/aggregation/aggregators.js.map +1 -0
- package/dist/aggregation/index.d.ts +8 -0
- package/dist/aggregation/index.d.ts.map +1 -0
- package/dist/aggregation/index.js +8 -0
- package/dist/aggregation/index.js.map +1 -0
- package/dist/aggregation/pipeline.d.ts +38 -0
- package/dist/aggregation/pipeline.d.ts.map +1 -0
- package/dist/aggregation/pipeline.js +198 -0
- package/dist/aggregation/pipeline.js.map +1 -0
- package/dist/claims/evaluator.d.ts +33 -0
- package/dist/claims/evaluator.d.ts.map +1 -0
- package/dist/claims/evaluator.js +174 -0
- package/dist/claims/evaluator.js.map +1 -0
- package/dist/claims/index.d.ts +7 -0
- package/dist/claims/index.d.ts.map +1 -0
- package/dist/claims/index.js +7 -0
- package/dist/claims/index.js.map +1 -0
- package/dist/collector/index.d.ts +8 -0
- package/dist/collector/index.d.ts.map +1 -0
- package/dist/collector/index.js +8 -0
- package/dist/collector/index.js.map +1 -0
- package/dist/collector/result-collector.d.ts +159 -0
- package/dist/collector/result-collector.d.ts.map +1 -0
- package/dist/collector/result-collector.js +213 -0
- package/dist/collector/result-collector.js.map +1 -0
- package/dist/collector/schema.d.ts +34 -0
- package/dist/collector/schema.d.ts.map +1 -0
- package/dist/collector/schema.js +145 -0
- package/dist/collector/schema.js.map +1 -0
- package/dist/executor/__tests__/checkpoint-hash-bug.diagnostic.test.d.ts +10 -0
- package/dist/executor/__tests__/checkpoint-hash-bug.diagnostic.test.d.ts.map +1 -0
- package/dist/executor/__tests__/checkpoint-hash-bug.diagnostic.test.js +122 -0
- package/dist/executor/__tests__/checkpoint-hash-bug.diagnostic.test.js.map +1 -0
- package/dist/executor/__tests__/checkpoint-manager.integration.test.d.ts +7 -0
- package/dist/executor/__tests__/checkpoint-manager.integration.test.d.ts.map +1 -0
- package/dist/executor/__tests__/checkpoint-manager.integration.test.js +330 -0
- package/dist/executor/__tests__/checkpoint-manager.integration.test.js.map +1 -0
- package/dist/executor/__tests__/checkpoint-manager.unit.test.d.ts +7 -0
- package/dist/executor/__tests__/checkpoint-manager.unit.test.d.ts.map +1 -0
- package/dist/executor/__tests__/checkpoint-manager.unit.test.js +449 -0
- package/dist/executor/__tests__/checkpoint-manager.unit.test.js.map +1 -0
- package/dist/executor/__tests__/checkpoint-merge-bug.diagnostic.test.d.ts +11 -0
- package/dist/executor/__tests__/checkpoint-merge-bug.diagnostic.test.d.ts.map +1 -0
- package/dist/executor/__tests__/checkpoint-merge-bug.diagnostic.test.js +224 -0
- package/dist/executor/__tests__/checkpoint-merge-bug.diagnostic.test.js.map +1 -0
- package/dist/executor/__tests__/checkpoint-merge-bug.unit.test.d.ts +8 -0
- package/dist/executor/__tests__/checkpoint-merge-bug.unit.test.d.ts.map +1 -0
- package/dist/executor/__tests__/checkpoint-merge-bug.unit.test.js +164 -0
- package/dist/executor/__tests__/checkpoint-merge-bug.unit.test.js.map +1 -0
- package/dist/executor/__tests__/checkpoint-storage.unit.test.d.ts +7 -0
- package/dist/executor/__tests__/checkpoint-storage.unit.test.d.ts.map +1 -0
- package/dist/executor/__tests__/checkpoint-storage.unit.test.js +386 -0
- package/dist/executor/__tests__/checkpoint-storage.unit.test.js.map +1 -0
- package/dist/executor/__tests__/executor.unit.test.d.ts +7 -0
- package/dist/executor/__tests__/executor.unit.test.d.ts.map +1 -0
- package/dist/executor/__tests__/executor.unit.test.js +134 -0
- package/dist/executor/__tests__/executor.unit.test.js.map +1 -0
- package/dist/executor/__tests__/parallel-checkpoint-merge.integration.test.d.ts +12 -0
- package/dist/executor/__tests__/parallel-checkpoint-merge.integration.test.d.ts.map +1 -0
- package/dist/executor/__tests__/parallel-checkpoint-merge.integration.test.js +196 -0
- package/dist/executor/__tests__/parallel-checkpoint-merge.integration.test.js.map +1 -0
- package/dist/executor/__tests__/parallel-executor.integration.test.d.ts +7 -0
- package/dist/executor/__tests__/parallel-executor.integration.test.d.ts.map +1 -0
- package/dist/executor/__tests__/parallel-executor.integration.test.js +249 -0
- package/dist/executor/__tests__/parallel-executor.integration.test.js.map +1 -0
- package/dist/executor/__tests__/parallel-executor.unit.test.d.ts +7 -0
- package/dist/executor/__tests__/parallel-executor.unit.test.d.ts.map +1 -0
- package/dist/executor/__tests__/parallel-executor.unit.test.js +203 -0
- package/dist/executor/__tests__/parallel-executor.unit.test.js.map +1 -0
- package/dist/executor/checkpoint-manager.d.ts +231 -0
- package/dist/executor/checkpoint-manager.d.ts.map +1 -0
- package/dist/executor/checkpoint-manager.js +395 -0
- package/dist/executor/checkpoint-manager.js.map +1 -0
- package/dist/executor/checkpoint-storage.d.ts +230 -0
- package/dist/executor/checkpoint-storage.d.ts.map +1 -0
- package/dist/executor/checkpoint-storage.js +370 -0
- package/dist/executor/checkpoint-storage.js.map +1 -0
- package/dist/executor/checkpoint-types.d.ts +48 -0
- package/dist/executor/checkpoint-types.d.ts.map +1 -0
- package/dist/executor/checkpoint-types.js +8 -0
- package/dist/executor/checkpoint-types.js.map +1 -0
- package/dist/executor/executor.d.ts +164 -0
- package/dist/executor/executor.d.ts.map +1 -0
- package/dist/executor/executor.js +408 -0
- package/dist/executor/executor.js.map +1 -0
- package/dist/executor/index.d.ts +11 -0
- package/dist/executor/index.d.ts.map +1 -0
- package/dist/executor/index.js +11 -0
- package/dist/executor/index.js.map +1 -0
- package/dist/executor/memory-monitor.d.ts +115 -0
- package/dist/executor/memory-monitor.d.ts.map +1 -0
- package/dist/executor/memory-monitor.js +168 -0
- package/dist/executor/memory-monitor.js.map +1 -0
- package/dist/executor/parallel-executor.d.ts +53 -0
- package/dist/executor/parallel-executor.d.ts.map +1 -0
- package/dist/executor/parallel-executor.js +194 -0
- package/dist/executor/parallel-executor.js.map +1 -0
- package/dist/executor/run-id.d.ts +71 -0
- package/dist/executor/run-id.d.ts.map +1 -0
- package/dist/executor/run-id.js +67 -0
- package/dist/executor/run-id.js.map +1 -0
- package/dist/executor/worker-entry.d.ts +8 -0
- package/dist/executor/worker-entry.d.ts.map +1 -0
- package/dist/executor/worker-entry.js +67 -0
- package/dist/executor/worker-entry.js.map +1 -0
- package/dist/index.cjs +11 -0
- package/dist/index.d.ts +15 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +24 -0
- package/dist/index.js.map +1 -0
- package/dist/registry/case-registry.d.ts +113 -0
- package/dist/registry/case-registry.d.ts.map +1 -0
- package/dist/registry/case-registry.js +160 -0
- package/dist/registry/case-registry.js.map +1 -0
- package/dist/registry/index.d.ts +8 -0
- package/dist/registry/index.d.ts.map +1 -0
- package/dist/registry/index.js +8 -0
- package/dist/registry/index.js.map +1 -0
- package/dist/registry/sut-registry.d.ts +96 -0
- package/dist/registry/sut-registry.d.ts.map +1 -0
- package/dist/registry/sut-registry.js +126 -0
- package/dist/registry/sut-registry.js.map +1 -0
- package/dist/renderers/index.d.ts +10 -0
- package/dist/renderers/index.d.ts.map +1 -0
- package/dist/renderers/index.js +9 -0
- package/dist/renderers/index.js.map +1 -0
- package/dist/renderers/latex-renderer.d.ts +84 -0
- package/dist/renderers/latex-renderer.d.ts.map +1 -0
- package/dist/renderers/latex-renderer.js +208 -0
- package/dist/renderers/latex-renderer.js.map +1 -0
- package/dist/renderers/types.d.ts +106 -0
- package/dist/renderers/types.d.ts.map +1 -0
- package/dist/renderers/types.js +23 -0
- package/dist/renderers/types.js.map +1 -0
- package/dist/robustness/analyzer.d.ts +61 -0
- package/dist/robustness/analyzer.d.ts.map +1 -0
- package/dist/robustness/analyzer.js +191 -0
- package/dist/robustness/analyzer.js.map +1 -0
- package/dist/robustness/index.d.ts +8 -0
- package/dist/robustness/index.d.ts.map +1 -0
- package/dist/robustness/index.js +8 -0
- package/dist/robustness/index.js.map +1 -0
- package/dist/robustness/perturbations.d.ts +46 -0
- package/dist/robustness/perturbations.d.ts.map +1 -0
- package/dist/robustness/perturbations.js +184 -0
- package/dist/robustness/perturbations.js.map +1 -0
- package/dist/statistical/index.d.ts +8 -0
- package/dist/statistical/index.d.ts.map +1 -0
- package/dist/statistical/index.js +8 -0
- package/dist/statistical/index.js.map +1 -0
- package/dist/statistical/mann-whitney-u.d.ts +62 -0
- package/dist/statistical/mann-whitney-u.d.ts.map +1 -0
- package/dist/statistical/mann-whitney-u.js +127 -0
- package/dist/statistical/mann-whitney-u.js.map +1 -0
- package/dist/types/aggregate.d.ts +124 -0
- package/dist/types/aggregate.d.ts.map +1 -0
- package/dist/types/aggregate.js +9 -0
- package/dist/types/aggregate.js.map +1 -0
- package/dist/types/case.d.ts +105 -0
- package/dist/types/case.d.ts.map +1 -0
- package/dist/types/case.js +10 -0
- package/dist/types/case.js.map +1 -0
- package/dist/types/claims.d.ts +122 -0
- package/dist/types/claims.d.ts.map +1 -0
- package/dist/types/claims.js +14 -0
- package/dist/types/claims.js.map +1 -0
- package/dist/types/index.d.ts +12 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +7 -0
- package/dist/types/index.js.map +1 -0
- package/dist/types/perturbation.d.ts +105 -0
- package/dist/types/perturbation.d.ts.map +1 -0
- package/dist/types/perturbation.js +9 -0
- package/dist/types/perturbation.js.map +1 -0
- package/dist/types/result.d.ts +150 -0
- package/dist/types/result.d.ts.map +1 -0
- package/dist/types/result.js +12 -0
- package/dist/types/result.js.map +1 -0
- package/dist/types/sut.d.ts +128 -0
- package/dist/types/sut.d.ts.map +1 -0
- package/dist/types/sut.js +12 -0
- package/dist/types/sut.js.map +1 -0
- package/package.json +283 -7
|
@@ -0,0 +1,271 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Test Helpers for Framework Tests
|
|
3
|
+
*
|
|
4
|
+
* Factory functions for creating mock evaluation results, aggregates,
|
|
5
|
+
* and claims for testing the evaluation framework.
|
|
6
|
+
*/
|
|
7
|
+
/**
 * Build a mock EvaluationResult by layering caller overrides over defaults.
 *
 * Each of the five sections (run, correctness, outputs, metrics, provenance)
 * is merged exactly one level deep: a key supplied in an override section
 * replaces the matching default key wholesale. For example, overriding
 * `metrics.numeric` swaps out the entire default `numeric` map rather than
 * merging into it, and overriding `provenance.runtime` drops the default
 * platform/arch/nodeVersion unless they are supplied again.
 *
 * @param overrides - Optional partial result; may be undefined
 * @returns A result object containing all five sections
 */
export const createMockResult = (overrides) => {
    const { run, correctness, outputs, metrics, provenance } = overrides ?? {};
    // Captured once per call so every mock carries its own creation time.
    const createdAt = new Date().toISOString();
    return {
        run: {
            runId: "mock-run-001",
            sut: "test-sut-v1.0.0",
            sutRole: "primary",
            caseId: "test-case-001",
            caseClass: "test-class",
            ...run,
        },
        correctness: {
            expectedExists: true,
            producedOutput: true,
            valid: true,
            matchesExpected: true,
            ...correctness,
        },
        outputs: {
            summary: {},
            ...outputs,
        },
        metrics: {
            numeric: {
                "execution-time": 100,
                "nodes-expanded": 50,
                "path-diversity": 0.75,
            },
            ...metrics,
        },
        provenance: {
            runtime: {
                platform: "darwin",
                arch: "arm64",
                nodeVersion: "20.0.0",
            },
            timestamp: createdAt,
            ...provenance,
        },
    };
};
|
|
53
|
+
/**
 * Generate `count` mock results attributed to a single SUT.
 *
 * Run and case identifiers embed the result index, zero-padded to width 3.
 * The numeric metrics are randomised per result with Math.random():
 * "execution-time" in [100, 150), "nodes-expanded" an integer in [50, 69],
 * and "path-diversity" in [0.5, 0.9), so values differ between calls.
 *
 * @param count - Number of results to generate
 * @param sut - SUT identifier stamped on every result
 * @param role - SUT role (default: "primary")
 * @param caseClass - Optional case class; it is forwarded as-is, so omitting
 *   it yields `caseClass: undefined` instead of createMockResult's default
 * @returns Array of EvaluationResults
 */
export const createMockResults = (count, sut, role = "primary", caseClass) => {
    const buildResult = (index) => {
        const suffix = index.toString().padStart(3, "0");
        return createMockResult({
            run: {
                runId: `${sut}-run-${suffix}`,
                sut,
                sutRole: role,
                caseId: `case-${suffix}`,
                caseClass,
            },
            metrics: {
                numeric: {
                    "execution-time": 100 + Math.random() * 50,
                    "nodes-expanded": 50 + Math.floor(Math.random() * 20),
                    "path-diversity": 0.5 + Math.random() * 0.4,
                },
            },
        });
    };
    return Array.from({ length: count }, (_unused, index) => buildResult(index));
};
|
|
78
|
+
/**
 * Derive mock SummaryStats from raw sample values.
 *
 * An empty input yields `n: 0` with NaN for mean, median, min and max (and no
 * `std`/`sum` keys). Otherwise the input is left unmodified; a sorted copy is
 * used for median/min/max. `std` is the sample standard deviation (divides by
 * n - 1) and stays undefined when only one value is given.
 *
 * @param values - Array of values to compute stats from
 * @returns SummaryStats
 */
export const createMockSummaryStats = (values) => {
    const n = values.length;
    if (n === 0) {
        const missing = Number.NaN;
        return { n: 0, mean: missing, median: missing, min: missing, max: missing };
    }
    const ascending = [...values].sort((left, right) => left - right);
    const sum = values.reduce((total, value) => total + value, 0);
    const mean = sum / n;
    // For even n the median averages the two central values.
    const upperMid = Math.floor(n / 2);
    let median = ascending[upperMid];
    if (n % 2 === 0) {
        median = (ascending[upperMid - 1] + ascending[upperMid]) / 2;
    }
    const std = n > 1
        ? Math.sqrt(values.reduce((total, value) => total + (value - mean) ** 2, 0) / (n - 1))
        : undefined;
    return {
        n,
        mean,
        median,
        min: ascending[0],
        max: ascending[n - 1],
        std,
        sum,
    };
};
|
|
115
|
+
/**
 * Build a mock AggregatedResult for one SUT.
 *
 * The group counts are fixed at 5 runs / 5 cases and every correctness rate
 * is 1. When `metrics` is null or undefined, a default map with
 * "execution-time", "nodes-expanded" and "path-diversity" stats is used.
 *
 * @param sut - SUT identifier
 * @param role - SUT role (default: "primary")
 * @param caseClass - Optional case class
 * @param metrics - Optional metrics map replacing the defaults entirely
 * @returns AggregatedResult
 */
export const createMockAggregate = (sut, role = "primary", caseClass, metrics) => {
    const fallbackMetrics = {
        "execution-time": createMockSummaryStats([100, 110, 105, 115, 120]),
        "nodes-expanded": createMockSummaryStats([50, 55, 52, 58, 60]),
        "path-diversity": createMockSummaryStats([0.7, 0.75, 0.72, 0.78, 0.8]),
    };
    const group = { runCount: 5, caseCount: 5 };
    const correctness = {
        validRate: 1,
        producedOutputRate: 1,
        matchesExpectedRate: 1,
    };
    const selectedMetrics = metrics ?? fallbackMetrics;
    return {
        sut,
        sutRole: role,
        caseClass,
        group,
        correctness,
        metrics: selectedMetrics,
    };
};
|
|
146
|
+
/**
 * Build a fixed trio of aggregates for comparison tests.
 *
 * All three use the "scale-free" case class and carry only "execution-time"
 * and "nodes-expanded" metrics: one primary SUT followed by two baselines.
 *
 * @returns Array with the primary aggregate first, then the two baselines
 */
export const createMockAggregates = () => {
    // [sut, role, execution-time samples, nodes-expanded samples]
    const scenarios = [
        ["degree-prioritised-v1.0.0", "primary", [80, 85, 82, 88, 90], [40, 45, 42, 48, 50]],
        ["standard-bfs-v1.0.0", "baseline", [120, 125, 122, 128, 130], [70, 75, 72, 78, 80]],
        ["frontier-balanced-v1.0.0", "baseline", [100, 105, 102, 108, 110], [60, 65, 62, 68, 70]],
    ];
    return scenarios.map(([sut, role, executionTimes, nodesExpanded]) => createMockAggregate(sut, role, "scale-free", {
        "execution-time": createMockSummaryStats(executionTimes),
        "nodes-expanded": createMockSummaryStats(nodesExpanded),
    }));
};
|
|
165
|
+
/**
 * Build a mock EvaluationClaim.
 *
 * The default claim states that "degree-prioritised-v1.0.0" has a lower
 * "execution-time" than "standard-bfs-v1.0.0" at global scope. Any key in
 * `overrides` replaces the default of the same name; extra keys are kept.
 *
 * @param overrides - Optional partial fields to override defaults
 * @returns EvaluationClaim
 */
export const createMockClaim = (overrides) => {
    const defaults = {
        claimId: "C001",
        description: "Primary SUT is faster than baseline",
        sut: "degree-prioritised-v1.0.0",
        baseline: "standard-bfs-v1.0.0",
        metric: "execution-time",
        direction: "less",
        scope: "global",
    };
    return { ...defaults, ...overrides };
};
|
|
181
|
+
/**
 * Build mock ClaimEvidence.
 *
 * Defaults describe a primary value of 85 against a baseline of 125
 * (delta -40, ratio 0.68) with pValue 0.01, effectSize 1.5 and n 10.
 * Any key in `overrides` replaces the default of the same name.
 *
 * @param overrides - Optional partial fields to override defaults
 * @returns ClaimEvidence
 */
export const createMockEvidence = (overrides) => {
    const defaults = {
        primaryValue: 85,
        baselineValue: 125,
        delta: -40,
        ratio: 0.68,
        pValue: 0.01,
        effectSize: 1.5,
        n: 10,
    };
    return { ...defaults, ...overrides };
};
|
|
197
|
+
/**
 * Build a mock ClaimEvaluation from a status plus optional claim/evidence overrides.
 *
 * The claim and evidence are produced by createMockClaim and
 * createMockEvidence, so `claim` and `evidence` act as partial overrides of
 * those helpers' defaults.
 *
 * @param status - Claim status (default: "satisfied")
 * @param claim - Optional claim field overrides
 * @param evidence - Optional evidence field overrides
 * @returns ClaimEvaluation
 */
export const createMockClaimEvaluation = (status = "satisfied", claim, evidence) => {
    const resolvedClaim = createMockClaim(claim);
    const resolvedEvidence = createMockEvidence(evidence);
    return {
        claim: resolvedClaim,
        status,
        evidence: resolvedEvidence,
    };
};
|
|
210
|
+
/**
 * Build one mock result per supplied execution time.
 * Useful for testing aggregation and statistics with known inputs.
 *
 * Each result's `metrics.numeric` holds only "execution-time"; run and case
 * identifiers embed the array index, zero-padded to width 3.
 *
 * @param executionTimes - Array of execution times, one per result
 * @param sut - SUT identifier
 * @param role - SUT role (default: "primary")
 * @returns Array of EvaluationResults, in the same order as `executionTimes`
 */
export const createMockResultsWithMetrics = (executionTimes, sut, role = "primary") => {
    const buildResult = (time, index) => {
        const suffix = index.toString().padStart(3, "0");
        return createMockResult({
            run: {
                runId: `${sut}-run-${suffix}`,
                sut,
                sutRole: role,
                caseId: `case-${suffix}`,
            },
            metrics: {
                numeric: {
                    "execution-time": time,
                },
            },
        });
    };
    return executionTimes.map((time, index) => buildResult(time, index));
};
|
|
232
|
+
/**
 * Build a minimal result for validation tests.
 *
 * Intended to carry only the fields the schema requires: there is no
 * caseClass and no provenance timestamp, `outputs` and `metrics.numeric` are
 * empty, and `matchesExpected` is null because `expectedExists` is false.
 * A fresh object is returned on every call.
 */
export const createMinimalValidResult = () => {
    const run = {
        runId: "minimal-001",
        sut: "test-sut",
        sutRole: "primary",
        caseId: "test-case",
    };
    const correctness = {
        expectedExists: false,
        producedOutput: true,
        valid: true,
        matchesExpected: null,
    };
    const runtime = {
        platform: "test",
        arch: "test",
        nodeVersion: "20.0.0",
    };
    return {
        run,
        correctness,
        outputs: {},
        metrics: { numeric: {} },
        provenance: { runtime },
    };
};
|
|
261
|
+
/**
 * Build a deliberately invalid result for exercising validation logic.
 *
 * Only an empty `run` object is present: runId, sut, sutRole and caseId are
 * absent, as are the correctness, metrics and provenance sections.
 */
export const createInvalidResult = () => {
    // Intentionally left without runId, sut, sutRole or caseId.
    const emptyRun = {};
    return { run: emptyRun };
};
|
|
271
|
+
//# sourceMappingURL=test-helpers.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"test-helpers.js","sourceRoot":"","sources":["../../src/__tests__/test-helpers.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAmBH;;;;;GAKG;AACH,MAAM,CAAC,MAAM,gBAAgB,GAAG,CAAC,SAAqC,EAAoB,EAAE;IAC3F,MAAM,UAAU,GAAe;QAC9B,KAAK,EAAE,cAAc;QACrB,GAAG,EAAE,iBAAiB;QACtB,OAAO,EAAE,SAAS;QAClB,MAAM,EAAE,eAAe;QACvB,SAAS,EAAE,YAAY;KACvB,CAAC;IAEF,MAAM,kBAAkB,GAAsB;QAC7C,cAAc,EAAE,IAAI;QACpB,cAAc,EAAE,IAAI;QACpB,KAAK,EAAE,IAAI;QACX,eAAe,EAAE,IAAI;KACrB,CAAC;IAEF,MAAM,cAAc,GAAkB;QACrC,OAAO,EAAE,EAAE;KACX,CAAC;IAEF,MAAM,cAAc,GAAkB;QACrC,OAAO,EAAE;YACR,gBAAgB,EAAE,GAAG;YACrB,gBAAgB,EAAE,EAAE;YACpB,gBAAgB,EAAE,IAAI;SACtB;KACD,CAAC;IAEF,MAAM,iBAAiB,GAAe;QACrC,OAAO,EAAE;YACR,QAAQ,EAAE,QAAQ;YAClB,IAAI,EAAE,OAAO;YACb,WAAW,EAAE,QAAQ;SACrB;QACD,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;KACnC,CAAC;IAEF,OAAO;QACN,GAAG,EAAE,EAAE,GAAG,UAAU,EAAE,GAAG,SAAS,EAAE,GAAG,EAAE;QACzC,WAAW,EAAE,EAAE,GAAG,kBAAkB,EAAE,GAAG,SAAS,EAAE,WAAW,EAAE;QACjE,OAAO,EAAE,EAAE,GAAG,cAAc,EAAE,GAAG,SAAS,EAAE,OAAO,EAAE;QACrD,OAAO,EAAE,EAAE,GAAG,cAAc,EAAE,GAAG,SAAS,EAAE,OAAO,EAAE;QACrD,UAAU,EAAE,EAAE,GAAG,iBAAiB,EAAE,GAAG,SAAS,EAAE,UAAU,EAAE;KAC9D,CAAC;AACH,CAAC,CAAC;AAEF;;;;;;;;GAQG;AACH,MAAM,CAAC,MAAM,iBAAiB,GAAG,CAChC,KAAa,EACb,GAAW,EACX,OAAgB,SAAS,EACzB,SAAkB,EACG,EAAE,CACvB,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,KAAK,EAAE,EAAE,CAAC,CAAC,EAAE,KAAK,EAAE,EAAE,CAC1C,gBAAgB,CAAC;IAChB,GAAG,EAAE;QACJ,KAAK,EAAE,GAAG,GAAG,QAAQ,KAAK,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE;QACxD,GAAG;QACH,OAAO,EAAE,IAAI;QACb,MAAM,EAAE,QAAQ,KAAK,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE;QACnD,SAAS;KACT;IACD,OAAO,EAAE;QACR,OAAO,EAAE;YACR,gBAAgB,EAAE,GAAG,GAAG,IAAI,CAAC,MAAM,EAAE,GAAG,EAAE;YAC1C,gBAAgB,EAAE,EAAE,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,EAAE,CAAC;YACrD,gBAAgB,EAAE,GAAG,GAAG,IAAI,CAAC,MAAM,EAAE,GAAG,GAAG;SAC3C;KACD;CACD,CAAC,CACF,CAAC;AAEH;;;;;GAKG;AACH,MAAM,CAAC,MAAM,sBAAsB,GAAG,CAAC,MAAgB,EAAgB,EAAE;IACxE,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO;YACN,CAAC,EAAE,CAAC;YACJ,IA
AI,EAAE,MAAM,CAAC,GAAG;YAChB,MAAM,EAAE,MAAM,CAAC,GAAG;YAClB,GAAG,EAAE,MAAM,CAAC,GAAG;YACf,GAAG,EAAE,MAAM,CAAC,GAAG;SACf,CAAC;IACH,CAAC;IAED,MAAM,MAAM,GAAG,CAAC,GAAG,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IACjD,MAAM,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC;IACxB,MAAM,GAAG,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,WAAW,EAAE,CAAC,EAAE,EAAE,CAAC,WAAW,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;IAClE,MAAM,IAAI,GAAG,GAAG,GAAG,CAAC,CAAC;IACrB,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IACnC,MAAM,MAAM,GAAG,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,QAAQ,GAAG,CAAC,CAAC,GAAG,MAAM,CAAC,QAAQ,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;IAE9F,IAAI,GAAuB,CAAC;IAC5B,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;QACX,MAAM,QAAQ,GACb,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,WAAW,EAAE,CAAC,EAAE,EAAE,CAAC,WAAW,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAC7F,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IAC3B,CAAC;IAED,OAAO;QACN,CAAC;QACD,IAAI;QACJ,MAAM;QACN,GAAG,EAAE,MAAM,CAAC,CAAC,CAAC;QACd,GAAG,EAAE,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC;QAClB,GAAG;QACH,GAAG;KACH,CAAC;AACH,CAAC,CAAC;AAEF;;;;;;;;GAQG;AACH,MAAM,CAAC,MAAM,mBAAmB,GAAG,CAClC,GAAW,EACX,OAAgB,SAAS,EACzB,SAAkB,EAClB,OAAsC,EACnB,EAAE;IACrB,MAAM,cAAc,GAAiC;QACpD,gBAAgB,EAAE,sBAAsB,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC;QACnE,gBAAgB,EAAE,sBAAsB,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC;QAC9D,gBAAgB,EAAE,sBAAsB,CAAC,CAAC,GAAG,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,GAAG,CAAC,CAAC;KACtE,CAAC;IAEF,OAAO;QACN,GAAG;QACH,OAAO,EAAE,IAAI;QACb,SAAS;QACT,KAAK,EAAE;YACN,QAAQ,EAAE,CAAC;YACX,SAAS,EAAE,CAAC;SACZ;QACD,WAAW,EAAE;YACZ,SAAS,EAAE,CAAC;YACZ,kBAAkB,EAAE,CAAC;YACrB,mBAAmB,EAAE,CAAC;SACtB;QACD,OAAO,EAAE,OAAO,IAAI,cAAc;KAClC,CAAC;AACH,CAAC,CAAC;AAEF;;;;GAIG;AACH,MAAM,CAAC,MAAM,oBAAoB,GAAG,GAAuB,EAAE,CAAC;IAC7D,mBAAmB,CAAC,2BAA2B,EAAE,SAAS,EAAE,YAAY,EAAE;QACzE,gBAAgB,EAAE,sBAAsB,CAAC,CA
AC,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC;QAC9D,gBAAgB,EAAE,sBAAsB,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC;KAC9D,CAAC;IACF,mBAAmB,CAAC,qBAAqB,EAAE,UAAU,EAAE,YAAY,EAAE;QACpE,gBAAgB,EAAE,sBAAsB,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC;QACnE,gBAAgB,EAAE,sBAAsB,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC;KAC9D,CAAC;IACF,mBAAmB,CAAC,0BAA0B,EAAE,UAAU,EAAE,YAAY,EAAE;QACzE,gBAAgB,EAAE,sBAAsB,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC;QACnE,gBAAgB,EAAE,sBAAsB,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC;KAC9D,CAAC;CACF,CAAC;AAEF;;;;;GAKG;AACH,MAAM,CAAC,MAAM,eAAe,GAAG,CAAC,SAAoC,EAAmB,EAAE,CAAC,CAAC;IAC1F,OAAO,EAAE,MAAM;IACf,WAAW,EAAE,qCAAqC;IAClD,GAAG,EAAE,2BAA2B;IAChC,QAAQ,EAAE,qBAAqB;IAC/B,MAAM,EAAE,gBAAgB;IACxB,SAAS,EAAE,MAAM;IACjB,KAAK,EAAE,QAAQ;IACf,GAAG,SAAS;CACZ,CAAC,CAAC;AAEH;;;;;GAKG;AACH,MAAM,CAAC,MAAM,kBAAkB,GAAG,CAAC,SAAkC,EAAiB,EAAE,CAAC,CAAC;IACzF,YAAY,EAAE,EAAE;IAChB,aAAa,EAAE,GAAG;IAClB,KAAK,EAAE,CAAC,EAAE;IACV,KAAK,EAAE,IAAI;IACX,MAAM,EAAE,IAAI;IACZ,UAAU,EAAE,GAAG;IACf,CAAC,EAAE,EAAE;IACL,GAAG,SAAS;CACZ,CAAC,CAAC;AAEH;;;;;;;GAOG;AACH,MAAM,CAAC,MAAM,yBAAyB,GAAG,CACxC,SAAsB,WAAW,EACjC,KAAgC,EAChC,QAAiC,EACf,EAAE,CAAC,CAAC;IACtB,KAAK,EAAE,eAAe,CAAC,KAAK,CAAC;IAC7B,MAAM;IACN,QAAQ,EAAE,kBAAkB,CAAC,QAAQ,CAAC;CACtC,CAAC,CAAC;AAEH;;;;;;;;GAQG;AACH,MAAM,CAAC,MAAM,4BAA4B,GAAG,CAC3C,cAAwB,EACxB,GAAW,EACX,OAAgB,SAAS,EACJ,EAAE,CACvB,cAAc,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAClC,gBAAgB,CAAC;IAChB,GAAG,EAAE;QACJ,KAAK,EAAE,GAAG,GAAG,QAAQ,KAAK,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE;QACxD,GAAG;QACH,OAAO,EAAE,IAAI;QACb,MAAM,EAAE,QAAQ,KAAK,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE;KACnD;IACD,OAAO,EAAE;QACR,OAAO,EAAE;YACR,gBAAgB,EAAE,IAAI;SACtB;KACD;CACD,CAAC,CACF,CAAC;AAEH;;;GAGG;AACH,MAAM,CAAC,MAAM,wBAAwB,GAAG,GAAqB,EAAE,CAAC,CAAC;IAChE,GAAG,EAAE;QACJ,KAAK,EAAE,aAAa;QACpB,GAAG,EAAE,UAAU;QACf,OAAO,EAAE,SAAS;QAClB,
MAAM,EAAE,WAAW;KACnB;IACD,WAAW,EAAE;QACZ,cAAc,EAAE,KAAK;QACrB,cAAc,EAAE,IAAI;QACpB,KAAK,EAAE,IAAI;QACX,eAAe,EAAE,IAAI;KACrB;IACD,OAAO,EAAE,EAAE;IACX,OAAO,EAAE;QACR,OAAO,EAAE,EAAE;KACX;IACD,UAAU,EAAE;QACX,OAAO,EAAE;YACR,QAAQ,EAAE,MAAM;YAChB,IAAI,EAAE,MAAM;YACZ,WAAW,EAAE,QAAQ;SACrB;KACD;CACD,CAAC,CAAC;AAEH;;;GAGG;AACH,MAAM,CAAC,MAAM,mBAAmB,GAAG,GAAY,EAAE,CAAC,CAAC;IAClD,GAAG,EAAE;IACJ,sCAAsC;KACtC;IACD,2CAA2C;CAC3C,CAAC,CAAC"}
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Aggregation Functions
|
|
3
|
+
*
|
|
4
|
+
* Pure functions for computing aggregated statistics from evaluation results.
|
|
5
|
+
* These functions are extracted from inline table generation code to ensure
|
|
6
|
+
* separation of concerns.
|
|
7
|
+
*/
|
|
8
|
+
import type { ComparisonMetrics, SummaryStats } from "../types/aggregate.js";
|
|
9
|
+
import type { EvaluationResult } from "../types/result.js";
|
|
10
|
+
/**
|
|
11
|
+
* Compute summary statistics for an array of numbers.
|
|
12
|
+
*
|
|
13
|
+
* @param values - Array of numeric values
|
|
14
|
+
* @returns Summary statistics
|
|
15
|
+
*/
|
|
16
|
+
/**
 * Summarise a list of numbers.
 *
 * Per the implementation in aggregators.js: an empty input yields `n: 0` with
 * NaN for mean/median/min/max; `std` is the sample (n - 1) standard deviation
 * and, like `confidence95`, is only set when at least two values are given;
 * `p25`/`p75` are read from the sorted values at index floor(n * 0.25) and
 * floor(n * 0.75). The input array is copied before sorting.
 */
export declare const computeSummaryStats: (values: number[]) => SummaryStats;
|
|
17
|
+
/**
|
|
18
|
+
* Compute speedup ratio (baseline / treatment).
|
|
19
|
+
*
|
|
20
|
+
* @param baselineTime - Baseline execution time
|
|
21
|
+
* @param treatmentTime - Treatment execution time
|
|
22
|
+
* @returns Speedup ratio
|
|
23
|
+
*/
|
|
24
|
+
/**
 * Speedup of a treatment over a baseline, computed as
 * `baselineTime / treatmentTime`. Values above 1 mean the treatment took less
 * time. Per the implementation in aggregators.js, a `treatmentTime` of 0
 * returns Infinity regardless of `baselineTime`.
 */
export declare const computeSpeedup: (baselineTime: number, treatmentTime: number) => number;
|
|
25
|
+
/**
|
|
26
|
+
* Compute maximum speedup from multiple pairs.
|
|
27
|
+
*
|
|
28
|
+
* @param pairs - Array of [baseline, treatment] time pairs
|
|
29
|
+
* @returns Maximum speedup ratio
|
|
30
|
+
*/
|
|
31
|
+
/**
 * Largest speedup across `[baseline, treatment]` time pairs. Per the
 * implementation in aggregators.js, each pair is passed to computeSpeedup and
 * an empty `pairs` array returns 0.
 */
export declare const computeMaxSpeedup: (pairs: [number, number][]) => number;
|
|
32
|
+
/**
|
|
33
|
+
* Compute comparison metrics between primary and baseline results.
|
|
34
|
+
*
|
|
35
|
+
* @param primaryResults - Full result objects from primary SUT
|
|
36
|
+
* @param baselineResults - Full result objects from baseline SUT
|
|
37
|
+
* @param metricName - Metric to compare
|
|
38
|
+
* @returns Comparison metrics
|
|
39
|
+
*/
|
|
40
|
+
/**
 * Compare one named metric between the primary SUT's results and a baseline
 * SUT's results.
 *
 * NOTE(review): the implementation is not visible alongside this declaration;
 * aggregators.js imports mannWhitneyUTest, presumably for the significance
 * part of this comparison — confirm before relying on it.
 */
export declare const computeComparison: (primaryResults: EvaluationResult[], baselineResults: EvaluationResult[], metricName: string) => ComparisonMetrics;
|
|
41
|
+
/**
|
|
42
|
+
* Compute rankings from results.
|
|
43
|
+
*
|
|
44
|
+
* @param results - Results to rank
|
|
45
|
+
* @param metricName - Metric to rank by
|
|
46
|
+
* @param ascending - Sort ascending (lower is better)
|
|
47
|
+
* @returns Ranked results with positions
|
|
48
|
+
*/
|
|
49
|
+
/**
 * Rank results by one named metric. Each returned entry pairs the original
 * result with its rank and the metric value used for ranking.
 *
 * NOTE(review): `ascending` is optional; its default and whether ranks start
 * at 0 or 1 are set by the implementation, which is not visible here — confirm.
 */
export declare const computeRankings: (results: EvaluationResult[], metricName: string, ascending?: boolean) => {
|
|
50
|
+
result: EvaluationResult;
|
|
51
|
+
rank: number;
|
|
52
|
+
value: number;
|
|
53
|
+
}[];
|
|
54
|
+
//# sourceMappingURL=aggregators.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"aggregators.d.ts","sourceRoot":"","sources":["../../src/aggregation/aggregators.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAGH,OAAO,KAAK,EAAE,iBAAiB,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AAC7E,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AAE3D;;;;;GAKG;AACH,eAAO,MAAM,mBAAmB,GAAI,QAAQ,MAAM,EAAE,KAAG,YAuDtD,CAAC;AAEF;;;;;;GAMG;AACH,eAAO,MAAM,cAAc,GAAI,cAAc,MAAM,EAAE,eAAe,MAAM,KAAG,MAG5E,CAAC;AAEF;;;;;GAKG;AACH,eAAO,MAAM,iBAAiB,GAAI,OAAO,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,KAAG,MAG7D,CAAC;AAEF;;;;;;;GAOG;AACH,eAAO,MAAM,iBAAiB,GAC7B,gBAAgB,gBAAgB,EAAE,EAClC,iBAAiB,gBAAgB,EAAE,EACnC,YAAY,MAAM,KAChB,iBA+EF,CAAC;AAEF;;;;;;;GAOG;AACH,eAAO,MAAM,eAAe,GAC3B,SAAS,gBAAgB,EAAE,EAC3B,YAAY,MAAM,EAClB,mBAAiB,KACf;IAAE,MAAM,EAAE,gBAAgB,CAAC;IAAC,IAAI,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,EAgB3D,CAAC"}
|
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Aggregation Functions
|
|
3
|
+
*
|
|
4
|
+
* Pure functions for computing aggregated statistics from evaluation results.
|
|
5
|
+
* These functions are extracted from inline table generation code to ensure
|
|
6
|
+
* separation of concerns.
|
|
7
|
+
*/
|
|
8
|
+
import { mannWhitneyUTest } from "../statistical/mann-whitney-u.js";
|
|
9
|
+
/**
|
|
10
|
+
* Compute summary statistics for an array of numbers.
|
|
11
|
+
*
|
|
12
|
+
* @param values - Array of numeric values
|
|
13
|
+
* @returns Summary statistics
|
|
14
|
+
*/
|
|
15
|
+
export const computeSummaryStats = (values) => {
|
|
16
|
+
if (values.length === 0) {
|
|
17
|
+
return {
|
|
18
|
+
n: 0,
|
|
19
|
+
mean: Number.NaN,
|
|
20
|
+
median: Number.NaN,
|
|
21
|
+
min: Number.NaN,
|
|
22
|
+
max: Number.NaN,
|
|
23
|
+
};
|
|
24
|
+
}
|
|
25
|
+
const n = values.length;
|
|
26
|
+
const sorted = [...values].sort((a, b) => a - b);
|
|
27
|
+
const sum = values.reduce((accumulator, v) => accumulator + v, 0);
|
|
28
|
+
const mean = sum / n;
|
|
29
|
+
const min = sorted[0];
|
|
30
|
+
const max = sorted[n - 1];
|
|
31
|
+
// Median
|
|
32
|
+
const midIndex = Math.floor(n / 2);
|
|
33
|
+
const median = n % 2 === 0 ? (sorted[midIndex - 1] + sorted[midIndex]) / 2 : sorted[midIndex];
|
|
34
|
+
// Standard deviation (sample)
|
|
35
|
+
let std;
|
|
36
|
+
if (n > 1) {
|
|
37
|
+
const squaredDiffs = values.map((v) => (v - mean) ** 2);
|
|
38
|
+
const variance = squaredDiffs.reduce((accumulator, v) => accumulator + v, 0) / (n - 1);
|
|
39
|
+
std = Math.sqrt(variance);
|
|
40
|
+
}
|
|
41
|
+
// 95% confidence interval (assumes normal distribution)
|
|
42
|
+
let confidence95;
|
|
43
|
+
if (std !== undefined && n > 1) {
|
|
44
|
+
const standardError = std / Math.sqrt(n);
|
|
45
|
+
const tValue = getTValue(n - 1, 0.975); // Two-tailed 95% CI
|
|
46
|
+
const margin = tValue * standardError;
|
|
47
|
+
confidence95 = [mean - margin, mean + margin];
|
|
48
|
+
}
|
|
49
|
+
// Percentiles
|
|
50
|
+
const p25 = sorted[Math.floor(n * 0.25)];
|
|
51
|
+
const p75 = sorted[Math.floor(n * 0.75)];
|
|
52
|
+
return {
|
|
53
|
+
n,
|
|
54
|
+
mean,
|
|
55
|
+
median,
|
|
56
|
+
min,
|
|
57
|
+
max,
|
|
58
|
+
std,
|
|
59
|
+
confidence95,
|
|
60
|
+
sum,
|
|
61
|
+
p25,
|
|
62
|
+
p75,
|
|
63
|
+
};
|
|
64
|
+
};
|
|
65
|
+
/**
 * Compute speedup ratio (baseline / treatment).
 *
 * A treatment time of exactly zero yields `Infinity`, whatever the baseline is.
 *
 * @param baselineTime - Baseline execution time
 * @param treatmentTime - Treatment execution time
 * @returns `baselineTime / treatmentTime`, or `Infinity` when `treatmentTime` is 0
 */
export const computeSpeedup = (baselineTime, treatmentTime) => treatmentTime === 0 ? Infinity : baselineTime / treatmentTime;
|
|
77
|
+
/**
 * Compute maximum speedup from multiple pairs.
 *
 * Each pair is passed to `computeSpeedup`, and the largest ratio wins. The
 * maximum is tracked in a loop rather than via `Math.max(...ratios)`, because
 * spreading one argument per pair throws a `RangeError` once the list grows
 * past the engine's argument limit.
 *
 * @param pairs - Array of [baseline, treatment] time pairs
 * @returns Maximum speedup ratio, or 0 when `pairs` is empty
 */
export const computeMaxSpeedup = (pairs) => {
    if (pairs.length === 0)
        return 0;
    // Start from -Infinity so the result equals Math.max over all ratios
    // (including propagating NaN if any ratio is NaN).
    let best = -Infinity;
    for (const [baseline, treatment] of pairs) {
        best = Math.max(best, computeSpeedup(baseline, treatment));
    }
    return best;
};
|
|
88
|
+
/**
 * Compute comparison metrics between primary and baseline results.
 *
 * Results are paired by `run.caseId`. A pair is used only when both sides have
 * a defined value for `metricName`; if a case ID occurs more than once on one
 * side, the last occurrence wins. When no usable pair exists (no shared case
 * IDs, or the metric is missing from every shared case) a neutral result of
 * `deltas.default = 0` and `ratios.default = 1` is returned, with no further
 * fields, instead of NaN-filled statistics.
 *
 * Otherwise the result contains:
 * - `deltas.default`: primary mean minus baseline mean
 * - `ratios.default`: primary mean / baseline mean (`Infinity` when the baseline mean is 0)
 * - `betterRate`: fraction of pairs where the primary value is strictly greater
 * - `uStatistic`, `pValue`: the `u` and `pValue` fields returned by `mannWhitneyUTest`
 * - `effectSize`: |delta| divided by the pooled standard deviation (0 when the
 *   pooled deviation is 0); `undefined` unless both sides have a standard
 *   deviation and more than one paired value
 *
 * @param primaryResults - Full result objects from primary SUT
 * @param baselineResults - Full result objects from baseline SUT
 * @param metricName - Metric to compare
 * @returns Comparison metrics
 */
export const computeComparison = (primaryResults, baselineResults, metricName) => {
    // Index the metric by case ID; a missing metric is stored as undefined and
    // filtered out when pairing below.
    const primaryByCase = new Map();
    for (const result of primaryResults) {
        primaryByCase.set(result.run.caseId, result.metrics.numeric[metricName]);
    }
    const baselineByCase = new Map();
    for (const result of baselineResults) {
        baselineByCase.set(result.run.caseId, result.metrics.numeric[metricName]);
    }
    // Build index-aligned arrays of paired values.
    const primaryValues = [];
    const baselineValues = [];
    for (const [caseId, primaryValue] of primaryByCase) {
        const baselineValue = baselineByCase.get(caseId);
        if (primaryValue !== undefined && baselineValue !== undefined) {
            primaryValues.push(primaryValue);
            baselineValues.push(baselineValue);
        }
    }
    // Guard on the pairs actually collected, not just on overlapping IDs:
    // otherwise shared cases that lack the metric would fall through and
    // produce 0/0 below.
    if (primaryValues.length === 0) {
        return {
            deltas: { default: 0 },
            ratios: { default: 1 },
        };
    }
    const primaryStats = computeSummaryStats(primaryValues);
    const baselineStats = computeSummaryStats(baselineValues);
    const delta = primaryStats.mean - baselineStats.mean;
    const ratio = baselineStats.mean === 0 ? Infinity : primaryStats.mean / baselineStats.mean;
    // Win rate: share of pairs where primary > baseline (paired by case ID).
    let wins = 0;
    for (const [index, primaryValue] of primaryValues.entries()) {
        if (primaryValue > baselineValues[index]) {
            wins++;
        }
    }
    const betterRate = wins / primaryValues.length;
    // Mann-Whitney U test for statistical significance
    const mwuResult = mannWhitneyUTest(primaryValues, baselineValues);
    // Effect size (Cohen's d, reported as a magnitude)
    let effectSize;
    if (primaryStats.std !== undefined &&
        baselineStats.std !== undefined &&
        primaryStats.n > 1 &&
        baselineStats.n > 1) {
        const pooledStd = Math.sqrt(((primaryStats.n - 1) * primaryStats.std ** 2 +
            (baselineStats.n - 1) * baselineStats.std ** 2) /
            (primaryStats.n + baselineStats.n - 2));
        effectSize = pooledStd === 0 ? 0 : Math.abs(delta) / pooledStd;
    }
    return {
        deltas: { default: delta },
        ratios: { default: ratio },
        betterRate,
        uStatistic: mwuResult.u,
        pValue: mwuResult.pValue,
        effectSize,
    };
};
|
|
161
|
+
/**
 * Rank results by one numeric metric.
 *
 * Results whose metric is missing (`null`/`undefined`) or NaN are left out.
 * The rest are ordered by value and numbered from 1; equal values still
 * receive consecutive, distinct ranks.
 *
 * @param results - Results to rank
 * @param metricName - Metric to rank by
 * @param ascending - Sort ascending (lower is better); defaults to descending
 * @returns One `{ result, value, rank }` entry per ranked result, in rank order
 */
export const computeRankings = (results, metricName, ascending = false) => {
    // Collect the rankable entries, skipping anything without a usable value.
    const candidates = [];
    results.forEach((result) => {
        const value = result.metrics.numeric[metricName] ?? Number.NaN;
        if (!Number.isNaN(value)) {
            candidates.push({ result, value });
        }
    });
    const byValue = ascending
        ? (left, right) => left.value - right.value
        : (left, right) => right.value - left.value;
    candidates.sort(byValue);
    // Ranks are 1-based positions in the sorted order.
    return candidates.map((entry, position) => ({
        result: entry.result,
        value: entry.value,
        rank: position + 1,
    }));
};
|
|
184
|
+
/**
 * Critical values of Student's t distribution for a two-tailed 95% interval
 * (cumulative probability 0.975), as `[degrees of freedom, t]` pairs in
 * ascending order of degrees of freedom.
 */
const T_CRITICAL_975 = [
    [1, 12.706],
    [2, 4.303],
    [3, 3.182],
    [4, 2.776],
    [5, 2.571],
    [6, 2.447],
    [7, 2.365],
    [8, 2.306],
    [9, 2.262],
    [10, 2.228],
    [15, 2.131],
    [20, 2.086],
    [25, 2.06],
    [30, 2.042],
    [40, 2.021],
    [50, 2.009],
    [60, 2.0],
    [80, 1.99],
    [100, 1.984],
    [120, 1.98],
    [200, 1.972],
    [500, 1.965],
    [1000, 1.962],
];
/**
 * Get t-value for confidence interval calculation.
 *
 * Only the 95% two-tailed case (`probability === 0.975`) is tabulated; any
 * other probability falls back to the normal-approximation value 1.96.
 *
 * When `df` lies between two table rows, the row with the largest degrees of
 * freedom that does not exceed `df` is used. This is the conservative choice:
 * it never returns a critical value smaller than the true one, so intervals
 * are never narrower than they should be. A `df` below the first row uses the
 * first row; a `df` beyond the last row (or NaN) uses 1.96.
 *
 * @param df - Degrees of freedom
 * @param probability - Probability (e.g., 0.975 for 95% two-tailed)
 * @returns t-value
 */
const getTValue = (df, probability) => {
    // Simplified t-table for 95% CI (probability = 0.975)
    if (probability !== 0.975) {
        return 1.96; // Fall back to z-value
    }
    const largestTabulatedDf = T_CRITICAL_975[T_CRITICAL_975.length - 1][0];
    // Written as a negated <= so that NaN also takes the z-value path.
    if (!(df <= largestTabulatedDf)) {
        return 1.96; // Large sample: use z-value
    }
    // Walk upward and keep the last row whose df does not exceed the request.
    let tValue = T_CRITICAL_975[0][1];
    for (const [tabulatedDf, critical] of T_CRITICAL_975) {
        if (tabulatedDf > df) {
            break;
        }
        tValue = critical;
    }
    return tValue;
};
|
|
228
|
+
//# sourceMappingURL=aggregators.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"aggregators.js","sourceRoot":"","sources":["../../src/aggregation/aggregators.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EAAE,gBAAgB,EAAE,MAAM,kCAAkC,CAAC;AAIpE;;;;;GAKG;AACH,MAAM,CAAC,MAAM,mBAAmB,GAAG,CAAC,MAAgB,EAAgB,EAAE;IACrE,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO;YACN,CAAC,EAAE,CAAC;YACJ,IAAI,EAAE,MAAM,CAAC,GAAG;YAChB,MAAM,EAAE,MAAM,CAAC,GAAG;YAClB,GAAG,EAAE,MAAM,CAAC,GAAG;YACf,GAAG,EAAE,MAAM,CAAC,GAAG;SACf,CAAC;IACH,CAAC;IAED,MAAM,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC;IACxB,MAAM,MAAM,GAAG,CAAC,GAAG,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IACjD,MAAM,GAAG,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,WAAW,EAAE,CAAC,EAAE,EAAE,CAAC,WAAW,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;IAClE,MAAM,IAAI,GAAG,GAAG,GAAG,CAAC,CAAC;IACrB,MAAM,GAAG,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;IACtB,MAAM,GAAG,GAAG,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAE1B,SAAS;IACT,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IACnC,MAAM,MAAM,GAAG,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,QAAQ,GAAG,CAAC,CAAC,GAAG,MAAM,CAAC,QAAQ,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;IAE9F,8BAA8B;IAC9B,IAAI,GAAuB,CAAC;IAC5B,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;QACX,MAAM,YAAY,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;QACxD,MAAM,QAAQ,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC,WAAW,EAAE,CAAC,EAAE,EAAE,CAAC,WAAW,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QACvF,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IAC3B,CAAC;IAED,wDAAwD;IACxD,IAAI,YAA0C,CAAC;IAC/C,IAAI,GAAG,KAAK,SAAS,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;QAChC,MAAM,aAAa,GAAG,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACzC,MAAM,MAAM,GAAG,SAAS,CAAC,CAAC,GAAG,CAAC,EAAE,KAAK,CAAC,CAAC,CAAC,oBAAoB;QAC5D,MAAM,MAAM,GAAG,MAAM,GAAG,aAAa,CAAC;QACtC,YAAY,GAAG,CAAC,IAAI,GAAG,MAAM,EAAE,IAAI,GAAG,MAAM,CAAC,CAAC;IAC/C,CAAC;IAED,cAAc;IACd,MAAM,GAAG,GAAG,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC;IACzC,MAAM,GAAG,GAAG,MAAM,CAAC,IAAI,CA
AC,KAAK,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC;IAEzC,OAAO;QACN,CAAC;QACD,IAAI;QACJ,MAAM;QACN,GAAG;QACH,GAAG;QACH,GAAG;QACH,YAAY;QACZ,GAAG;QACH,GAAG;QACH,GAAG;KACH,CAAC;AACH,CAAC,CAAC;AAEF;;;;;;GAMG;AACH,MAAM,CAAC,MAAM,cAAc,GAAG,CAAC,YAAoB,EAAE,aAAqB,EAAU,EAAE;IACrF,IAAI,aAAa,KAAK,CAAC;QAAE,OAAO,QAAQ,CAAC;IACzC,OAAO,YAAY,GAAG,aAAa,CAAC;AACrC,CAAC,CAAC;AAEF;;;;;GAKG;AACH,MAAM,CAAC,MAAM,iBAAiB,GAAG,CAAC,KAAyB,EAAU,EAAE;IACtE,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IACjC,OAAO,IAAI,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,cAAc,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;AACjE,CAAC,CAAC;AAEF;;;;;;;GAOG;AACH,MAAM,CAAC,MAAM,iBAAiB,GAAG,CAChC,cAAkC,EAClC,eAAmC,EACnC,UAAkB,EACE,EAAE;IACtB,sCAAsC;IACtC,MAAM,aAAa,GAAG,IAAI,GAAG,EAA8B,CAAC;IAC5D,MAAM,cAAc,GAAG,IAAI,GAAG,EAA8B,CAAC;IAE7D,KAAK,MAAM,MAAM,IAAI,cAAc,EAAE,CAAC;QACrC,MAAM,KAAK,GAAG,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;QACjD,aAAa,CAAC,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC;IAC7C,CAAC;IAED,KAAK,MAAM,MAAM,IAAI,eAAe,EAAE,CAAC;QACtC,MAAM,KAAK,GAAG,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;QACjD,cAAc,CAAC,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC;IAC9C,CAAC;IAED,wBAAwB;IACxB,MAAM,aAAa,GAAG,CAAC,GAAG,aAAa,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,cAAc,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;IAEvF,IAAI,aAAa,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAChC,OAAO;YACN,MAAM,EAAE,EAAE,OAAO,EAAE,CAAC,EAAE;YACtB,MAAM,EAAE,EAAE,OAAO,EAAE,CAAC,EAAE;SACtB,CAAC;IACH,CAAC;IAED,wBAAwB;IACxB,MAAM,aAAa,GAAa,EAAE,CAAC;IACnC,MAAM,cAAc,GAAa,EAAE,CAAC;IACpC,KAAK,MAAM,MAAM,IAAI,aAAa,EAAE,CAAC;QACpC,MAAM,YAAY,GAAG,aAAa,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QAC/C,MAAM,aAAa,GAAG,cAAc,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QACjD,IAAI,YAAY,KAAK,SAAS,IAAI,aAAa,KAAK,SAAS,EAAE,CAAC;YAC/D,aAAa,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;YACjC,cAAc,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QACpC,CAAC;IACF,CAAC;IAED,MAAM,YAAY,GAAG,mBAAmB,CAAC,aAAa,CAAC,CAAC;IACxD,MAAM,aAAa,GAAG,mBAAmB,CAAC,cAAc,CAAC,CAAC;IAE1D,MAAM
,KAAK,GAAG,YAAY,CAAC,IAAI,GAAG,aAAa,CAAC,IAAI,CAAC;IACrD,MAAM,KAAK,GAAG,aAAa,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,YAAY,CAAC,IAAI,GAAG,aAAa,CAAC,IAAI,CAAC;IAE3F,6EAA6E;IAC7E,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,KAAK,MAAM,CAAC,KAAK,EAAE,YAAY,CAAC,IAAI,aAAa,CAAC,OAAO,EAAE,EAAE,CAAC;QAC7D,IAAI,YAAY,GAAG,cAAc,CAAC,KAAK,CAAC,EAAE,CAAC;YAC1C,IAAI,EAAE,CAAC;QACR,CAAC;IACF,CAAC;IACD,MAAM,UAAU,GAAG,IAAI,GAAG,aAAa,CAAC,MAAM,CAAC;IAE/C,mDAAmD;IACnD,MAAM,SAAS,GAAG,gBAAgB,CAAC,aAAa,EAAE,cAAc,CAAC,CAAC;IAElE,0BAA0B;IAC1B,IAAI,UAA8B,CAAC;IACnC,IACC,YAAY,CAAC,GAAG,KAAK,SAAS;QAC9B,aAAa,CAAC,GAAG,KAAK,SAAS;QAC/B,YAAY,CAAC,CAAC,GAAG,CAAC;QAClB,aAAa,CAAC,CAAC,GAAG,CAAC,EAClB,CAAC;QACF,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAC1B,CAAC,CAAC,YAAY,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,YAAY,CAAC,GAAG,IAAI,CAAC;YAC5C,CAAC,aAAa,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,aAAa,CAAC,GAAG,IAAI,CAAC,CAAC;YAC/C,CAAC,YAAY,CAAC,CAAC,GAAG,aAAa,CAAC,CAAC,GAAG,CAAC,CAAC,CACvC,CAAC;QACF,UAAU,GAAG,SAAS,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,SAAS,CAAC;IAChE,CAAC;IAED,OAAO;QACN,MAAM,EAAE,EAAE,OAAO,EAAE,KAAK,EAAE;QAC1B,MAAM,EAAE,EAAE,OAAO,EAAE,KAAK,EAAE;QAC1B,UAAU;QACV,UAAU,EAAE,SAAS,CAAC,CAAC;QACvB,MAAM,EAAE,SAAS,CAAC,MAAM;QACxB,UAAU;KACV,CAAC;AACH,CAAC,CAAC;AAEF;;;;;;;GAOG;AACH,MAAM,CAAC,MAAM,eAAe,GAAG,CAC9B,OAA2B,EAC3B,UAAkB,EAClB,SAAS,GAAG,KAAK,EAC6C,EAAE;IAChE,MAAM,UAAU,GAAG,OAAO;SACxB,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;QACjB,MAAM;QACN,KAAK,EAAE,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,UAAU,CAAC,IAAI,MAAM,CAAC,GAAG;KACvD,CAAC,CAAC;SACF,MAAM,CAAC,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC;IAE9C,OAAO;IACP,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC;IAE/E,eAAe;IACf,OAAO,UAAU,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC,CAAC;QACvC,GAAG,IAAI;QACP,IAAI,EAAE,KAAK,GAAG,CAAC;KACf,CAAC,CAAC,CAAC;AACL,CAAC,CAAC;AAEF;;;;;;;GA
OG;AACH,MAAM,SAAS,GAAG,CAAC,EAAU,EAAE,WAAmB,EAAU,EAAE;IAC7D,sDAAsD;IACtD,IAAI,WAAW,KAAK,KAAK,EAAE,CAAC;QAC3B,OAAO,IAAI,CAAC,CAAC,yCAAyC;IACvD,CAAC;IAED,MAAM,MAAM,GAA2B;QACtC,CAAC,EAAE,MAAM;QACT,CAAC,EAAE,KAAK;QACR,CAAC,EAAE,KAAK;QACR,CAAC,EAAE,KAAK;QACR,CAAC,EAAE,KAAK;QACR,CAAC,EAAE,KAAK;QACR,CAAC,EAAE,KAAK;QACR,CAAC,EAAE,KAAK;QACR,CAAC,EAAE,KAAK;QACR,EAAE,EAAE,KAAK;QACT,EAAE,EAAE,KAAK;QACT,EAAE,EAAE,KAAK;QACT,EAAE,EAAE,IAAI;QACR,EAAE,EAAE,KAAK;QACT,EAAE,EAAE,KAAK;QACT,EAAE,EAAE,KAAK;QACT,GAAG,EAAE,KAAK;KACV,CAAC;IAEF,kBAAkB;IAClB,MAAM,GAAG,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC;SAC7B,GAAG,CAAC,MAAM,CAAC;SACX,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IACxB,KAAK,MAAM,GAAG,IAAI,GAAG,EAAE,CAAC;QACvB,IAAI,EAAE,IAAI,GAAG,EAAE,CAAC;YACf,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC;QACpB,CAAC;IACF,CAAC;IAED,4BAA4B;IAC5B,OAAO,IAAI,CAAC;AACb,CAAC,CAAC"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Aggregation Module
|
|
3
|
+
*
|
|
4
|
+
* Re-exports aggregation functions and pipeline.
|
|
5
|
+
*/
|
|
6
|
+
export { computeComparison, computeMaxSpeedup, computeRankings, computeSpeedup, computeSummaryStats, } from "./aggregators.js";
|
|
7
|
+
export { aggregateResults, type AggregationPipelineOptions, createAggregationOutput, } from "./pipeline.js";
|
|
8
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/aggregation/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EACN,iBAAiB,EACjB,iBAAiB,EACjB,eAAe,EACf,cAAc,EACd,mBAAmB,GACnB,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EACN,gBAAgB,EAChB,KAAK,0BAA0B,EAC/B,uBAAuB,GACvB,MAAM,eAAe,CAAC"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Aggregation Module
|
|
3
|
+
*
|
|
4
|
+
* Re-exports aggregation functions and pipeline.
|
|
5
|
+
*/
|
|
6
|
+
export { computeComparison, computeMaxSpeedup, computeRankings, computeSpeedup, computeSummaryStats, } from "./aggregators.js";
|
|
7
|
+
export { aggregateResults, createAggregationOutput, } from "./pipeline.js";
|
|
8
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/aggregation/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EACN,iBAAiB,EACjB,iBAAiB,EACjB,eAAe,EACf,cAAc,EACd,mBAAmB,GACnB,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EACN,gBAAgB,EAEhB,uBAAuB,GACvB,MAAM,eAAe,CAAC"}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Aggregation Pipeline
|
|
3
|
+
*
|
|
4
|
+
* Transforms raw evaluation results into aggregated summaries.
|
|
5
|
+
* This is the core of the Execute -> Aggregate -> Render pipeline.
|
|
6
|
+
*/
|
|
7
|
+
import type { AggregatedResult, AggregationOutput } from "../types/aggregate.js";
|
|
8
|
+
import type { EvaluationResult } from "../types/result.js";
|
|
9
|
+
/**
|
|
10
|
+
* Options for the aggregation pipeline. Every field is optional.
|
|
11
|
+
*/
|
|
12
|
+
export interface AggregationPipelineOptions {
|
|
13
|
+
/** Group results by case class (default: true) */
|
|
14
|
+
groupByCaseClass?: boolean;
|
|
15
|
+
/** Compute comparisons with baselines (default: true) */
|
|
16
|
+
computeComparisons?: boolean;
|
|
17
|
+
/** Primary SUT ID for comparison (auto-detected if not specified) */
|
|
18
|
+
primarySut?: string;
|
|
19
|
+
/** Baseline SUT IDs for comparison (auto-detected if not specified) */
|
|
20
|
+
baselineSuts?: string[];
|
|
21
|
+
/** Names of the metrics to aggregate (all metrics if not specified) */
|
|
22
|
+
metrics?: string[];
|
|
23
|
+
|
+
}
|
|
24
|
+
/**
|
|
25
|
+
* Aggregate evaluation results into summaries.
|
|
26
|
+
*
|
|
27
|
+
* @param results - Raw evaluation results
|
|
28
|
+
* @param options - Aggregation options; may be omitted (see AggregationPipelineOptions for the documented per-field defaults)
|
|
29
|
+
* @returns Array of aggregated results
|
|
30
|
+
*/
|
|
31
|
+
export declare const aggregateResults: (results: EvaluationResult[], options?: AggregationPipelineOptions) => AggregatedResult[];
|
|
32
|
+
/**
|
|
33
|
+
* Create a full aggregation output document.
|
|
34
|
+
* @param aggregates - Aggregated results to include (presumably the output of aggregateResults; confirm against the implementation)
|
|
35
|
+
* @param results - Evaluation results passed alongside the aggregates (presumably the raw results they were computed from; confirm against the implementation)
|
|
36
|
+
*/
|
|
37
|
+
export declare const createAggregationOutput: (aggregates: AggregatedResult[], results: EvaluationResult[]) => AggregationOutput;
|
|
38
|
+
//# sourceMappingURL=pipeline.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../../src/aggregation/pipeline.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EACX,gBAAgB,EAChB,iBAAiB,EAGjB,MAAM,uBAAuB,CAAC;AAC/B,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AAG3D;;GAEG;AACH,MAAM,WAAW,0BAA0B;IAC1C,0CAA0C;IAC1C,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAE3B,yDAAyD;IACzD,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAE7B,qEAAqE;IACrE,UAAU,CAAC,EAAE,MAAM,CAAC;IAEpB,uEAAuE;IACvE,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IAExB,kDAAkD;IAClD,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;CACnB;AAUD;;;;;;GAMG;AACH,eAAO,MAAM,gBAAgB,GAC5B,SAAS,gBAAgB,EAAE,EAC3B,UAAS,0BAA+B,KACtC,gBAAgB,EAmBlB,CAAC;AAyLF;;;;GAIG;AACH,eAAO,MAAM,uBAAuB,GACnC,YAAY,gBAAgB,EAAE,EAC9B,SAAS,gBAAgB,EAAE,KACzB,iBAkBF,CAAC"}
|