ppef 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +172 -0
- package/dist/__tests__/framework-pipeline.integration.test.d.ts +7 -0
- package/dist/__tests__/framework-pipeline.integration.test.d.ts.map +1 -0
- package/dist/__tests__/framework-pipeline.integration.test.js +413 -0
- package/dist/__tests__/framework-pipeline.integration.test.js.map +1 -0
- package/dist/__tests__/registry-executor.integration.test.d.ts +5 -0
- package/dist/__tests__/registry-executor.integration.test.d.ts.map +1 -0
- package/dist/__tests__/registry-executor.integration.test.js +349 -0
- package/dist/__tests__/registry-executor.integration.test.js.map +1 -0
- package/dist/__tests__/test-helpers.d.ts +94 -0
- package/dist/__tests__/test-helpers.d.ts.map +1 -0
- package/dist/__tests__/test-helpers.js +271 -0
- package/dist/__tests__/test-helpers.js.map +1 -0
- package/dist/aggregation/aggregators.d.ts +54 -0
- package/dist/aggregation/aggregators.d.ts.map +1 -0
- package/dist/aggregation/aggregators.js +228 -0
- package/dist/aggregation/aggregators.js.map +1 -0
- package/dist/aggregation/index.d.ts +8 -0
- package/dist/aggregation/index.d.ts.map +1 -0
- package/dist/aggregation/index.js +8 -0
- package/dist/aggregation/index.js.map +1 -0
- package/dist/aggregation/pipeline.d.ts +38 -0
- package/dist/aggregation/pipeline.d.ts.map +1 -0
- package/dist/aggregation/pipeline.js +198 -0
- package/dist/aggregation/pipeline.js.map +1 -0
- package/dist/claims/evaluator.d.ts +33 -0
- package/dist/claims/evaluator.d.ts.map +1 -0
- package/dist/claims/evaluator.js +174 -0
- package/dist/claims/evaluator.js.map +1 -0
- package/dist/claims/index.d.ts +7 -0
- package/dist/claims/index.d.ts.map +1 -0
- package/dist/claims/index.js +7 -0
- package/dist/claims/index.js.map +1 -0
- package/dist/collector/index.d.ts +8 -0
- package/dist/collector/index.d.ts.map +1 -0
- package/dist/collector/index.js +8 -0
- package/dist/collector/index.js.map +1 -0
- package/dist/collector/result-collector.d.ts +159 -0
- package/dist/collector/result-collector.d.ts.map +1 -0
- package/dist/collector/result-collector.js +213 -0
- package/dist/collector/result-collector.js.map +1 -0
- package/dist/collector/schema.d.ts +34 -0
- package/dist/collector/schema.d.ts.map +1 -0
- package/dist/collector/schema.js +145 -0
- package/dist/collector/schema.js.map +1 -0
- package/dist/executor/__tests__/checkpoint-hash-bug.diagnostic.test.d.ts +10 -0
- package/dist/executor/__tests__/checkpoint-hash-bug.diagnostic.test.d.ts.map +1 -0
- package/dist/executor/__tests__/checkpoint-hash-bug.diagnostic.test.js +122 -0
- package/dist/executor/__tests__/checkpoint-hash-bug.diagnostic.test.js.map +1 -0
- package/dist/executor/__tests__/checkpoint-manager.integration.test.d.ts +7 -0
- package/dist/executor/__tests__/checkpoint-manager.integration.test.d.ts.map +1 -0
- package/dist/executor/__tests__/checkpoint-manager.integration.test.js +330 -0
- package/dist/executor/__tests__/checkpoint-manager.integration.test.js.map +1 -0
- package/dist/executor/__tests__/checkpoint-manager.unit.test.d.ts +7 -0
- package/dist/executor/__tests__/checkpoint-manager.unit.test.d.ts.map +1 -0
- package/dist/executor/__tests__/checkpoint-manager.unit.test.js +449 -0
- package/dist/executor/__tests__/checkpoint-manager.unit.test.js.map +1 -0
- package/dist/executor/__tests__/checkpoint-merge-bug.diagnostic.test.d.ts +11 -0
- package/dist/executor/__tests__/checkpoint-merge-bug.diagnostic.test.d.ts.map +1 -0
- package/dist/executor/__tests__/checkpoint-merge-bug.diagnostic.test.js +224 -0
- package/dist/executor/__tests__/checkpoint-merge-bug.diagnostic.test.js.map +1 -0
- package/dist/executor/__tests__/checkpoint-merge-bug.unit.test.d.ts +8 -0
- package/dist/executor/__tests__/checkpoint-merge-bug.unit.test.d.ts.map +1 -0
- package/dist/executor/__tests__/checkpoint-merge-bug.unit.test.js +164 -0
- package/dist/executor/__tests__/checkpoint-merge-bug.unit.test.js.map +1 -0
- package/dist/executor/__tests__/checkpoint-storage.unit.test.d.ts +7 -0
- package/dist/executor/__tests__/checkpoint-storage.unit.test.d.ts.map +1 -0
- package/dist/executor/__tests__/checkpoint-storage.unit.test.js +386 -0
- package/dist/executor/__tests__/checkpoint-storage.unit.test.js.map +1 -0
- package/dist/executor/__tests__/executor.unit.test.d.ts +7 -0
- package/dist/executor/__tests__/executor.unit.test.d.ts.map +1 -0
- package/dist/executor/__tests__/executor.unit.test.js +134 -0
- package/dist/executor/__tests__/executor.unit.test.js.map +1 -0
- package/dist/executor/__tests__/parallel-checkpoint-merge.integration.test.d.ts +12 -0
- package/dist/executor/__tests__/parallel-checkpoint-merge.integration.test.d.ts.map +1 -0
- package/dist/executor/__tests__/parallel-checkpoint-merge.integration.test.js +196 -0
- package/dist/executor/__tests__/parallel-checkpoint-merge.integration.test.js.map +1 -0
- package/dist/executor/__tests__/parallel-executor.integration.test.d.ts +7 -0
- package/dist/executor/__tests__/parallel-executor.integration.test.d.ts.map +1 -0
- package/dist/executor/__tests__/parallel-executor.integration.test.js +249 -0
- package/dist/executor/__tests__/parallel-executor.integration.test.js.map +1 -0
- package/dist/executor/__tests__/parallel-executor.unit.test.d.ts +7 -0
- package/dist/executor/__tests__/parallel-executor.unit.test.d.ts.map +1 -0
- package/dist/executor/__tests__/parallel-executor.unit.test.js +203 -0
- package/dist/executor/__tests__/parallel-executor.unit.test.js.map +1 -0
- package/dist/executor/checkpoint-manager.d.ts +231 -0
- package/dist/executor/checkpoint-manager.d.ts.map +1 -0
- package/dist/executor/checkpoint-manager.js +395 -0
- package/dist/executor/checkpoint-manager.js.map +1 -0
- package/dist/executor/checkpoint-storage.d.ts +230 -0
- package/dist/executor/checkpoint-storage.d.ts.map +1 -0
- package/dist/executor/checkpoint-storage.js +370 -0
- package/dist/executor/checkpoint-storage.js.map +1 -0
- package/dist/executor/checkpoint-types.d.ts +48 -0
- package/dist/executor/checkpoint-types.d.ts.map +1 -0
- package/dist/executor/checkpoint-types.js +8 -0
- package/dist/executor/checkpoint-types.js.map +1 -0
- package/dist/executor/executor.d.ts +164 -0
- package/dist/executor/executor.d.ts.map +1 -0
- package/dist/executor/executor.js +408 -0
- package/dist/executor/executor.js.map +1 -0
- package/dist/executor/index.d.ts +11 -0
- package/dist/executor/index.d.ts.map +1 -0
- package/dist/executor/index.js +11 -0
- package/dist/executor/index.js.map +1 -0
- package/dist/executor/memory-monitor.d.ts +115 -0
- package/dist/executor/memory-monitor.d.ts.map +1 -0
- package/dist/executor/memory-monitor.js +168 -0
- package/dist/executor/memory-monitor.js.map +1 -0
- package/dist/executor/parallel-executor.d.ts +53 -0
- package/dist/executor/parallel-executor.d.ts.map +1 -0
- package/dist/executor/parallel-executor.js +194 -0
- package/dist/executor/parallel-executor.js.map +1 -0
- package/dist/executor/run-id.d.ts +71 -0
- package/dist/executor/run-id.d.ts.map +1 -0
- package/dist/executor/run-id.js +67 -0
- package/dist/executor/run-id.js.map +1 -0
- package/dist/executor/worker-entry.d.ts +8 -0
- package/dist/executor/worker-entry.d.ts.map +1 -0
- package/dist/executor/worker-entry.js +67 -0
- package/dist/executor/worker-entry.js.map +1 -0
- package/dist/index.cjs +11 -0
- package/dist/index.d.ts +15 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +24 -0
- package/dist/index.js.map +1 -0
- package/dist/registry/case-registry.d.ts +113 -0
- package/dist/registry/case-registry.d.ts.map +1 -0
- package/dist/registry/case-registry.js +160 -0
- package/dist/registry/case-registry.js.map +1 -0
- package/dist/registry/index.d.ts +8 -0
- package/dist/registry/index.d.ts.map +1 -0
- package/dist/registry/index.js +8 -0
- package/dist/registry/index.js.map +1 -0
- package/dist/registry/sut-registry.d.ts +96 -0
- package/dist/registry/sut-registry.d.ts.map +1 -0
- package/dist/registry/sut-registry.js +126 -0
- package/dist/registry/sut-registry.js.map +1 -0
- package/dist/renderers/index.d.ts +10 -0
- package/dist/renderers/index.d.ts.map +1 -0
- package/dist/renderers/index.js +9 -0
- package/dist/renderers/index.js.map +1 -0
- package/dist/renderers/latex-renderer.d.ts +84 -0
- package/dist/renderers/latex-renderer.d.ts.map +1 -0
- package/dist/renderers/latex-renderer.js +208 -0
- package/dist/renderers/latex-renderer.js.map +1 -0
- package/dist/renderers/types.d.ts +106 -0
- package/dist/renderers/types.d.ts.map +1 -0
- package/dist/renderers/types.js +23 -0
- package/dist/renderers/types.js.map +1 -0
- package/dist/robustness/analyzer.d.ts +61 -0
- package/dist/robustness/analyzer.d.ts.map +1 -0
- package/dist/robustness/analyzer.js +191 -0
- package/dist/robustness/analyzer.js.map +1 -0
- package/dist/robustness/index.d.ts +8 -0
- package/dist/robustness/index.d.ts.map +1 -0
- package/dist/robustness/index.js +8 -0
- package/dist/robustness/index.js.map +1 -0
- package/dist/robustness/perturbations.d.ts +46 -0
- package/dist/robustness/perturbations.d.ts.map +1 -0
- package/dist/robustness/perturbations.js +184 -0
- package/dist/robustness/perturbations.js.map +1 -0
- package/dist/statistical/index.d.ts +8 -0
- package/dist/statistical/index.d.ts.map +1 -0
- package/dist/statistical/index.js +8 -0
- package/dist/statistical/index.js.map +1 -0
- package/dist/statistical/mann-whitney-u.d.ts +62 -0
- package/dist/statistical/mann-whitney-u.d.ts.map +1 -0
- package/dist/statistical/mann-whitney-u.js +127 -0
- package/dist/statistical/mann-whitney-u.js.map +1 -0
- package/dist/types/aggregate.d.ts +124 -0
- package/dist/types/aggregate.d.ts.map +1 -0
- package/dist/types/aggregate.js +9 -0
- package/dist/types/aggregate.js.map +1 -0
- package/dist/types/case.d.ts +105 -0
- package/dist/types/case.d.ts.map +1 -0
- package/dist/types/case.js +10 -0
- package/dist/types/case.js.map +1 -0
- package/dist/types/claims.d.ts +122 -0
- package/dist/types/claims.d.ts.map +1 -0
- package/dist/types/claims.js +14 -0
- package/dist/types/claims.js.map +1 -0
- package/dist/types/index.d.ts +12 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +7 -0
- package/dist/types/index.js.map +1 -0
- package/dist/types/perturbation.d.ts +105 -0
- package/dist/types/perturbation.d.ts.map +1 -0
- package/dist/types/perturbation.js +9 -0
- package/dist/types/perturbation.js.map +1 -0
- package/dist/types/result.d.ts +150 -0
- package/dist/types/result.d.ts.map +1 -0
- package/dist/types/result.js +12 -0
- package/dist/types/result.js.map +1 -0
- package/dist/types/sut.d.ts +128 -0
- package/dist/types/sut.d.ts.map +1 -0
- package/dist/types/sut.js +12 -0
- package/dist/types/sut.js.map +1 -0
- package/package.json +283 -7
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Aggregation Pipeline
|
|
3
|
+
*
|
|
4
|
+
* Transforms raw evaluation results into aggregated summaries.
|
|
5
|
+
* This is the core of the Execute -> Aggregate -> Render pipeline.
|
|
6
|
+
*/
|
|
7
|
+
import { computeComparison, computeSummaryStats } from "./aggregators.js";
|
|
8
|
+
/**
 * Option values used for any setting the caller leaves out.
 */
const DEFAULT_OPTIONS = {
    groupByCaseClass: true,
    computeComparisons: true,
};
/**
 * Turn raw evaluation results into one aggregate per group.
 *
 * Results are bucketed by `groupResults`, every bucket is summarised by
 * `aggregateGroup`, and, unless comparisons are switched off, the
 * aggregates are handed to `computeAllComparisons`.
 *
 * @param results - Raw evaluation results
 * @param options - Aggregation options, layered over DEFAULT_OPTIONS
 * @returns Aggregated results, in the iteration order of the groups
 */
export const aggregateResults = (results, options = {}) => {
    const settings = { ...DEFAULT_OPTIONS, ...options };
    // An explicit `groupByCaseClass: undefined` overrides the default during
    // the spread above, hence the additional fallback here.
    const grouped = groupResults(results, settings.groupByCaseClass ?? true);
    const aggregates = Array.from(grouped, ([groupKey, members]) => aggregateGroup(groupKey, members, settings.metrics));
    if (settings.computeComparisons) {
        computeAllComparisons(aggregates, results, settings);
    }
    return aggregates;
};
|
|
38
|
+
/**
 * Bucket results by SUT, and additionally by case class when requested.
 *
 * The bucket key is `sut::caseClass` when `groupByCaseClass` is truthy and
 * the result carries a truthy case class; otherwise it is the bare SUT.
 *
 * @param results - Raw evaluation results
 * @param groupByCaseClass - Whether the case class takes part in the key
 * @returns Map from bucket key to the results in that bucket, in first-seen order
 */
const groupResults = (results, groupByCaseClass) => {
    const buckets = new Map();
    for (const entry of results) {
        const { run } = entry;
        let bucketKey = run.sut;
        if (groupByCaseClass && run.caseClass) {
            bucketKey = `${run.sut}::${run.caseClass}`;
        }
        if (!buckets.has(bucketKey)) {
            buckets.set(bucketKey, []);
        }
        buckets.get(bucketKey).push(entry);
    }
    return buckets;
};
|
|
55
|
+
/**
 * Aggregate a single group of results.
 *
 * @param key - Group key as built by `groupResults`: either the bare SUT id
 *   or `sut::caseClass`
 * @param results - Results of the group; must contain at least one entry,
 *   because the SUT id and role are read from the first one
 * @param metricNames - Metrics to aggregate; when nullish, every metric name
 *   found in `results` is used
 * @returns Aggregate holding correctness rates, per-metric summary
 *   statistics and per-metric coverage
 */
const aggregateGroup = (key, results, metricNames) => {
    const firstResult = results[0];
    // Read the SUT from the results instead of splitting the key on "::".
    // Splitting truncates SUT ids or case classes that contain "::"
    // themselves (e.g. "graph::dense" would become "graph").
    const sut = firstResult.run.sut;
    const classPrefix = `${sut}::`;
    const caseClass = key.startsWith(classPrefix) ? key.slice(classPrefix.length) : undefined;
    const runCount = results.length;
    const rate = (count) => count / runCount;
    // Collect all unique cases
    const uniqueCases = new Set(results.map((r) => r.run.caseId));
    // Correctness aggregation
    const validCount = results.filter((r) => r.correctness.valid).length;
    const producedCount = results.filter((r) => r.correctness.producedOutput).length;
    const matchedCount = results.filter((r) => r.correctness.matchesExpected === true).length;
    const hasExpected = results.some((r) => r.correctness.expectedExists);
    // Metric aggregation and coverage
    const metricStats = {};
    const metricCoverage = {};
    const allMetricNames = metricNames ?? getAllMetricNames(results);
    for (const metricName of allMetricNames) {
        // Statistics only consider real numbers; NaN and non-numeric entries are dropped.
        const values = results
            .map((r) => r.metrics.numeric[metricName])
            .filter((v) => typeof v === "number" && !Number.isNaN(v));
        if (values.length > 0) {
            metricStats[metricName] = computeSummaryStats(values);
        }
        // Coverage counts every result that reports the metric at all,
        // including entries excluded from the statistics above.
        const reportedCount = results.filter((r) => metricName in r.metrics.numeric).length;
        metricCoverage[metricName] = rate(reportedCount);
    }
    return {
        sut,
        sutRole: firstResult.run.sutRole,
        caseClass,
        group: {
            runCount,
            caseCount: uniqueCases.size,
        },
        correctness: {
            validRate: rate(validCount),
            producedOutputRate: rate(producedCount),
            // Only meaningful when at least one result had an expected output.
            matchesExpectedRate: hasExpected ? rate(matchedCount) : undefined,
        },
        metrics: metricStats,
        coverage: {
            caseCoverage: 1, // Would need total cases to compute properly
            metricCoverage,
        },
    };
};
|
|
108
|
+
/**
 * List every numeric metric name that occurs in at least one result.
 *
 * @param results - Raw evaluation results
 * @returns Distinct metric names in first-seen order
 */
const getAllMetricNames = (results) => {
    const everyName = results.flatMap((entry) => Object.keys(entry.metrics.numeric));
    return Array.from(new Set(everyName));
};
|
|
121
|
+
/**
 * Compute comparisons between the primary SUT and each baseline SUT.
 *
 * Every aggregate of the primary SUT receives a fresh `comparisons` object
 * keyed by baseline SUT id. Aggregates are mutated in place; nothing is
 * returned. When no primary SUT or no baseline SUT can be determined, the
 * aggregates are left untouched.
 *
 * @param aggregates - Aggregates to annotate
 * @param results - Raw results, passed to `computeComparison` for the
 *   statistical comparison
 * @param options - Pipeline options; `primarySut`, `baselineSuts` and
 *   `groupByCaseClass` are read
 */
const computeAllComparisons = (aggregates, results, options) => {
    // Find primary and baselines
    const primarySut = options.primarySut ?? aggregates.find((a) => a.sutRole === "primary")?.sut;
    const baselineSuts = options.baselineSuts ?? aggregates.filter((a) => a.sutRole === "baseline").map((a) => a.sut);
    if (!primarySut || baselineSuts.length === 0) {
        return;
    }
    // Without case-class grouping an aggregate covers every result of its
    // SUT, so the raw results must not be narrowed by case class either.
    const scopedByCaseClass = options.groupByCaseClass ?? true;
    // Mirrors the truthiness rule groupResults applies when building keys.
    const groupClassOf = (r) => (r.run.caseClass ? r.run.caseClass : undefined);
    const selectRuns = (sut, caseClass) => results.filter((r) => r.run.sut === sut && (!scopedByCaseClass || groupClassOf(r) === caseClass));
    // For each primary aggregate, compute comparisons
    const primaryAggregates = aggregates.filter((a) => a.sut === primarySut);
    for (const primaryAgg of primaryAggregates) {
        primaryAgg.comparisons = {};
        // Independent of the baseline, so selected once per primary aggregate.
        const primaryResults = selectRuns(primarySut, primaryAgg.caseClass);
        for (const baselineSut of baselineSuts) {
            // Find matching baseline aggregate (same case class)
            const baselineAgg = aggregates.find((a) => a.sut === baselineSut && a.caseClass === primaryAgg.caseClass);
            if (!baselineAgg) {
                continue;
            }
            // Only metrics present on both sides can be compared; a metric
            // the baseline lacks has no stats to read a mean from.
            const sharedMetrics = Object.keys(primaryAgg.metrics).filter((m) => m in baselineAgg.metrics);
            const comparisonDeltas = {};
            const comparisonRatios = {};
            for (const metricName of sharedMetrics) {
                const primaryMean = primaryAgg.metrics[metricName].mean;
                const baselineMean = baselineAgg.metrics[metricName].mean;
                comparisonDeltas[metricName] = primaryMean - baselineMean;
                comparisonRatios[metricName] = baselineMean === 0 ? Infinity : primaryMean / baselineMean;
            }
            // The statistical comparison uses the first shared metric only.
            let comparisonMetrics;
            if (sharedMetrics.length > 0) {
                const baselineResults = selectRuns(baselineSut, primaryAgg.caseClass);
                comparisonMetrics = computeComparison(primaryResults, baselineResults, sharedMetrics[0]);
            }
            // Merge per-metric deltas with statistical metrics
            primaryAgg.comparisons[baselineSut] = {
                deltas: comparisonDeltas,
                ratios: comparisonRatios,
                betterRate: comparisonMetrics?.betterRate,
                uStatistic: comparisonMetrics?.uStatistic,
                pValue: comparisonMetrics?.pValue,
                effectSize: comparisonMetrics?.effectSize,
            };
        }
    }
};
|
|
175
|
+
/**
 * Wrap aggregates in a full aggregation output document.
 *
 * @param aggregates - Aggregated results to embed unchanged
 * @param results - Raw results, used only for the run and case totals
 * @returns Document with version, creation timestamp, the aggregates and
 *   summary metadata
 */
export const createAggregationOutput = (aggregates, results) => {
    const distinct = (values) => Array.from(new Set(values));
    const sutsIncluded = distinct(aggregates.map(({ sut }) => sut));
    const caseClasses = distinct(aggregates
        .map(({ caseClass }) => caseClass)
        .filter((name) => name !== undefined));
    const caseIds = distinct(results.map(({ run }) => run.caseId));
    const metadata = {
        totalRuns: results.length,
        totalCases: caseIds.length,
        sutsIncluded,
        // Reported as undefined rather than an empty list when no aggregate has a case class.
        caseClassesIncluded: caseClasses.length > 0 ? caseClasses : undefined,
    };
    return {
        version: "1.0.0",
        timestamp: new Date().toISOString(),
        aggregates,
        metadata,
    };
};
|
|
198
|
+
//# sourceMappingURL=pipeline.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pipeline.js","sourceRoot":"","sources":["../../src/aggregation/pipeline.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AASH,OAAO,EAAE,iBAAiB,EAAE,mBAAmB,EAAE,MAAM,kBAAkB,CAAC;AAsB1E;;GAEG;AACH,MAAM,eAAe,GAA+B;IACnD,gBAAgB,EAAE,IAAI;IACtB,kBAAkB,EAAE,IAAI;CACxB,CAAC;AAEF;;;;;;GAMG;AACH,MAAM,CAAC,MAAM,gBAAgB,GAAG,CAC/B,OAA2B,EAC3B,UAAsC,EAAE,EACnB,EAAE;IACvB,MAAM,QAAQ,GAAG,EAAE,GAAG,eAAe,EAAE,GAAG,OAAO,EAAE,CAAC;IACpD,MAAM,UAAU,GAAuB,EAAE,CAAC;IAE1C,gBAAgB;IAChB,MAAM,MAAM,GAAG,YAAY,CAAC,OAAO,EAAE,QAAQ,CAAC,gBAAgB,IAAI,IAAI,CAAC,CAAC;IAExE,uBAAuB;IACvB,KAAK,MAAM,CAAC,GAAG,EAAE,YAAY,CAAC,IAAI,MAAM,EAAE,CAAC;QAC1C,MAAM,SAAS,GAAG,cAAc,CAAC,GAAG,EAAE,YAAY,EAAE,QAAQ,CAAC,OAAO,CAAC,CAAC;QACtE,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC5B,CAAC;IAED,iCAAiC;IACjC,IAAI,QAAQ,CAAC,kBAAkB,EAAE,CAAC;QACjC,qBAAqB,CAAC,UAAU,EAAE,OAAO,EAAE,QAAQ,CAAC,CAAC;IACtD,CAAC;IAED,OAAO,UAAU,CAAC;AACnB,CAAC,CAAC;AAEF;;;;GAIG;AACH,MAAM,YAAY,GAAG,CACpB,OAA2B,EAC3B,gBAAyB,EACS,EAAE;IACpC,MAAM,MAAM,GAAG,IAAI,GAAG,EAA8B,CAAC;IAErD,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC9B,MAAM,GAAG,GACR,gBAAgB,IAAI,MAAM,CAAC,GAAG,CAAC,SAAS;YACvC,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC,GAAG,KAAK,MAAM,CAAC,GAAG,CAAC,SAAS,EAAE;YAC9C,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC;QAEnB,MAAM,QAAQ,GAAG,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC;QACvC,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACtB,MAAM,CAAC,GAAG,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;IAC3B,CAAC;IAED,OAAO,MAAM,CAAC;AACf,CAAC,CAAC;AAEF;;;;;GAKG;AACH,MAAM,cAAc,GAAG,CACtB,GAAW,EACX,OAA2B,EAC3B,WAAsB,EACH,EAAE;IACrB,MAAM,CAAC,GAAG,EAAE,SAAS,CAAC,GAAG,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACzC,MAAM,WAAW,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;IAE/B,2BAA2B;IAC3B,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC;IAE9D,0BAA0B;IAC1B,MAAM,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC;IACrE,MAAM,aAAa,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,CAAC,cAAc,CAAC,C
AAC,MAAM,CAAC;IACjF,MAAM,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,CAAC,eAAe,KAAK,IAAI,CAAC,CAAC,MAAM,CAAC;IAC1F,MAAM,WAAW,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,CAAC,cAAc,CAAC,CAAC;IAEtE,qBAAqB;IACrB,MAAM,WAAW,GAAiC,EAAE,CAAC;IACrD,MAAM,cAAc,GAAG,WAAW,IAAI,iBAAiB,CAAC,OAAO,CAAC,CAAC;IAEjE,KAAK,MAAM,UAAU,IAAI,cAAc,EAAE,CAAC;QACzC,MAAM,MAAM,GAAG,OAAO;aACpB,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;aACzC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,KAAK,QAAQ,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;QAE3D,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,WAAW,CAAC,UAAU,CAAC,GAAG,mBAAmB,CAAC,MAAM,CAAC,CAAC;QACvD,CAAC;IACF,CAAC;IAED,WAAW;IACX,MAAM,cAAc,GAA2B,EAAE,CAAC;IAClD,KAAK,MAAM,UAAU,IAAI,cAAc,EAAE,CAAC;QACzC,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,IAAI,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC;QAC5E,cAAc,CAAC,UAAU,CAAC,GAAG,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC;IACrD,CAAC;IAED,OAAO;QACN,GAAG;QACH,OAAO,EAAE,WAAW,CAAC,GAAG,CAAC,OAAO;QAChC,SAAS;QACT,KAAK,EAAE;YACN,QAAQ,EAAE,OAAO,CAAC,MAAM;YACxB,SAAS,EAAE,WAAW,CAAC,IAAI;SAC3B;QACD,WAAW,EAAE;YACZ,SAAS,EAAE,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;YAC/D,kBAAkB,EAAE,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,aAAa,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;YAC3E,mBAAmB,EAClB,WAAW,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS;SAC9E;QACD,OAAO,EAAE,WAAW;QACpB,QAAQ,EAAE;YACT,YAAY,EAAE,CAAC,EAAE,6CAA6C;YAC9D,cAAc;SACd;KACD,CAAC;AACH,CAAC,CAAC;AAEF;;;GAGG;AACH,MAAM,iBAAiB,GAAG,CAAC,OAA2B,EAAY,EAAE;IACnE,MAAM,KAAK,GAAG,IAAI,GAAG,EAAU,CAAC;IAChC,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC9B,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC;YACxD,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACjB,CAAC;IACF,CAAC;IACD,OAAO,CAAC,GAAG,KAAK,CAAC,CAAC;AACnB,CAAC,CAAC;AAEF;;;;;GAKG;AACH,MAAM,qBAAqB,GAAG,CAC7B,UAA8B,EAC9B,OAA2B,E
AC3B,OAAmC,EAC5B,EAAE;IACT,6BAA6B;IAC7B,MAAM,UAAU,GAAG,OAAO,CAAC,UAAU,IAAI,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,SAAS,CAAC,EAAE,GAAG,CAAC;IAE9F,MAAM,YAAY,GACjB,OAAO,CAAC,YAAY,IAAI,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,UAAU,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;IAE9F,IAAI,CAAC,UAAU,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC9C,OAAO;IACR,CAAC;IAED,kDAAkD;IAClD,MAAM,iBAAiB,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,KAAK,UAAU,CAAC,CAAC;IAEzE,KAAK,MAAM,UAAU,IAAI,iBAAiB,EAAE,CAAC;QAC5C,UAAU,CAAC,WAAW,GAAG,EAAE,CAAC;QAE5B,KAAK,MAAM,WAAW,IAAI,YAAY,EAAE,CAAC;YACxC,qDAAqD;YACrD,MAAM,WAAW,GAAG,UAAU,CAAC,IAAI,CAClC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,KAAK,WAAW,IAAI,CAAC,CAAC,SAAS,KAAK,UAAU,CAAC,SAAS,CACpE,CAAC;YAEF,IAAI,CAAC,WAAW;gBAAE,SAAS;YAE3B,4CAA4C;YAC5C,MAAM,gBAAgB,GAA2B,EAAE,CAAC;YACpD,MAAM,gBAAgB,GAA2B,EAAE,CAAC;YAEpD,KAAK,MAAM,UAAU,IAAI,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;gBAC1D,MAAM,YAAY,GAAG,UAAU,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;gBACpD,MAAM,aAAa,GAAG,WAAW,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;gBAEtD,gBAAgB,CAAC,UAAU,CAAC,GAAG,YAAY,CAAC,IAAI,GAAG,aAAa,CAAC,IAAI,CAAC;gBACtE,gBAAgB,CAAC,UAAU,CAAC;oBAC3B,aAAa,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,YAAY,CAAC,IAAI,GAAG,aAAa,CAAC,IAAI,CAAC;YAC/E,CAAC;YAED,+DAA+D;YAC/D,MAAM,cAAc,GAAG,OAAO,CAAC,MAAM,CACpC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,GAAG,KAAK,UAAU,IAAI,CAAC,CAAC,GAAG,CAAC,SAAS,KAAK,UAAU,CAAC,SAAS,CAC3E,CAAC;YACF,MAAM,eAAe,GAAG,OAAO,CAAC,MAAM,CACrC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,GAAG,KAAK,WAAW,IAAI,CAAC,CAAC,GAAG,CAAC,SAAS,KAAK,UAAU,CAAC,SAAS,CAC5E,CAAC;YAEF,2DAA2D;YAC3D,MAAM,aAAa,GAAG,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,IAAI,WAAW,CAAC,OAAO,CAAC,CAAC;YAE9F,IAAI,iBAAgD,CAAC;YACrD,IAAI,aAAa,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC9B,MAAM,UAAU,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC;gBACpC,iBAAiB,GAAG,iBAAiB,CAAC,cAAc,EAAE,eAAe,EAAE,UAAU,
CAAC,CAAC;YACpF,CAAC;YAED,mDAAmD;YACnD,UAAU,CAAC,WAAW,CAAC,WAAW,CAAC,GAAG;gBACrC,MAAM,EAAE,gBAAgB;gBACxB,MAAM,EAAE,gBAAgB;gBACxB,UAAU,EAAE,iBAAiB,EAAE,UAAU;gBACzC,UAAU,EAAE,iBAAiB,EAAE,UAAU;gBACzC,MAAM,EAAE,iBAAiB,EAAE,MAAM;gBACjC,UAAU,EAAE,iBAAiB,EAAE,UAAU;aACzC,CAAC;QACH,CAAC;IACF,CAAC;AACF,CAAC,CAAC;AAEF;;;;GAIG;AACH,MAAM,CAAC,MAAM,uBAAuB,GAAG,CACtC,UAA8B,EAC9B,OAA2B,EACP,EAAE;IACtB,MAAM,UAAU,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;IAC9D,MAAM,iBAAiB,GAAG;QACzB,GAAG,IAAI,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAe,EAAE,CAAC,CAAC,KAAK,SAAS,CAAC,CAAC;KAC1F,CAAC;IACF,MAAM,WAAW,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;IAEnE,OAAO;QACN,OAAO,EAAE,OAAO;QAChB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACnC,UAAU;QACV,QAAQ,EAAE;YACT,SAAS,EAAE,OAAO,CAAC,MAAM;YACzB,UAAU,EAAE,WAAW,CAAC,MAAM;YAC9B,YAAY,EAAE,UAAU;YACxB,mBAAmB,EAAE,iBAAiB,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,iBAAiB,CAAC,CAAC,CAAC,SAAS;SACjF;KACD,CAAC;AACH,CAAC,CAAC"}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Claims Evaluator
|
|
3
|
+
*
|
|
4
|
+
* Evaluates explicit hypotheses (claims) against aggregated results.
|
|
5
|
+
* This enables claim-driven evaluation where experiments test specific
|
|
6
|
+
* hypotheses rather than collect arbitrary metrics.
|
|
7
|
+
*/
|
|
8
|
+
import type { AggregatedResult } from "../types/aggregate.js";
|
|
9
|
+
import type { ClaimEvaluation, ClaimEvaluationSummary, EvaluationClaim } from "../types/claims.js";
|
|
10
|
+
/**
|
|
11
|
+
* Evaluate a single claim against aggregated results.
|
|
12
|
+
*
|
|
13
|
+
* @param claim - The claim to evaluate
|
|
14
|
+
* @param aggregates - Aggregated results from the pipeline
|
|
15
|
+
* @returns Claim evaluation with status and evidence
|
|
16
|
+
*/
|
|
17
|
+
export declare const evaluateClaim: (claim: EvaluationClaim, aggregates: AggregatedResult[]) => ClaimEvaluation;
|
|
18
|
+
/**
|
|
19
|
+
* Evaluate multiple claims against aggregated results.
|
|
20
|
+
*
|
|
21
|
+
* @param claims - Claims to evaluate
|
|
22
|
+
* @param aggregates - Aggregated results
|
|
23
|
+
* @returns Array of claim evaluations
|
|
24
|
+
*/
|
|
25
|
+
export declare const evaluateClaims: (claims: EvaluationClaim[], aggregates: AggregatedResult[]) => ClaimEvaluation[];
|
|
26
|
+
/**
|
|
27
|
+
* Create a claim evaluation summary.
|
|
28
|
+
*
|
|
29
|
+
* @param evaluations - Completed claim evaluations
|
|
30
|
+
* @returns Summary with counts and rates
|
|
31
|
+
*/
|
|
32
|
+
export declare const createClaimSummary: (evaluations: ClaimEvaluation[]) => ClaimEvaluationSummary;
|
|
33
|
+
//# sourceMappingURL=evaluator.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"evaluator.d.ts","sourceRoot":"","sources":["../../src/claims/evaluator.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAE9D,OAAO,KAAK,EACX,eAAe,EACf,sBAAsB,EAGtB,eAAe,EACf,MAAM,oBAAoB,CAAC;AAE5B;;;;;;GAMG;AACH,eAAO,MAAM,aAAa,GACzB,OAAO,eAAe,EACtB,YAAY,gBAAgB,EAAE,KAC5B,eA4DF,CAAC;AAsGF;;;;;;GAMG;AACH,eAAO,MAAM,cAAc,GAC1B,QAAQ,eAAe,EAAE,EACzB,YAAY,gBAAgB,EAAE,KAC5B,eAAe,EAA6D,CAAC;AAEhF;;;;;GAKG;AACH,eAAO,MAAM,kBAAkB,GAAI,aAAa,eAAe,EAAE,KAAG,sBAoBnE,CAAC"}
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Claims Evaluator
|
|
3
|
+
*
|
|
4
|
+
* Evaluates explicit hypotheses (claims) against aggregated results.
|
|
5
|
+
* This enables claim-driven evaluation where experiments test specific
|
|
6
|
+
* hypotheses rather than collect arbitrary metrics.
|
|
7
|
+
*/
|
|
8
|
+
/**
 * Evaluate a single claim against aggregated results.
 *
 * The first aggregate matching `claim.sut` and the first matching
 * `claim.baseline` (after scope filtering) are compared on the mean of
 * `claim.metric`. When either aggregate or either metric is missing, an
 * inconclusive result naming only the missing side(s) is returned.
 *
 * @param claim - The claim to evaluate
 * @param aggregates - Aggregated results from the pipeline
 * @returns Claim evaluation with status and evidence
 */
export const evaluateClaim = (claim, aggregates) => {
    // Filter aggregates by scope constraints
    const filteredAggregates = filterByScope(aggregates, claim);
    // Find primary and baseline aggregates
    const primaryAgg = filteredAggregates.find((a) => a.sut === claim.sut);
    const baselineAgg = filteredAggregates.find((a) => a.sut === claim.baseline);
    // Handle missing data
    if (!primaryAgg || !baselineAgg) {
        return createInconclusiveResult(claim, primaryAgg ? undefined : "Primary SUT not found", baselineAgg ? undefined : "Baseline SUT not found");
    }
    // Get metric values
    const metricName = claim.metric;
    const primaryStats = primaryAgg.metrics[metricName];
    const baselineStats = baselineAgg.metrics[metricName];
    if (primaryStats == null || baselineStats == null) {
        // Report only the side that actually lacks the metric, so the
        // reason does not blame a SUT whose data is present.
        return createInconclusiveResult(claim, primaryStats == null ? "Metric not found in primary results" : undefined, baselineStats == null ? "Metric not found in baseline results" : undefined);
    }
    // Compute evidence
    const primaryValue = primaryStats.mean;
    const baselineValue = baselineStats.mean;
    const delta = primaryValue - baselineValue;
    const ratio = baselineValue === 0 ? Infinity : primaryValue / baselineValue;
    // Get statistical significance if available
    const comparison = primaryAgg.comparisons?.[claim.baseline];
    const evidence = {
        primaryValue,
        baselineValue,
        delta,
        ratio,
        pValue: comparison?.pValue,
        effectSize: comparison?.effectSize,
        n: primaryStats.n + baselineStats.n,
    };
    // Determine claim status
    const status = determineClaimStatus(claim, evidence);
    return {
        claim,
        status,
        evidence,
    };
};
|
|
59
|
+
/**
 * Keep only the aggregates that satisfy the claim's scope constraints.
 *
 * Only the `caseClass` constraint is interpreted; any other constraint key
 * is ignored. A claim without scope constraints returns the input array
 * itself.
 *
 * @param aggregates - Aggregates to filter
 * @param claim - Claim whose `scopeConstraints` are applied
 * @returns Aggregates whose case class is allowed by the claim
 */
const filterByScope = (aggregates, claim) => {
    const constraints = claim.scopeConstraints;
    if (!constraints) {
        return aggregates;
    }
    const satisfies = (agg) => Object.entries(constraints).every(([name, expected]) => {
        if (name !== "caseClass") {
            // Unknown constraint kinds never exclude an aggregate.
            return true;
        }
        const permitted = Array.isArray(expected) ? expected : [expected];
        return permitted.includes(agg.caseClass);
    });
    return aggregates.filter(satisfies);
};
|
|
81
|
+
/**
 * Build an "inconclusive" evaluation for a claim that could not be assessed.
 *
 * All numeric evidence fields are NaN, and the reasons that are not
 * `undefined` are joined with "; " into `inconclusiveReason`.
 *
 * @param claim - The claim being reported on (returned as-is).
 * @param reasons - Zero or more reason strings; `undefined` entries are dropped.
 * @returns Evaluation object with status "inconclusive".
 */
const createInconclusiveResult = (claim, ...reasons) => {
    const stated = [];
    for (const reason of reasons) {
        if (reason === undefined) {
            continue;
        }
        stated.push(reason);
    }
    const inconclusiveReason = stated.join("; ");
    const evidence = {
        primaryValue: Number.NaN,
        baselineValue: Number.NaN,
        delta: Number.NaN,
        ratio: Number.NaN,
    };
    return { claim, status: "inconclusive", evidence, inconclusiveReason };
};
|
|
100
|
+
/**
 * Determine claim status based on evidence.
 *
 * Checks are applied in order; the first one that fires decides the result:
 *  1. NaN primary or baseline value             -> "inconclusive"
 *  2. p-value present and above the significance level
 *     (`claim.significanceLevel`, default 0.05)  -> "inconclusive"
 *  3. `claim.minEffectSize` and an effect size are both present and
 *     |effectSize| is below the minimum          -> "inconclusive"
 *  4. direction test on `evidence.delta`, using `claim.threshold` when given.
 *
 * @param claim - Claim providing `direction` and the optional `threshold`,
 *   `significanceLevel` and `minEffectSize`.
 * @param evidence - Evidence providing `primaryValue`, `baselineValue`,
 *   `delta` and the optional `pValue` / `effectSize`.
 * @returns "satisfied", "violated" or "inconclusive". A missing or
 *   unrecognised `claim.direction` yields "inconclusive".
 */
const determineClaimStatus = (claim, evidence) => {
    // Check for missing data
    if (Number.isNaN(evidence.primaryValue) || Number.isNaN(evidence.baselineValue)) {
        return "inconclusive";
    }
    // Check statistical significance if required
    const significanceLevel = claim.significanceLevel ?? 0.05;
    if (evidence.pValue !== undefined && evidence.pValue > significanceLevel) {
        return "inconclusive";
    }
    // Check minimum effect size if required
    if (claim.minEffectSize !== undefined &&
        evidence.effectSize !== undefined &&
        Math.abs(evidence.effectSize) < claim.minEffectSize) {
        return "inconclusive";
    }
    // Evaluate direction
    switch (claim.direction) {
        case "greater": {
            if (claim.threshold !== undefined) {
                return evidence.delta >= claim.threshold ? "satisfied" : "violated";
            }
            return evidence.delta > 0 ? "satisfied" : "violated";
        }
        case "less": {
            // The threshold is a magnitude: the primary must be lower by at least that much.
            if (claim.threshold !== undefined) {
                return evidence.delta <= -claim.threshold ? "satisfied" : "violated";
            }
            return evidence.delta < 0 ? "satisfied" : "violated";
        }
        case "equal": {
            // Without an explicit threshold, values within 0.001 count as equal.
            const epsilon = claim.threshold ?? 0.001;
            return Math.abs(evidence.delta) <= epsilon ? "satisfied" : "violated";
        }
        default: {
            // An unknown direction cannot be judged; never fall through and
            // return undefined, which no status bucket would count.
            return "inconclusive";
        }
    }
};
|
|
141
|
+
/**
 * Evaluate every claim in a list against the same aggregated results.
 *
 * @param claims - Claims to evaluate
 * @param aggregates - Aggregated results shared by all evaluations
 * @returns One evaluation per claim, in the same order as `claims`
 */
export const evaluateClaims = (claims, aggregates) => {
    // Bind the shared aggregates so the mapper only needs the claim.
    const evaluateAgainstAggregates = (claim) => evaluateClaim(claim, aggregates);
    return claims.map(evaluateAgainstAggregates);
};
|
|
149
|
+
/**
 * Summarise a list of completed claim evaluations.
 *
 * Counts evaluations by status and derives the satisfaction rate as
 * satisfied / (satisfied + violated); the rate is 0 when no evaluation is
 * definitive. Statuses other than the three known ones count only toward
 * `total`.
 *
 * @param evaluations - Completed claim evaluations
 * @returns Summary object carrying a version tag, an ISO timestamp, the
 *   evaluations themselves and the per-status counts and rate
 */
export const createClaimSummary = (evaluations) => {
    const tally = { satisfied: 0, violated: 0, inconclusive: 0 };
    evaluations.forEach((evaluation) => {
        switch (evaluation.status) {
            case "satisfied":
                tally.satisfied += 1;
                break;
            case "violated":
                tally.violated += 1;
                break;
            case "inconclusive":
                tally.inconclusive += 1;
                break;
            default:
                break;
        }
    });
    const { satisfied, violated, inconclusive } = tally;
    const definitive = satisfied + violated;
    let satisfactionRate = 0;
    if (definitive > 0) {
        satisfactionRate = satisfied / definitive;
    }
    const summary = {
        total: evaluations.length,
        satisfied,
        violated,
        inconclusive,
        satisfactionRate,
    };
    return {
        version: "1.0.0",
        timestamp: new Date().toISOString(),
        evaluations,
        summary,
    };
};
|
|
174
|
+
//# sourceMappingURL=evaluator.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"evaluator.js","sourceRoot":"","sources":["../../src/claims/evaluator.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAYH;;;;;;GAMG;AACH,MAAM,CAAC,MAAM,aAAa,GAAG,CAC5B,KAAsB,EACtB,UAA8B,EACZ,EAAE;IACpB,yCAAyC;IACzC,MAAM,kBAAkB,GAAG,aAAa,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC;IAE5D,uCAAuC;IACvC,MAAM,UAAU,GAAG,kBAAkB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,KAAK,KAAK,CAAC,GAAG,CAAC,CAAC;IACvE,MAAM,WAAW,GAAG,kBAAkB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,KAAK,KAAK,CAAC,QAAQ,CAAC,CAAC;IAE7E,sBAAsB;IACtB,IAAI,CAAC,UAAU,IAAI,CAAC,WAAW,EAAE,CAAC;QACjC,OAAO,wBAAwB,CAC9B,KAAK,EACL,UAAU,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,uBAAuB,EAChD,WAAW,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,wBAAwB,CAClD,CAAC;IACH,CAAC;IAED,oBAAoB;IACpB,MAAM,aAAa,GAAG,KAAK,CAAC,MAAM,CAAC;IACnC,MAAM,cAAc,GAAG,KAAK,CAAC,MAAM,CAAC;IACpC,MAAM,YAAY,GAAG,UAAU,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC;IACvD,MAAM,aAAa,GAAG,WAAW,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC;IAE1D,IAAI,CAAC,CAAC,aAAa,IAAI,UAAU,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,cAAc,IAAI,WAAW,CAAC,OAAO,CAAC,EAAE,CAAC;QACxF,OAAO,wBAAwB,CAC9B,KAAK,EACL,qCAAqC,EACrC,sCAAsC,CACtC,CAAC;IACH,CAAC;IAED,mBAAmB;IACnB,MAAM,YAAY,GAAG,YAAY,CAAC,IAAI,CAAC;IACvC,MAAM,aAAa,GAAG,aAAa,CAAC,IAAI,CAAC;IACzC,MAAM,KAAK,GAAG,YAAY,GAAG,aAAa,CAAC;IAC3C,MAAM,KAAK,GAAG,aAAa,KAAK,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,YAAY,GAAG,aAAa,CAAC;IAE5E,4CAA4C;IAC5C,MAAM,UAAU,GAAG,UAAU,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;IAC5D,MAAM,MAAM,GAAG,UAAU,EAAE,MAAM,CAAC;IAClC,MAAM,UAAU,GAAG,UAAU,EAAE,UAAU,CAAC;IAE1C,MAAM,QAAQ,GAAkB;QAC/B,YAAY;QACZ,aAAa;QACb,KAAK;QACL,KAAK;QACL,MAAM;QACN,UAAU;QACV,CAAC,EAAE,YAAY,CAAC,CAAC,GAAG,aAAa,CAAC,CAAC;KACnC,CAAC;IAEF,yBAAyB;IACzB,MAAM,MAAM,GAAG,oBAAoB,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;IAErD,OAAO;QACN,KAAK;QACL,MAAM;QACN,QAAQ;KACR,CAAC;AACH,CAAC,CAAC;AAEF;;;;GAIG;AACH,MAAM,aAAa,GAAG,CACrB,UAA8B,EAC9B,KAAsB,EACD,EAAE;IACvB,IAAI,CAAC,KAAK,CAAC,gBAAgB,EAAE,CAAC;QAC7B,OAAO,UAAU,CAAC;IACnB,CAAC;IAED,OAAO,UAAU,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE;QAChC,KAAK,MA
AM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,gBAAgB,IAAI,EAAE,CAAC,EAAE,CAAC;YACzE,IAAI,GAAG,KAAK,WAAW,EAAE,CAAC;gBACzB,MAAM,cAAc,GAAG,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;gBAC9D,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC,GAAG,CAAC,SAAsB,CAAC,EAAE,CAAC;oBAC1D,OAAO,KAAK,CAAC;gBACd,CAAC;YACF,CAAC;YACD,6CAA6C;QAC9C,CAAC;QACD,OAAO,IAAI,CAAC;IACb,CAAC,CAAC,CAAC;AACJ,CAAC,CAAC;AAEF;;;;GAIG;AACH,MAAM,wBAAwB,GAAG,CAChC,KAAsB,EACtB,GAAG,OAA+B,EAChB,EAAE;IACpB,MAAM,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAe,EAAE,CAAC,CAAC,KAAK,SAAS,CAAC,CAAC;IAEzE,OAAO;QACN,KAAK;QACL,MAAM,EAAE,cAAc;QACtB,QAAQ,EAAE;YACT,YAAY,EAAE,MAAM,CAAC,GAAG;YACxB,aAAa,EAAE,MAAM,CAAC,GAAG;YACzB,KAAK,EAAE,MAAM,CAAC,GAAG;YACjB,KAAK,EAAE,MAAM,CAAC,GAAG;SACjB;QACD,kBAAkB,EAAE,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC;KAC3C,CAAC;AACH,CAAC,CAAC;AAEF;;;;GAIG;AACH,MAAM,oBAAoB,GAAG,CAAC,KAAsB,EAAE,QAAuB,EAAe,EAAE;IAC7F,yBAAyB;IACzB,IAAI,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,YAAY,CAAC,IAAI,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,aAAa,CAAC,EAAE,CAAC;QACjF,OAAO,cAAc,CAAC;IACvB,CAAC;IAED,6CAA6C;IAC7C,MAAM,iBAAiB,GAAG,KAAK,CAAC,iBAAiB,IAAI,IAAI,CAAC;IAC1D,IAAI,QAAQ,CAAC,MAAM,KAAK,SAAS,IAAI,QAAQ,CAAC,MAAM,GAAG,iBAAiB,EAAE,CAAC;QAC1E,OAAO,cAAc,CAAC;IACvB,CAAC;IAED,wCAAwC;IACxC,IACC,KAAK,CAAC,aAAa,KAAK,SAAS;QACjC,QAAQ,CAAC,UAAU,KAAK,SAAS;QACjC,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,UAAU,CAAC,GAAG,KAAK,CAAC,aAAa,EAClD,CAAC;QACF,OAAO,cAAc,CAAC;IACvB,CAAC;IAED,qBAAqB;IACrB,QAAQ,KAAK,CAAC,SAAS,EAAE,CAAC;QACzB,KAAK,SAAS,CAAC,CAAC,CAAC;YAChB,IAAI,KAAK,CAAC,SAAS,KAAK,SAAS,EAAE,CAAC;gBACnC,OAAO,QAAQ,CAAC,KAAK,IAAI,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,UAAU,CAAC;YACrE,CAAC;YACD,OAAO,QAAQ,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,UAAU,CAAC;QACtD,CAAC;QAED,KAAK,MAAM,CAAC,CAAC,CAAC;YACb,IAAI,KAAK,CAAC,SAAS,KAAK,SAAS,EAAE,CAAC;gBACnC,OAAO,QAAQ,CAAC,KAAK,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,UAAU,CAAC;YACtE,CAAC;YACD,OAAO,QAAQ,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC
,CAAC,UAAU,CAAC;QACtD,CAAC;QAED,KAAK,OAAO,CAAC,CAAC,CAAC;YACd,MAAM,OAAO,GAAG,KAAK,CAAC,SAAS,IAAI,KAAK,CAAC;YACzC,OAAO,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,OAAO,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,UAAU,CAAC;QACvE,CAAC;IACF,CAAC;AACF,CAAC,CAAC;AAEF;;;;;;GAMG;AACH,MAAM,CAAC,MAAM,cAAc,GAAG,CAC7B,MAAyB,EACzB,UAA8B,EACV,EAAE,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,aAAa,CAAC,KAAK,EAAE,UAAU,CAAC,CAAC,CAAC;AAEhF;;;;;GAKG;AACH,MAAM,CAAC,MAAM,kBAAkB,GAAG,CAAC,WAA8B,EAA0B,EAAE;IAC5F,MAAM,SAAS,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,WAAW,CAAC,CAAC,MAAM,CAAC;IAC7E,MAAM,QAAQ,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,UAAU,CAAC,CAAC,MAAM,CAAC;IAC3E,MAAM,YAAY,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,cAAc,CAAC,CAAC,MAAM,CAAC;IAEnF,MAAM,UAAU,GAAG,SAAS,GAAG,QAAQ,CAAC;IACxC,MAAM,gBAAgB,GAAG,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC;IAErE,OAAO;QACN,OAAO,EAAE,OAAO;QAChB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACnC,WAAW;QACX,OAAO,EAAE;YACR,KAAK,EAAE,WAAW,CAAC,MAAM;YACzB,SAAS;YACT,QAAQ;YACR,YAAY;YACZ,gBAAgB;SAChB;KACD,CAAC;AACH,CAAC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/claims/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,aAAa,EAAE,cAAc,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/claims/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,aAAa,EAAE,cAAc,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAC"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Collector Module
|
|
3
|
+
*
|
|
4
|
+
* Re-exports collector components.
|
|
5
|
+
*/
|
|
6
|
+
export { type AggregationOptions, ResultCollector, resultCollector, type ResultFilter, type ValidationError, } from "./result-collector.js";
|
|
7
|
+
export { deepFreeze, type SchemaValidation, validateCase, validateResult, validateSutRegistration, } from "./schema.js";
|
|
8
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/collector/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EACN,KAAK,kBAAkB,EACvB,eAAe,EACf,eAAe,EACf,KAAK,YAAY,EACjB,KAAK,eAAe,GACpB,MAAM,uBAAuB,CAAC;AAC/B,OAAO,EACN,UAAU,EACV,KAAK,gBAAgB,EACrB,YAAY,EACZ,cAAc,EACd,uBAAuB,GACvB,MAAM,aAAa,CAAC"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Collector Module
|
|
3
|
+
*
|
|
4
|
+
* Re-exports collector components.
|
|
5
|
+
*/
|
|
6
|
+
export { ResultCollector, resultCollector, } from "./result-collector.js";
|
|
7
|
+
export { deepFreeze, validateCase, validateResult, validateSutRegistration, } from "./schema.js";
|
|
8
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/collector/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAEN,eAAe,EACf,eAAe,GAGf,MAAM,uBAAuB,CAAC;AAC/B,OAAO,EACN,UAAU,EAEV,YAAY,EACZ,cAAc,EACd,uBAAuB,GACvB,MAAM,aAAa,CAAC"}
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Result Collector
|
|
3
|
+
*
|
|
4
|
+
* Collects, validates, and stores evaluation results.
|
|
5
|
+
* Replaces the simple MetricsCollector with a schema-aware implementation.
|
|
6
|
+
*/
|
|
7
|
+
import type { Primitive } from "../types/case.js";
|
|
8
|
+
import type { EvaluationResult, ResultBatch } from "../types/result.js";
|
|
9
|
+
import type { SutRole } from "../types/sut.js";
|
|
10
|
+
/**
 * Filter criteria for querying results.
 *
 * Every field is optional. How multiple criteria combine is not visible in
 * this declaration.
 * NOTE(review): presumably all supplied criteria must match (logical AND) —
 * confirm against the `query` implementation.
 */
export interface ResultFilter {
    /** Filter by SUT ID */
    sut?: string;
    /** Filter by SUT role */
    sutRole?: SutRole;
    /** Filter by case ID */
    caseId?: string;
    /** Filter by case class */
    caseClass?: string;
    /** Filter by validity */
    valid?: boolean;
    /** Filter by metric presence (the value is the metric name to look for) */
    hasMetric?: string;
    /** Custom predicate; receives a result and returns whether it should be kept */
    predicate?: (result: EvaluationResult) => boolean;
}
|
|
29
|
+
/**
 * Aggregation options.
 *
 * All fields are optional; the defaults applied when a field is omitted are
 * not visible in this declaration.
 */
export interface AggregationOptions {
    /** Group by SUT */
    groupBySut?: boolean;
    /** Group by case class */
    groupByCaseClass?: boolean;
    /**
     * Metrics to aggregate (metric names).
     * NOTE(review): presumably all metrics are aggregated when omitted — confirm.
     */
    metrics?: string[];
}
|
|
40
|
+
/**
 * Schema validation error.
 *
 * `ResultCollector.validate` returns an array of these.
 */
export interface ValidationError {
    // NOTE(review): presumably the name/path of the result field that failed validation — confirm.
    field: string;
    // Description of the validation failure.
    message: string;
}
|
|
47
|
+
/**
 * Result collector with schema validation and querying.
 *
 * This is a declaration only: it lists the public surface of the collector
 * (recording, validating, querying, serialising and metric extraction).
 * Behaviour notes below restate what the signatures and their comments
 * specify; anything beyond that is marked for confirmation.
 */
export declare class ResultCollector {
    // Private members; no type is given for them in this declaration.
    private results;
    private readonly schemaVersion;
    /**
     * Record a single result with validation.
     *
     * @param result - Result to record
     * @throws Error if result fails validation
     */
    record(result: EvaluationResult): void;
    /**
     * Record multiple results.
     *
     * NOTE(review): presumably each result is validated as in `record` —
     * confirm against the implementation.
     *
     * @param results - Results to record
     */
    recordBatch(results: EvaluationResult[]): void;
    /**
     * Validate a result against the schema.
     *
     * @param result - Result to validate
     * @returns Array of validation errors (empty if valid)
     */
    validate(result: EvaluationResult): ValidationError[];
    /**
     * Query results with filters.
     *
     * @param filter - Filter criteria (optional)
     * @returns Matching results
     */
    query(filter?: ResultFilter): EvaluationResult[];
    /**
     * Get all results for a specific SUT.
     *
     * @param sutId - SUT identifier
     * @returns Results for that SUT
     */
    getBySut(sutId: string): EvaluationResult[];
    /**
     * Get all results for a specific case class.
     *
     * @param caseClass - Case class
     * @returns Results for that case class
     */
    getByCaseClass(caseClass: string): EvaluationResult[];
    /**
     * Get unique SUT IDs in the collection.
     *
     * @returns SUT IDs, each listed once
     */
    getUniqueSuts(): string[];
    /**
     * Get unique case classes in the collection.
     *
     * @returns Case classes, each listed once
     */
    getUniqueCaseClasses(): string[];
    /**
     * Get unique metric names in the collection.
     *
     * @returns Metric names, each listed once
     */
    getUniqueMetrics(): string[];
    /**
     * Get all results.
     *
     * NOTE(review): whether this returns a copy or the internal array is
     * not visible here — confirm before mutating the returned array.
     */
    getAll(): EvaluationResult[];
    /**
     * Get result count.
     */
    get count(): number;
    /**
     * Check if empty.
     */
    get isEmpty(): boolean;
    /**
     * Clear all results.
     */
    clear(): void;
    /**
     * Serialize to ResultBatch format.
     *
     * @param metadata - Optional batch metadata (string keys, primitive values)
     * @returns Serializable batch
     */
    serialize(metadata?: Record<string, Primitive>): ResultBatch;
    /**
     * Load from a ResultBatch.
     *
     * @param batch - Batch to load
     * @param append - Whether to append to existing results (optional; the
     *   default is not visible in this declaration)
     */
    load(batch: ResultBatch, append?: boolean): void;
    /**
     * Extract a specific metric across all results.
     *
     * @param metricName - Metric to extract
     * @returns Array of { runId, value } pairs
     */
    extractMetric(metricName: string): {
        runId: string;
        value: number;
    }[];
    /**
     * Get metric values for a specific SUT.
     *
     * @param sutId - SUT identifier
     * @param metricName - Metric name
     * @returns Array of metric values
     */
    getMetricValues(sutId: string, metricName: string): number[];
}
|
|
155
|
+
/**
 * Global result collector instance.
 *
 * Exported as a single module-level `ResultCollector`, so every importer of
 * this binding refers to the same instance.
 */
export declare const resultCollector: ResultCollector;
|
|
159
|
+
//# sourceMappingURL=result-collector.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"result-collector.d.ts","sourceRoot":"","sources":["../../src/collector/result-collector.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAClD,OAAO,KAAK,EAAE,gBAAgB,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AACxE,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,iBAAiB,CAAC;AAE/C;;GAEG;AACH,MAAM,WAAW,YAAY;IAC5B,uBAAuB;IACvB,GAAG,CAAC,EAAE,MAAM,CAAC;IAEb,yBAAyB;IACzB,OAAO,CAAC,EAAE,OAAO,CAAC;IAElB,wBAAwB;IACxB,MAAM,CAAC,EAAE,MAAM,CAAC;IAEhB,2BAA2B;IAC3B,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB,yBAAyB;IACzB,KAAK,CAAC,EAAE,OAAO,CAAC;IAEhB,gCAAgC;IAChC,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB,uBAAuB;IACvB,SAAS,CAAC,EAAE,CAAC,MAAM,EAAE,gBAAgB,KAAK,OAAO,CAAC;CAClD;AAED;;GAEG;AACH,MAAM,WAAW,kBAAkB;IAClC,mBAAmB;IACnB,UAAU,CAAC,EAAE,OAAO,CAAC;IAErB,0BAA0B;IAC1B,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAE3B,2BAA2B;IAC3B,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC/B,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,qBAAa,eAAe;IAC3B,OAAO,CAAC,OAAO,CAA0B;IACzC,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAW;IAEzC;;;;;OAKG;IACH,MAAM,CAAC,MAAM,EAAE,gBAAgB,GAAG,IAAI;IAUtC;;;;OAIG;IACH,WAAW,CAAC,OAAO,EAAE,gBAAgB,EAAE,GAAG,IAAI;IAM9C;;;;;OAKG;IACH,QAAQ,CAAC,MAAM,EAAE,gBAAgB,GAAG,eAAe,EAAE;IAyBrD;;;;;OAKG;IACH,KAAK,CAAC,MAAM,GAAE,YAAiB,GAAG,gBAAgB,EAAE;IAapD;;;;;OAKG;IACH,QAAQ,CAAC,KAAK,EAAE,MAAM,GAAG,gBAAgB,EAAE;IAI3C;;;;;OAKG;IACH,cAAc,CAAC,SAAS,EAAE,MAAM,GAAG,gBAAgB,EAAE;IAIrD;;OAEG;IACH,aAAa,IAAI,MAAM,EAAE;IAIzB;;OAEG;IACH,oBAAoB,IAAI,MAAM,EAAE;IAOhC;;OAEG;IACH,gBAAgB,IAAI,MAAM,EAAE;IAU5B;;OAEG;IACH,MAAM,IAAI,gBAAgB,EAAE;IAI5B;;OAEG;IACH,IAAI,KAAK,IAAI,MAAM,CAElB;IAED;;OAEG;IACH,IAAI,OAAO,IAAI,OAAO,CAErB;IAED;;OAEG;IACH,KAAK,IAAI,IAAI;IAIb;;;;;OAKG;IACH,SAAS,CAAC,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,GAAG,WAAW;IAS5D;;;;;OAKG;IACH,IAAI,CAAC,KAAK,EAAE,WAAW,EAAE,MAAM,UAAQ,GAAG,IAAI;IAO9C;;;;;OAKG;IACH,aAAa,CAAC,UAAU,EAAE,MAAM,GAAG;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,EAAE;IASrE;;;;;;OAMG;IACH,eAAe,CAAC,KAAK,EAAE,MAAM,EAAE,UAAU,EAAE,MAA
M,GAAG,MAAM,EAAE;CAK5D;AAED;;GAEG;AACH,eAAO,MAAM,eAAe,iBAAwB,CAAC"}
|