ppef 1.0.1 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +76 -125
- package/bin/ppef.mjs +20 -0
- package/dist/__tests__/cli/evaluate-command.integration.test.d.ts +8 -0
- package/dist/__tests__/cli/evaluate-command.integration.test.d.ts.map +1 -0
- package/dist/__tests__/cli/evaluate-command.integration.test.js +308 -0
- package/dist/__tests__/cli/evaluate-command.integration.test.js.map +1 -0
- package/dist/__tests__/evaluators/claims-evaluator.unit.test.d.ts +8 -0
- package/dist/__tests__/evaluators/claims-evaluator.unit.test.d.ts.map +1 -0
- package/dist/__tests__/evaluators/claims-evaluator.unit.test.js +405 -0
- package/dist/__tests__/evaluators/claims-evaluator.unit.test.js.map +1 -0
- package/dist/__tests__/evaluators/metrics-evaluator.unit.test.d.ts +8 -0
- package/dist/__tests__/evaluators/metrics-evaluator.unit.test.d.ts.map +1 -0
- package/dist/__tests__/evaluators/metrics-evaluator.unit.test.js +424 -0
- package/dist/__tests__/evaluators/metrics-evaluator.unit.test.js.map +1 -0
- package/dist/__tests__/evaluators/registry.unit.test.d.ts +7 -0
- package/dist/__tests__/evaluators/registry.unit.test.d.ts.map +1 -0
- package/dist/__tests__/evaluators/registry.unit.test.js +173 -0
- package/dist/__tests__/evaluators/registry.unit.test.js.map +1 -0
- package/dist/__tests__/evaluators/robustness-evaluator.unit.test.d.ts +8 -0
- package/dist/__tests__/evaluators/robustness-evaluator.unit.test.d.ts.map +1 -0
- package/dist/__tests__/evaluators/robustness-evaluator.unit.test.js +260 -0
- package/dist/__tests__/evaluators/robustness-evaluator.unit.test.js.map +1 -0
- package/dist/__tests__/framework-pipeline.integration.test.js +49 -20
- package/dist/__tests__/framework-pipeline.integration.test.js.map +1 -1
- package/dist/__tests__/index-exports.unit.test.d.ts +8 -0
- package/dist/__tests__/index-exports.unit.test.d.ts.map +1 -0
- package/dist/__tests__/index-exports.unit.test.js +124 -0
- package/dist/__tests__/index-exports.unit.test.js.map +1 -0
- package/dist/__tests__/registry-executor.integration.test.js +12 -9
- package/dist/__tests__/registry-executor.integration.test.js.map +1 -1
- package/dist/aggregation/__tests__/aggregators.unit.test.d.ts +7 -0
- package/dist/aggregation/__tests__/aggregators.unit.test.d.ts.map +1 -0
- package/dist/aggregation/__tests__/aggregators.unit.test.js +350 -0
- package/dist/aggregation/__tests__/aggregators.unit.test.js.map +1 -0
- package/dist/aggregation/__tests__/pipeline.unit.test.d.ts +7 -0
- package/dist/aggregation/__tests__/pipeline.unit.test.d.ts.map +1 -0
- package/dist/aggregation/__tests__/pipeline.unit.test.js +213 -0
- package/dist/aggregation/__tests__/pipeline.unit.test.js.map +1 -0
- package/dist/aggregation/aggregators.d.ts +9 -0
- package/dist/aggregation/aggregators.d.ts.map +1 -1
- package/dist/aggregation/aggregators.js +1 -1
- package/dist/aggregation/aggregators.js.map +1 -1
- package/dist/aggregation/index.d.ts +1 -1
- package/dist/aggregation/index.d.ts.map +1 -1
- package/dist/aggregation/index.js +1 -1
- package/dist/aggregation/index.js.map +1 -1
- package/dist/aggregation/pipeline.d.ts.map +1 -1
- package/dist/aggregation/pipeline.js +40 -3
- package/dist/aggregation/pipeline.js.map +1 -1
- package/dist/claims/index.d.ts +6 -3
- package/dist/claims/index.d.ts.map +1 -1
- package/dist/claims/index.js +6 -3
- package/dist/claims/index.js.map +1 -1
- package/dist/cli/__tests__/aggregate.command.unit.test.d.ts +7 -0
- package/dist/cli/__tests__/aggregate.command.unit.test.d.ts.map +1 -0
- package/dist/cli/__tests__/aggregate.command.unit.test.js +399 -0
- package/dist/cli/__tests__/aggregate.command.unit.test.js.map +1 -0
- package/dist/cli/__tests__/binary-sut.integration.test.d.ts +8 -0
- package/dist/cli/__tests__/binary-sut.integration.test.d.ts.map +1 -0
- package/dist/cli/__tests__/binary-sut.integration.test.js +165 -0
- package/dist/cli/__tests__/binary-sut.integration.test.js.map +1 -0
- package/dist/cli/__tests__/commands.unit.test.d.ts +10 -0
- package/dist/cli/__tests__/commands.unit.test.d.ts.map +1 -0
- package/dist/cli/__tests__/commands.unit.test.js +217 -0
- package/dist/cli/__tests__/commands.unit.test.js.map +1 -0
- package/dist/cli/__tests__/config-loader.unit.test.d.ts +7 -0
- package/dist/cli/__tests__/config-loader.unit.test.d.ts.map +1 -0
- package/dist/cli/__tests__/config-loader.unit.test.js +611 -0
- package/dist/cli/__tests__/config-loader.unit.test.js.map +1 -0
- package/dist/cli/__tests__/index.unit.test.d.ts +10 -0
- package/dist/cli/__tests__/index.unit.test.d.ts.map +1 -0
- package/dist/cli/__tests__/index.unit.test.js +65 -0
- package/dist/cli/__tests__/index.unit.test.js.map +1 -0
- package/dist/cli/__tests__/logger.unit.test.d.ts +11 -0
- package/dist/cli/__tests__/logger.unit.test.d.ts.map +1 -0
- package/dist/cli/__tests__/logger.unit.test.js +180 -0
- package/dist/cli/__tests__/logger.unit.test.js.map +1 -0
- package/dist/cli/__tests__/module-loader.unit.test.d.ts +11 -0
- package/dist/cli/__tests__/module-loader.unit.test.d.ts.map +1 -0
- package/dist/cli/__tests__/module-loader.unit.test.js +262 -0
- package/dist/cli/__tests__/module-loader.unit.test.js.map +1 -0
- package/dist/cli/__tests__/output-writer.unit.test.d.ts +10 -0
- package/dist/cli/__tests__/output-writer.unit.test.d.ts.map +1 -0
- package/dist/cli/__tests__/output-writer.unit.test.js +216 -0
- package/dist/cli/__tests__/output-writer.unit.test.js.map +1 -0
- package/dist/cli/__tests__/plan.command.unit.test.d.ts +7 -0
- package/dist/cli/__tests__/plan.command.unit.test.d.ts.map +1 -0
- package/dist/cli/__tests__/plan.command.unit.test.js +289 -0
- package/dist/cli/__tests__/plan.command.unit.test.js.map +1 -0
- package/dist/cli/__tests__/run.command.unit.test.d.ts +7 -0
- package/dist/cli/__tests__/run.command.unit.test.d.ts.map +1 -0
- package/dist/cli/__tests__/run.command.unit.test.js +422 -0
- package/dist/cli/__tests__/run.command.unit.test.js.map +1 -0
- package/dist/cli/__tests__/validate.command.unit.test.d.ts +7 -0
- package/dist/cli/__tests__/validate.command.unit.test.d.ts.map +1 -0
- package/dist/cli/__tests__/validate.command.unit.test.js +226 -0
- package/dist/cli/__tests__/validate.command.unit.test.js.map +1 -0
- package/dist/cli/command-deps.d.ts +137 -0
- package/dist/cli/command-deps.d.ts.map +1 -0
- package/dist/cli/command-deps.js +7 -0
- package/dist/cli/command-deps.js.map +1 -0
- package/dist/cli/commands/aggregate.d.ts +35 -0
- package/dist/cli/commands/aggregate.d.ts.map +1 -0
- package/dist/cli/commands/aggregate.js +124 -0
- package/dist/cli/commands/aggregate.js.map +1 -0
- package/dist/cli/commands/evaluate.d.ts +41 -0
- package/dist/cli/commands/evaluate.d.ts.map +1 -0
- package/dist/cli/commands/evaluate.js +287 -0
- package/dist/cli/commands/evaluate.js.map +1 -0
- package/dist/cli/commands/plan.d.ts +36 -0
- package/dist/cli/commands/plan.d.ts.map +1 -0
- package/dist/cli/commands/plan.js +109 -0
- package/dist/cli/commands/plan.js.map +1 -0
- package/dist/cli/commands/run.d.ts +33 -0
- package/dist/cli/commands/run.d.ts.map +1 -0
- package/dist/cli/commands/run.js +277 -0
- package/dist/cli/commands/run.js.map +1 -0
- package/dist/cli/commands/validate.d.ts +27 -0
- package/dist/cli/commands/validate.d.ts.map +1 -0
- package/dist/cli/commands/validate.js +88 -0
- package/dist/cli/commands/validate.js.map +1 -0
- package/dist/cli/config-loader.d.ts +30 -0
- package/dist/cli/config-loader.d.ts.map +1 -0
- package/dist/cli/config-loader.js +181 -0
- package/dist/cli/config-loader.js.map +1 -0
- package/dist/cli/index.d.ts +27 -0
- package/dist/cli/index.d.ts.map +1 -0
- package/dist/cli/index.js +60 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/cli/logger.d.ts +75 -0
- package/dist/cli/logger.d.ts.map +1 -0
- package/dist/cli/logger.js +131 -0
- package/dist/cli/logger.js.map +1 -0
- package/dist/cli/module-loader.d.ts +68 -0
- package/dist/cli/module-loader.d.ts.map +1 -0
- package/dist/cli/module-loader.js +134 -0
- package/dist/cli/module-loader.js.map +1 -0
- package/dist/cli/output-writer.d.ts +51 -0
- package/dist/cli/output-writer.d.ts.map +1 -0
- package/dist/cli/output-writer.js +65 -0
- package/dist/cli/output-writer.js.map +1 -0
- package/dist/cli/types.d.ts +193 -0
- package/dist/cli/types.d.ts.map +1 -0
- package/dist/cli/types.js +7 -0
- package/dist/cli/types.js.map +1 -0
- package/dist/collector/__tests__/result-collector.unit.test.d.ts +7 -0
- package/dist/collector/__tests__/result-collector.unit.test.d.ts.map +1 -0
- package/dist/collector/__tests__/result-collector.unit.test.js +1021 -0
- package/dist/collector/__tests__/result-collector.unit.test.js.map +1 -0
- package/dist/collector/__tests__/schema.unit.test.d.ts +7 -0
- package/dist/collector/__tests__/schema.unit.test.d.ts.map +1 -0
- package/dist/collector/__tests__/schema.unit.test.js +360 -0
- package/dist/collector/__tests__/schema.unit.test.js.map +1 -0
- package/dist/evaluators/claims-evaluator.d.ts +87 -0
- package/dist/evaluators/claims-evaluator.d.ts.map +1 -0
- package/dist/evaluators/claims-evaluator.js +289 -0
- package/dist/evaluators/claims-evaluator.js.map +1 -0
- package/dist/evaluators/exploratory-evaluator.d.ts +136 -0
- package/dist/evaluators/exploratory-evaluator.d.ts.map +1 -0
- package/dist/evaluators/exploratory-evaluator.js +545 -0
- package/dist/evaluators/exploratory-evaluator.js.map +1 -0
- package/dist/evaluators/index.d.ts +13 -0
- package/dist/evaluators/index.d.ts.map +1 -0
- package/dist/evaluators/index.js +14 -0
- package/dist/evaluators/index.js.map +1 -0
- package/dist/evaluators/metrics-evaluator.d.ts +114 -0
- package/dist/evaluators/metrics-evaluator.d.ts.map +1 -0
- package/dist/evaluators/metrics-evaluator.js +433 -0
- package/dist/evaluators/metrics-evaluator.js.map +1 -0
- package/dist/evaluators/registry.d.ts +106 -0
- package/dist/evaluators/registry.d.ts.map +1 -0
- package/dist/evaluators/registry.js +148 -0
- package/dist/evaluators/registry.js.map +1 -0
- package/dist/evaluators/robustness-evaluator.d.ts +57 -0
- package/dist/evaluators/robustness-evaluator.d.ts.map +1 -0
- package/dist/evaluators/robustness-evaluator.js +186 -0
- package/dist/evaluators/robustness-evaluator.js.map +1 -0
- package/dist/executor/__tests__/binary-sut.unit.test.d.ts +8 -0
- package/dist/executor/__tests__/binary-sut.unit.test.d.ts.map +1 -0
- package/dist/executor/__tests__/binary-sut.unit.test.js +313 -0
- package/dist/executor/__tests__/binary-sut.unit.test.js.map +1 -0
- package/dist/executor/__tests__/checkpoint-manager.unit.test.js +83 -1
- package/dist/executor/__tests__/checkpoint-manager.unit.test.js.map +1 -1
- package/dist/executor/__tests__/checkpoint-merge-bug.diagnostic.test.d.ts +3 -6
- package/dist/executor/__tests__/checkpoint-merge-bug.diagnostic.test.d.ts.map +1 -1
- package/dist/executor/__tests__/checkpoint-merge-bug.diagnostic.test.js +428 -159
- package/dist/executor/__tests__/checkpoint-merge-bug.diagnostic.test.js.map +1 -1
- package/dist/executor/__tests__/checkpoint-storage.unit.test.js +148 -1
- package/dist/executor/__tests__/checkpoint-storage.unit.test.js.map +1 -1
- package/dist/executor/__tests__/executor.unit.test.js +123 -8
- package/dist/executor/__tests__/executor.unit.test.js.map +1 -1
- package/dist/executor/__tests__/memory-monitor.unit.test.d.ts +7 -0
- package/dist/executor/__tests__/memory-monitor.unit.test.d.ts.map +1 -0
- package/dist/executor/__tests__/memory-monitor.unit.test.js +285 -0
- package/dist/executor/__tests__/memory-monitor.unit.test.js.map +1 -0
- package/dist/executor/__tests__/parallel-executor.unit.test.d.ts +2 -1
- package/dist/executor/__tests__/parallel-executor.unit.test.d.ts.map +1 -1
- package/dist/executor/__tests__/parallel-executor.unit.test.js +426 -156
- package/dist/executor/__tests__/parallel-executor.unit.test.js.map +1 -1
- package/dist/executor/__tests__/resource-calculator.unit.test.d.ts +10 -0
- package/dist/executor/__tests__/resource-calculator.unit.test.d.ts.map +1 -0
- package/dist/executor/__tests__/resource-calculator.unit.test.js +104 -0
- package/dist/executor/__tests__/resource-calculator.unit.test.js.map +1 -0
- package/dist/executor/__tests__/run-id.unit.test.d.ts +8 -0
- package/dist/executor/__tests__/run-id.unit.test.d.ts.map +1 -0
- package/dist/executor/__tests__/run-id.unit.test.js +156 -0
- package/dist/executor/__tests__/run-id.unit.test.js.map +1 -0
- package/dist/executor/__tests__/worker-entry.integration.test.d.ts +24 -0
- package/dist/executor/__tests__/worker-entry.integration.test.d.ts.map +1 -0
- package/dist/executor/__tests__/worker-entry.integration.test.js +82 -0
- package/dist/executor/__tests__/worker-entry.integration.test.js.map +1 -0
- package/dist/executor/__tests__/worker-entry.unit.test.d.ts +7 -0
- package/dist/executor/__tests__/worker-entry.unit.test.d.ts.map +1 -0
- package/dist/executor/__tests__/worker-entry.unit.test.js +364 -0
- package/dist/executor/__tests__/worker-entry.unit.test.js.map +1 -0
- package/dist/executor/__tests__/worker-threads-executor.unit.test.d.ts +8 -0
- package/dist/executor/__tests__/worker-threads-executor.unit.test.d.ts.map +1 -0
- package/dist/executor/__tests__/worker-threads-executor.unit.test.js +276 -0
- package/dist/executor/__tests__/worker-threads-executor.unit.test.js.map +1 -0
- package/dist/executor/binary-sut.d.ts +105 -0
- package/dist/executor/binary-sut.d.ts.map +1 -0
- package/dist/executor/binary-sut.js +174 -0
- package/dist/executor/binary-sut.js.map +1 -0
- package/dist/executor/checkpoint-storage.d.ts.map +1 -1
- package/dist/executor/checkpoint-storage.js +6 -4
- package/dist/executor/checkpoint-storage.js.map +1 -1
- package/dist/executor/executor.d.ts +28 -0
- package/dist/executor/executor.d.ts.map +1 -1
- package/dist/executor/executor.js +85 -24
- package/dist/executor/executor.js.map +1 -1
- package/dist/executor/index.d.ts +4 -0
- package/dist/executor/index.d.ts.map +1 -1
- package/dist/executor/index.js +4 -0
- package/dist/executor/index.js.map +1 -1
- package/dist/executor/parallel-executor.d.ts +186 -0
- package/dist/executor/parallel-executor.d.ts.map +1 -1
- package/dist/executor/parallel-executor.js +218 -83
- package/dist/executor/parallel-executor.js.map +1 -1
- package/dist/executor/resource-calculator.d.ts +49 -0
- package/dist/executor/resource-calculator.d.ts.map +1 -0
- package/dist/executor/resource-calculator.js +129 -0
- package/dist/executor/resource-calculator.js.map +1 -0
- package/dist/executor/run-id.d.ts.map +1 -1
- package/dist/executor/run-id.js +8 -1
- package/dist/executor/run-id.js.map +1 -1
- package/dist/executor/worker-entry.d.ts +2 -0
- package/dist/executor/worker-entry.d.ts.map +1 -1
- package/dist/executor/worker-entry.js +46 -55
- package/dist/executor/worker-entry.js.map +1 -1
- package/dist/executor/worker-executor.d.ts +257 -0
- package/dist/executor/worker-executor.d.ts.map +1 -0
- package/dist/executor/worker-executor.js +308 -0
- package/dist/executor/worker-executor.js.map +1 -0
- package/dist/executor/worker-threads-executor.d.ts +245 -0
- package/dist/executor/worker-threads-executor.d.ts.map +1 -0
- package/dist/executor/worker-threads-executor.js +332 -0
- package/dist/executor/worker-threads-executor.js.map +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +4 -2
- package/dist/index.js.map +1 -1
- package/dist/renderers/latex-renderer.d.ts +60 -0
- package/dist/renderers/latex-renderer.d.ts.map +1 -1
- package/dist/renderers/latex-renderer.js +299 -0
- package/dist/renderers/latex-renderer.js.map +1 -1
- package/dist/renderers/types.d.ts +9 -0
- package/dist/renderers/types.d.ts.map +1 -1
- package/dist/renderers/types.js.map +1 -1
- package/dist/robustness/__tests__/perturbations.unit.test.d.ts +11 -0
- package/dist/robustness/__tests__/perturbations.unit.test.d.ts.map +1 -0
- package/dist/robustness/__tests__/perturbations.unit.test.js +284 -0
- package/dist/robustness/__tests__/perturbations.unit.test.js.map +1 -0
- package/dist/robustness/index.d.ts +5 -2
- package/dist/robustness/index.d.ts.map +1 -1
- package/dist/robustness/index.js +4 -2
- package/dist/robustness/index.js.map +1 -1
- package/dist/statistical/__tests__/mann-whitney-u.unit.test.d.ts +7 -0
- package/dist/statistical/__tests__/mann-whitney-u.unit.test.d.ts.map +1 -0
- package/dist/statistical/__tests__/mann-whitney-u.unit.test.js +185 -0
- package/dist/statistical/__tests__/mann-whitney-u.unit.test.js.map +1 -0
- package/dist/types/evaluator.d.ts +449 -0
- package/dist/types/evaluator.d.ts.map +1 -0
- package/dist/types/evaluator.js +9 -0
- package/dist/types/evaluator.js.map +1 -0
- package/dist/types/result.d.ts +2 -0
- package/dist/types/result.d.ts.map +1 -1
- package/package.json +8 -1
- package/dist/claims/evaluator.d.ts +0 -33
- package/dist/claims/evaluator.d.ts.map +0 -1
- package/dist/claims/evaluator.js +0 -174
- package/dist/claims/evaluator.js.map +0 -1
- package/dist/robustness/analyzer.d.ts +0 -61
- package/dist/robustness/analyzer.d.ts.map +0 -1
- package/dist/robustness/analyzer.js +0 -191
- package/dist/robustness/analyzer.js.map +0 -1
|
@@ -0,0 +1,289 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Claims Evaluator
|
|
3
|
+
*
|
|
4
|
+
* Evaluates explicit hypotheses (claims) against aggregated results.
|
|
5
|
+
* Refactored from src/claims/evaluator.ts into a class-based design
|
|
6
|
+
* that implements the Evaluator interface.
|
|
7
|
+
*/
|
|
8
|
+
/**
 * Claims evaluator - evaluates hypotheses (claims) against aggregated results.
 *
 * A claim names a primary SUT, a baseline SUT, a metric and a direction
 * ("greater", "less" or "equal"). Evaluation compares the `mean` of that
 * metric between the two SUT aggregates and classifies the claim as
 * "satisfied", "violated" or "inconclusive".
 */
export class ClaimsEvaluator {
    /** Type identifier */
    type = "claims";

    /** Schema version */
    static VERSION = "1.0.0";

    /**
     * Validate claims evaluator configuration.
     *
     * Never throws on malformed input: a missing/non-object config or a
     * non-array `claims` is reported as a validation error.
     *
     * @param {object} config - Configuration to validate; expected to carry a `claims` array
     * @returns {{valid: boolean, errors: (string[]|undefined), warnings: (string[]|undefined)}}
     *   Validation result. On the early "claims must be an array" exit the
     *   `errors`/`warnings` arrays are returned as-is (warnings is empty);
     *   otherwise empty lists are reported as `undefined`.
     */
    validateConfig(config) {
        const errors = [];
        const warnings = [];

        // Optional chaining so that a null/undefined config is reported, not thrown.
        const claims = config?.claims;
        if (!Array.isArray(claims)) {
            errors.push("claims must be an array");
            return { valid: false, errors, warnings };
        }

        if (claims.length === 0) {
            warnings.push("No claims provided - evaluation will produce empty results");
        }

        // entries() also visits holes in sparse arrays (as undefined), so they get reported.
        for (const [index, claim] of claims.entries()) {
            errors.push(...this.validateClaim(claim, index));
        }

        return {
            valid: errors.length === 0,
            errors: errors.length > 0 ? errors : undefined,
            warnings: warnings.length > 0 ? warnings : undefined,
        };
    }

    /**
     * Validate a single claim.
     *
     * @param {object} claim - Claim to validate
     * @param {number} index - Index in claims array (for error messages)
     * @returns {string[]} Array of error messages (empty when the claim is valid)
     */
    validateClaim(claim, index) {
        const prefix = `Claim[${index}]`;

        // A null / primitive entry cannot be inspected field by field.
        if (claim === null || typeof claim !== "object") {
            return [`${prefix}: claim must be an object`];
        }

        const errors = [];

        // Required, non-empty string fields (order determines error order).
        for (const field of ["claimId", "description", "sut", "baseline", "metric"]) {
            const value = claim[field];
            if (!value || typeof value !== "string") {
                errors.push(`${prefix}: ${field} is required`);
            }
        }

        if (!["greater", "less", "equal"].includes(claim.direction)) {
            errors.push(`${prefix}: direction must be 'greater', 'less', or 'equal'`);
        }

        // NaN has typeof "number" but would make every threshold comparison false.
        if (claim.threshold !== undefined &&
            (typeof claim.threshold !== "number" || Number.isNaN(claim.threshold))) {
            errors.push(`${prefix}: threshold must be a number`);
        }

        if (!["global", "caseClass", "parameterRange", "localStructure"].includes(claim.scope)) {
            errors.push(`${prefix}: scope must be a valid ValidityScope`);
        }

        return errors;
    }

    /**
     * Evaluate claims against aggregated results.
     *
     * @param {object} config - Claims evaluator configuration (`claims` array)
     * @param {object} input - Evaluation context; `aggregates` is read, `metadata.source` is optional
     * @returns {object} Evaluation output with `type`, `version`, `timestamp`,
     *   `data` (the claim summary) and `metadata` (input source and config)
     */
    evaluate(config, input) {
        const { aggregates } = input;

        const evaluations = config.claims.map((claim) => this.evaluateClaim(claim, aggregates));
        const summary = this.createClaimSummary(evaluations);

        return {
            type: "claims",
            version: ClaimsEvaluator.VERSION,
            timestamp: new Date().toISOString(),
            data: summary,
            metadata: {
                inputSource: input.metadata?.source,
                config,
            },
        };
    }

    /**
     * Evaluate a single claim against aggregated results.
     *
     * The claim is inconclusive when either SUT aggregate is absent (after
     * scope filtering) or when the claimed metric has no stats object on
     * either side; the reason names only the side(s) actually missing.
     *
     * @param {object} claim - The claim to evaluate
     * @param {object[]} aggregates - Aggregated results from the pipeline
     * @returns {object} Claim evaluation with `claim`, `status` and `evidence`
     *   (plus `inconclusiveReason` for data-missing cases)
     */
    evaluateClaim(claim, aggregates) {
        const scoped = this.filterByScope(aggregates, claim);

        // First aggregate per SUT wins.
        const primaryAgg = scoped.find((a) => a.sut === claim.sut);
        const baselineAgg = scoped.find((a) => a.sut === claim.baseline);

        if (!primaryAgg || !baselineAgg) {
            return this.createInconclusiveResult(
                claim,
                primaryAgg ? undefined : "Primary SUT not found",
                baselineAgg ? undefined : "Baseline SUT not found",
            );
        }

        // Own-property lookup: inherited keys such as "toString" must not count
        // as metrics, and a null/undefined entry is treated as missing.
        const statsFor = (agg) => {
            const { metrics } = agg;
            if (metrics === undefined || metrics === null || !Object.hasOwn(metrics, claim.metric)) {
                return undefined;
            }
            return metrics[claim.metric] ?? undefined;
        };
        const primaryStats = statsFor(primaryAgg);
        const baselineStats = statsFor(baselineAgg);

        if (!primaryStats || !baselineStats) {
            return this.createInconclusiveResult(
                claim,
                primaryStats ? undefined : "Metric not found in primary results",
                baselineStats ? undefined : "Metric not found in baseline results",
            );
        }

        const primaryValue = primaryStats.mean;
        const baselineValue = baselineStats.mean;
        const delta = primaryValue - baselineValue;
        // A zero baseline is reported as an infinite ratio rather than dividing by zero.
        const ratio = baselineValue === 0 ? Infinity : primaryValue / baselineValue;

        // Statistical comparison against the baseline, when the aggregate carries one.
        const comparison = primaryAgg.comparisons?.[claim.baseline];

        const evidence = {
            primaryValue,
            baselineValue,
            delta,
            ratio,
            pValue: comparison?.pValue,
            effectSize: comparison?.effectSize,
            n: primaryStats.n + baselineStats.n,
        };

        return {
            claim,
            status: this.determineClaimStatus(claim, evidence),
            evidence,
        };
    }

    /**
     * Filter aggregates by claim scope constraints.
     *
     * Only the `caseClass` constraint is enforced; other constraint keys are
     * currently ignored. Without `scopeConstraints` the input array itself is
     * returned.
     *
     * @param {object[]} aggregates - All aggregates
     * @param {object} claim - Claim with optional `scopeConstraints`
     * @returns {object[]} Filtered aggregates
     */
    filterByScope(aggregates, claim) {
        const constraints = claim.scopeConstraints;
        if (!constraints) {
            return aggregates;
        }

        // Normalise the constraint once instead of once per aggregate.
        const hasCaseClass = Object.keys(constraints).includes("caseClass");
        if (!hasCaseClass) {
            return aggregates.filter(() => true);
        }
        const allowedClasses = Array.isArray(constraints.caseClass)
            ? constraints.caseClass
            : [constraints.caseClass];

        return aggregates.filter((agg) => allowedClasses.includes(agg.caseClass));
    }

    /**
     * Create an inconclusive result with reasons.
     *
     * @param {object} claim - The claim being evaluated
     * @param {...(string|undefined)} reasons - Reasons for inconclusive status;
     *   `undefined` entries are dropped, the rest are joined with "; "
     * @returns {object} Inconclusive claim evaluation with NaN evidence values
     */
    createInconclusiveResult(claim, ...reasons) {
        const validReasons = reasons.filter((reason) => reason !== undefined);
        return {
            claim,
            status: "inconclusive",
            evidence: {
                primaryValue: Number.NaN,
                baselineValue: Number.NaN,
                delta: Number.NaN,
                ratio: Number.NaN,
            },
            inconclusiveReason: validReasons.join("; "),
        };
    }

    /**
     * Determine claim status based on evidence.
     *
     * Order of checks: missing/non-numeric values, statistical significance
     * (default level 0.05, only when a p-value is present), minimum effect
     * size (only when both sides are present), then the claimed direction.
     *
     * @param {object} claim - The claim being evaluated
     * @param {object} evidence - Computed evidence
     * @returns {"satisfied"|"violated"|"inconclusive"} Claim status
     */
    determineClaimStatus(claim, evidence) {
        // A missing mean shows up as undefined (and a NaN delta), not only as NaN.
        const isMissing = (value) => typeof value !== "number" || Number.isNaN(value);
        if (isMissing(evidence.primaryValue) ||
            isMissing(evidence.baselineValue) ||
            isMissing(evidence.delta)) {
            return "inconclusive";
        }

        const significanceLevel = claim.significanceLevel ?? 0.05;
        if (evidence.pValue !== undefined && evidence.pValue > significanceLevel) {
            return "inconclusive";
        }

        if (claim.minEffectSize !== undefined &&
            evidence.effectSize !== undefined &&
            Math.abs(evidence.effectSize) < claim.minEffectSize) {
            return "inconclusive";
        }

        switch (claim.direction) {
            case "greater": {
                // With a threshold the primary must exceed the baseline by at least that margin.
                if (claim.threshold !== undefined) {
                    return evidence.delta >= claim.threshold ? "satisfied" : "violated";
                }
                return evidence.delta > 0 ? "satisfied" : "violated";
            }
            case "less": {
                // Threshold is a positive margin: primary must be lower by at least that much.
                if (claim.threshold !== undefined) {
                    return evidence.delta <= -claim.threshold ? "satisfied" : "violated";
                }
                return evidence.delta < 0 ? "satisfied" : "violated";
            }
            case "equal": {
                // For equality the threshold acts as the tolerance (default 0.001).
                const epsilon = claim.threshold ?? 0.001;
                return Math.abs(evidence.delta) <= epsilon ? "satisfied" : "violated";
            }
            default:
                // Unknown direction: never return undefined, so summary counts stay consistent.
                return "inconclusive";
        }
    }

    /**
     * Create a claim evaluation summary.
     *
     * `satisfactionRate` is satisfied / (satisfied + violated); inconclusive
     * claims are excluded from the denominator, and the rate is 0 when no
     * claim reached a definitive status.
     *
     * @param {object[]} evaluations - Completed claim evaluations
     * @returns {object} Summary with counts and rates
     */
    createClaimSummary(evaluations) {
        const countWithStatus = (status) => evaluations.filter((e) => e.status === status).length;
        const satisfied = countWithStatus("satisfied");
        const violated = countWithStatus("violated");
        const inconclusive = countWithStatus("inconclusive");

        const definitive = satisfied + violated;
        const satisfactionRate = definitive > 0 ? satisfied / definitive : 0;

        return {
            version: ClaimsEvaluator.VERSION,
            timestamp: new Date().toISOString(),
            evaluations,
            summary: {
                total: evaluations.length,
                satisfied,
                violated,
                inconclusive,
                satisfactionRate,
            },
        };
    }

    /**
     * Summarize evaluation output.
     *
     * @param {object} output - Evaluation output to summarize (reads `output.data.summary`)
     * @returns {object} Summary statistics: `total`, `passed`, `failed`,
     *   `inconclusive`, `passRate` and `additional.satisfactionRate`
     */
    summarize(output) {
        const { summary } = output.data;
        return {
            total: summary.total,
            passed: summary.satisfied,
            failed: summary.violated,
            inconclusive: summary.inconclusive,
            passRate: summary.satisfactionRate,
            additional: {
                satisfactionRate: summary.satisfactionRate,
            },
        };
    }
}
|
|
289
|
+
//# sourceMappingURL=claims-evaluator.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"claims-evaluator.js","sourceRoot":"","sources":["../../src/evaluators/claims-evaluator.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAsBH;;GAEG;AACH,MAAM,OAAO,eAAe;IAG3B,sBAAsB;IACb,IAAI,GAAG,QAAiB,CAAC;IAElC,qBAAqB;IACb,MAAM,CAAU,OAAO,GAAG,OAAO,CAAC;IAE1C;;;;;OAKG;IACH,cAAc,CAAC,MAA6B;QAC3C,MAAM,MAAM,GAAa,EAAE,CAAC;QAC5B,MAAM,QAAQ,GAAa,EAAE,CAAC;QAE9B,qBAAqB;QACrB,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC;YACnC,MAAM,CAAC,IAAI,CAAC,yBAAyB,CAAC,CAAC;YACvC,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,CAAC;QAC3C,CAAC;QAED,IAAI,MAAM,CAAC,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAChC,QAAQ,CAAC,IAAI,CAAC,4DAA4D,CAAC,CAAC;QAC7E,CAAC;QAED,sBAAsB;QACtB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC/C,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;YAC/B,MAAM,WAAW,GAAG,IAAI,CAAC,aAAa,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;YACjD,MAAM,CAAC,IAAI,CAAC,GAAG,WAAW,CAAC,CAAC;QAC7B,CAAC;QAED,OAAO;YACN,KAAK,EAAE,MAAM,CAAC,MAAM,KAAK,CAAC;YAC1B,MAAM,EAAE,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS;YAC9C,QAAQ,EAAE,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS;SACpD,CAAC;IACH,CAAC;IAED;;;;;;OAMG;IACK,aAAa,CAAC,KAAsB,EAAE,KAAa;QAC1D,MAAM,MAAM,GAAa,EAAE,CAAC;QAC5B,MAAM,MAAM,GAAG,SAAS,KAAK,GAAG,CAAC;QAEjC,IAAI,CAAC,KAAK,CAAC,OAAO,IAAI,OAAO,KAAK,CAAC,OAAO,KAAK,QAAQ,EAAE,CAAC;YACzD,MAAM,CAAC,IAAI,CAAC,GAAG,MAAM,uBAAuB,CAAC,CAAC;QAC/C,CAAC;QACD,IAAI,CAAC,KAAK,CAAC,WAAW,IAAI,OAAO,KAAK,CAAC,WAAW,KAAK,QAAQ,EAAE,CAAC;YACjE,MAAM,CAAC,IAAI,CAAC,GAAG,MAAM,2BAA2B,CAAC,CAAC;QACnD,CAAC;QACD,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,OAAO,KAAK,CAAC,GAAG,KAAK,QAAQ,EAAE,CAAC;YACjD,MAAM,CAAC,IAAI,CAAC,GAAG,MAAM,mBAAmB,CAAC,CAAC;QAC3C,CAAC;QACD,IAAI,CAAC,KAAK,CAAC,QAAQ,IAAI,OAAO,KAAK,CAAC,QAAQ,KAAK,QAAQ,EAAE,CAAC;YAC3D,MAAM,CAAC,IAAI,CAAC,GAAG,MAAM,wBAAwB,CAAC,CAAC;QAChD,CAAC;QACD,IAAI,CAAC,KAAK,CAAC,MAAM,IAAI,OAAO,KAAK,CAAC,MAAM,KAAK,QAAQ,EAAE,CAAC;YACvD,MAAM,CAAC,IAAI,CAAC,GAAG,MAAM,sBAAsB,CAAC,CAAC;QAC9C,
CAAC;QACD,IAAI,CAAC,CAAC,SAAS,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,SAAS,CAAC,EAAE,CAAC;YAC7D,MAAM,CAAC,IAAI,CAAC,GAAG,MAAM,mDAAmD,CAAC,CAAC;QAC3E,CAAC;QACD,IAAI,KAAK,CAAC,SAAS,KAAK,SAAS,IAAI,OAAO,KAAK,CAAC,SAAS,KAAK,QAAQ,EAAE,CAAC;YAC1E,MAAM,CAAC,IAAI,CAAC,GAAG,MAAM,8BAA8B,CAAC,CAAC;QACtD,CAAC;QACD,IAAI,CAAC,CAAC,QAAQ,EAAE,WAAW,EAAE,gBAAgB,EAAE,gBAAgB,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC;YACxF,MAAM,CAAC,IAAI,CAAC,GAAG,MAAM,uCAAuC,CAAC,CAAC;QAC/D,CAAC;QAED,OAAO,MAAM,CAAC;IACf,CAAC;IAED;;;;;;OAMG;IACH,QAAQ,CACP,MAA6B,EAC7B,KAAwB;QAExB,MAAM,EAAE,UAAU,EAAE,GAAG,KAAK,CAAC;QAE7B,sBAAsB;QACtB,MAAM,WAAW,GAAsB,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAClE,IAAI,CAAC,aAAa,CAAC,KAAK,EAAE,UAAU,CAAC,CACrC,CAAC;QAEF,iBAAiB;QACjB,MAAM,OAAO,GAAG,IAAI,CAAC,kBAAkB,CAAC,WAAW,CAAC,CAAC;QAErD,OAAO;YACN,IAAI,EAAE,QAAQ;YACd,OAAO,EAAE,eAAe,CAAC,OAAO;YAChC,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;YACnC,IAAI,EAAE,OAAO;YACb,QAAQ,EAAE;gBACT,WAAW,EAAE,KAAK,CAAC,QAAQ,EAAE,MAAM;gBACnC,MAAM;aACN;SACD,CAAC;IACH,CAAC;IAED;;;;;;OAMG;IACK,aAAa,CAAC,KAAsB,EAAE,UAA8B;QAC3E,yCAAyC;QACzC,MAAM,kBAAkB,GAAG,IAAI,CAAC,aAAa,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC;QAEjE,uCAAuC;QACvC,MAAM,UAAU,GAAG,kBAAkB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,KAAK,KAAK,CAAC,GAAG,CAAC,CAAC;QACvE,MAAM,WAAW,GAAG,kBAAkB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,KAAK,KAAK,CAAC,QAAQ,CAAC,CAAC;QAE7E,sBAAsB;QACtB,IAAI,CAAC,UAAU,IAAI,CAAC,WAAW,EAAE,CAAC;YACjC,OAAO,IAAI,CAAC,wBAAwB,CACnC,KAAK,EACL,UAAU,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,uBAAuB,EAChD,WAAW,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,wBAAwB,CAClD,CAAC;QACH,CAAC;QAED,oBAAoB;QACpB,MAAM,aAAa,GAAG,KAAK,CAAC,MAAM,CAAC;QACnC,MAAM,cAAc,GAAG,KAAK,CAAC,MAAM,CAAC;QACpC,MAAM,YAAY,GAAG,UAAU,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC;QACvD,MAAM,aAAa,GAAG,WAAW,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC;QAE1D,IAAI,CAAC,CAAC,aAAa,IAAI,UAAU,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,cAAc,IAAI,WAAW,CAAC,OAAO,CAAC,EAAE,CAAC;YACxF,OAAO,IAAI,CAAC,wBAAwB,CACnC,KAAK,EACL,qCAAqC,EACrC,sCAAsC,
CACtC,CAAC;QACH,CAAC;QAED,mBAAmB;QACnB,MAAM,YAAY,GAAG,YAAY,CAAC,IAAI,CAAC;QACvC,MAAM,aAAa,GAAG,aAAa,CAAC,IAAI,CAAC;QACzC,MAAM,KAAK,GAAG,YAAY,GAAG,aAAa,CAAC;QAC3C,MAAM,KAAK,GAAG,aAAa,KAAK,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,YAAY,GAAG,aAAa,CAAC;QAE5E,4CAA4C;QAC5C,MAAM,UAAU,GAAG,UAAU,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;QAC5D,MAAM,MAAM,GAAG,UAAU,EAAE,MAAM,CAAC;QAClC,MAAM,UAAU,GAAG,UAAU,EAAE,UAAU,CAAC;QAE1C,MAAM,QAAQ,GAAkB;YAC/B,YAAY;YACZ,aAAa;YACb,KAAK;YACL,KAAK;YACL,MAAM;YACN,UAAU;YACV,CAAC,EAAE,YAAY,CAAC,CAAC,GAAG,aAAa,CAAC,CAAC;SACnC,CAAC;QAEF,yBAAyB;QACzB,MAAM,MAAM,GAAG,IAAI,CAAC,oBAAoB,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;QAE1D,OAAO;YACN,KAAK;YACL,MAAM;YACN,QAAQ;SACR,CAAC;IACH,CAAC;IAED;;;;;;OAMG;IACK,aAAa,CACpB,UAA8B,EAC9B,KAAsB;QAEtB,IAAI,CAAC,KAAK,CAAC,gBAAgB,EAAE,CAAC;YAC7B,OAAO,UAAU,CAAC;QACnB,CAAC;QAED,OAAO,UAAU,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE;YAChC,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,gBAAgB,IAAI,EAAE,CAAC,EAAE,CAAC;gBACzE,IAAI,GAAG,KAAK,WAAW,EAAE,CAAC;oBACzB,MAAM,cAAc,GAAG,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;oBAC9D,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC,GAAG,CAAC,SAAsB,CAAC,EAAE,CAAC;wBAC1D,OAAO,KAAK,CAAC;oBACd,CAAC;gBACF,CAAC;gBACD,6CAA6C;YAC9C,CAAC;YACD,OAAO,IAAI,CAAC;QACb,CAAC,CAAC,CAAC;IACJ,CAAC;IAED;;;;;;OAMG;IACK,wBAAwB,CAC/B,KAAsB,EACtB,GAAG,OAA+B;QAElC,MAAM,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAe,EAAE,CAAC,CAAC,KAAK,SAAS,CAAC,CAAC;QAEzE,OAAO;YACN,KAAK;YACL,MAAM,EAAE,cAAc;YACtB,QAAQ,EAAE;gBACT,YAAY,EAAE,MAAM,CAAC,GAAG;gBACxB,aAAa,EAAE,MAAM,CAAC,GAAG;gBACzB,KAAK,EAAE,MAAM,CAAC,GAAG;gBACjB,KAAK,EAAE,MAAM,CAAC,GAAG;aACjB;YACD,kBAAkB,EAAE,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC;SAC3C,CAAC;IACH,CAAC;IAED;;;;;;OAMG;IACK,oBAAoB,CAAC,KAAsB,EAAE,QAAuB;QAC3E,yBAAyB;QACzB,IAAI,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,YAAY,CAAC,IAAI,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,aAAa,CAAC,EAAE,CAAC;YACjF,OAAO,cAAc,CAAC;QACvB,CAAC;QAED,6CAA6C;QAC7C,MAAM,iBAAiB,GAAG,KAAK,CAAC,iBAAiB,IAAI,IAAI,CAAC;QAC1D,IAAI,QAAQ,CA
AC,MAAM,KAAK,SAAS,IAAI,QAAQ,CAAC,MAAM,GAAG,iBAAiB,EAAE,CAAC;YAC1E,OAAO,cAAc,CAAC;QACvB,CAAC;QAED,wCAAwC;QACxC,IACC,KAAK,CAAC,aAAa,KAAK,SAAS;YACjC,QAAQ,CAAC,UAAU,KAAK,SAAS;YACjC,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,UAAU,CAAC,GAAG,KAAK,CAAC,aAAa,EAClD,CAAC;YACF,OAAO,cAAc,CAAC;QACvB,CAAC;QAED,qBAAqB;QACrB,QAAQ,KAAK,CAAC,SAAS,EAAE,CAAC;YACzB,KAAK,SAAS,CAAC,CAAC,CAAC;gBAChB,IAAI,KAAK,CAAC,SAAS,KAAK,SAAS,EAAE,CAAC;oBACnC,OAAO,QAAQ,CAAC,KAAK,IAAI,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,UAAU,CAAC;gBACrE,CAAC;gBACD,OAAO,QAAQ,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,UAAU,CAAC;YACtD,CAAC;YAED,KAAK,MAAM,CAAC,CAAC,CAAC;gBACb,IAAI,KAAK,CAAC,SAAS,KAAK,SAAS,EAAE,CAAC;oBACnC,OAAO,QAAQ,CAAC,KAAK,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,UAAU,CAAC;gBACtE,CAAC;gBACD,OAAO,QAAQ,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,UAAU,CAAC;YACtD,CAAC;YAED,KAAK,OAAO,CAAC,CAAC,CAAC;gBACd,MAAM,OAAO,GAAG,KAAK,CAAC,SAAS,IAAI,KAAK,CAAC;gBACzC,OAAO,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,OAAO,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,UAAU,CAAC;YACvE,CAAC;QACF,CAAC;IACF,CAAC;IAED;;;;;OAKG;IACK,kBAAkB,CAAC,WAA8B;QACxD,MAAM,SAAS,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,WAAW,CAAC,CAAC,MAAM,CAAC;QAC7E,MAAM,QAAQ,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,UAAU,CAAC,CAAC,MAAM,CAAC;QAC3E,MAAM,YAAY,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,cAAc,CAAC,CAAC,MAAM,CAAC;QAEnF,MAAM,UAAU,GAAG,SAAS,GAAG,QAAQ,CAAC;QACxC,MAAM,gBAAgB,GAAG,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC;QAErE,OAAO;YACN,OAAO,EAAE,OAAO;YAChB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;YACnC,WAAW;YACX,OAAO,EAAE;gBACR,KAAK,EAAE,WAAW,CAAC,MAAM;gBACzB,SAAS;gBACT,QAAQ;gBACR,YAAY;gBACZ,gBAAgB;aAChB;SACD,CAAC;IACH,CAAC;IAED;;;;;OAKG;IACH,SAAS,CAAC,MAA6C;QACtD,MAAM,EAAE,OAAO,EAAE,GAAG,MAAM,CAAC,IAAI,CAAC;QAEhC,OAAO;YACN,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,MAAM,EAAE,OAAO,CAAC,SAAS;YACzB,MAAM,EAAE,OAAO,CAAC,QAAQ;YAC
xB,YAAY,EAAE,OAAO,CAAC,YAAY;YAClC,QAAQ,EAAE,OAAO,CAAC,gBAAgB;YAClC,UAAU,EAAE;gBACX,gBAAgB,EAAE,OAAO,CAAC,gBAAgB;aAC1C;SACD,CAAC;IACH,CAAC"}
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Exploratory Evaluator
|
|
3
|
+
*
|
|
4
|
+
* Hypothesis-free analysis for discovering patterns in evaluation data.
|
|
5
|
+
* Unlike ClaimsEvaluator which tests predefined hypotheses, this evaluator
|
|
6
|
+
* performs exploratory analysis including:
|
|
7
|
+
* - Ranking all SUTs by any metric (not just primary vs baseline)
|
|
8
|
+
* - Finding significant pairwise differences (N-way comparisons)
|
|
9
|
+
* - Discovering case-class effects
|
|
10
|
+
* - Computing metric correlations
|
|
11
|
+
*/
|
|
12
|
+
import type { EvaluationContext, EvaluationOutput, EvaluationSummary, Evaluator, ExploratoryEvaluatorConfig, ExploratoryEvaluatorData, IEvaluator, ValidationResult } from "../types/evaluator.js";
|
|
13
|
+
/**
|
|
14
|
+
* Exploratory evaluator - hypothesis-free comparative analysis.
|
|
15
|
+
*/
|
|
16
|
+
export declare class ExploratoryEvaluator implements Evaluator<ExploratoryEvaluatorConfig, EvaluationContext, ExploratoryEvaluatorData>, IEvaluator {
|
|
17
|
+
/** Type identifier */
|
|
18
|
+
readonly type: "exploratory";
|
|
19
|
+
/** Schema version */
|
|
20
|
+
private static readonly VERSION;
|
|
21
|
+
/** Default significance level */
|
|
22
|
+
private static readonly DEFAULT_SIGNIFICANCE;
|
|
23
|
+
/**
|
|
24
|
+
* Validate exploratory evaluator configuration.
|
|
25
|
+
*
|
|
26
|
+
* @param config - Configuration to validate
|
|
27
|
+
* @returns Validation result
|
|
28
|
+
*/
|
|
29
|
+
validateConfig(config: ExploratoryEvaluatorConfig): ValidationResult;
|
|
30
|
+
/**
|
|
31
|
+
* Perform exploratory evaluation.
|
|
32
|
+
*
|
|
33
|
+
* @param config - Exploratory evaluator configuration
|
|
34
|
+
* @param input - Evaluation context with aggregates
|
|
35
|
+
* @returns Evaluation output
|
|
36
|
+
*/
|
|
37
|
+
evaluate(config: ExploratoryEvaluatorConfig, input: EvaluationContext): EvaluationOutput<ExploratoryEvaluatorData>;
|
|
38
|
+
/**
|
|
39
|
+
* Summarize evaluation output.
|
|
40
|
+
*
|
|
41
|
+
* @param output - Evaluation output to summarize
|
|
42
|
+
* @returns Summary statistics
|
|
43
|
+
*/
|
|
44
|
+
summarize(output: EvaluationOutput<ExploratoryEvaluatorData>): EvaluationSummary;
|
|
45
|
+
/**
|
|
46
|
+
* Determine which SUTs to analyze.
|
|
47
|
+
* @param aggregates
|
|
48
|
+
* @param configSuts
|
|
49
|
+
*/
|
|
50
|
+
private determineSuts;
|
|
51
|
+
/**
|
|
52
|
+
* Determine which metrics to analyze.
|
|
53
|
+
* @param aggregates
|
|
54
|
+
* @param configMetrics
|
|
55
|
+
*/
|
|
56
|
+
private determineMetrics;
|
|
57
|
+
/**
|
|
58
|
+
* Compute rankings for a single metric.
|
|
59
|
+
* @param aggregates
|
|
60
|
+
* @param metric
|
|
61
|
+
* @param direction
|
|
62
|
+
*/
|
|
63
|
+
private computeRankings;
|
|
64
|
+
/**
|
|
65
|
+
* Compute all pairwise comparisons.
|
|
66
|
+
* @param aggregates
|
|
67
|
+
* @param suts
|
|
68
|
+
* @param metrics
|
|
69
|
+
* @param significanceLevel
|
|
70
|
+
* @param minEffectSize
|
|
71
|
+
*/
|
|
72
|
+
private computePairwiseComparisons;
|
|
73
|
+
/**
|
|
74
|
+
* Compare a single pair of SUTs for a metric.
|
|
75
|
+
* @param aggregates
|
|
76
|
+
* @param sutA
|
|
77
|
+
* @param sutB
|
|
78
|
+
* @param metric
|
|
79
|
+
* @param significanceLevel
|
|
80
|
+
* @param minEffectSize
|
|
81
|
+
*/
|
|
82
|
+
private compareSutPair;
|
|
83
|
+
/**
|
|
84
|
+
* Analyze case-class effects on SUT performance.
|
|
85
|
+
* @param aggregates
|
|
86
|
+
* @param metrics
|
|
87
|
+
* @param significanceLevel
|
|
88
|
+
*/
|
|
89
|
+
private analyzeCaseClassEffects;
|
|
90
|
+
/**
|
|
91
|
+
* Compute correlations between metrics.
|
|
92
|
+
* @param aggregates
|
|
93
|
+
* @param metrics
|
|
94
|
+
*/
|
|
95
|
+
private computeMetricCorrelations;
|
|
96
|
+
/**
|
|
97
|
+
* Compute Pearson and Spearman correlation between two metrics.
|
|
98
|
+
* @param aggregates
|
|
99
|
+
* @param metricA
|
|
100
|
+
* @param metricB
|
|
101
|
+
*/
|
|
102
|
+
private computeCorrelation;
|
|
103
|
+
/**
|
|
104
|
+
* Compute Pearson correlation coefficient.
|
|
105
|
+
* @param x
|
|
106
|
+
* @param y
|
|
107
|
+
*/
|
|
108
|
+
private pearsonCorrelation;
|
|
109
|
+
/**
|
|
110
|
+
* Compute Spearman rank correlation coefficient.
|
|
111
|
+
* @param x
|
|
112
|
+
* @param y
|
|
113
|
+
*/
|
|
114
|
+
private spearmanCorrelation;
|
|
115
|
+
/**
|
|
116
|
+
* Compute ranks for an array of values (handling ties).
|
|
117
|
+
* @param values
|
|
118
|
+
*/
|
|
119
|
+
private computeRanks;
|
|
120
|
+
/**
|
|
121
|
+
* Interpret correlation coefficient.
|
|
122
|
+
* @param r
|
|
123
|
+
*/
|
|
124
|
+
private interpretCorrelation;
|
|
125
|
+
/**
|
|
126
|
+
* Compute variance of an array.
|
|
127
|
+
* @param values
|
|
128
|
+
*/
|
|
129
|
+
private variance;
|
|
130
|
+
/**
|
|
131
|
+
* Standard normal CDF approximation.
|
|
132
|
+
* @param z
|
|
133
|
+
*/
|
|
134
|
+
private normalCdf;
|
|
135
|
+
}
|
|
136
|
+
//# sourceMappingURL=exploratory-evaluator.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"exploratory-evaluator.d.ts","sourceRoot":"","sources":["../../src/evaluators/exploratory-evaluator.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAGH,OAAO,KAAK,EAEX,iBAAiB,EACjB,gBAAgB,EAChB,iBAAiB,EACjB,SAAS,EACT,0BAA0B,EAC1B,wBAAwB,EAExB,UAAU,EAKV,gBAAgB,EAChB,MAAM,uBAAuB,CAAC;AAE/B;;GAEG;AACH,qBAAa,oBACZ,YACC,SAAS,CAAC,0BAA0B,EAAE,iBAAiB,EAAE,wBAAwB,CAAC,EAClF,UAAU;IAEX,sBAAsB;IACtB,QAAQ,CAAC,IAAI,EAAG,aAAa,CAAU;IAEvC,qBAAqB;IACrB,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAW;IAE1C,iCAAiC;IACjC,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,oBAAoB,CAAQ;IAEpD;;;;;OAKG;IACH,cAAc,CAAC,MAAM,EAAE,0BAA0B,GAAG,gBAAgB;IA2CpE;;;;;;OAMG;IACH,QAAQ,CACP,MAAM,EAAE,0BAA0B,EAClC,KAAK,EAAE,iBAAiB,GACtB,gBAAgB,CAAC,wBAAwB,CAAC;IAsF7C;;;;;OAKG;IACH,SAAS,CAAC,MAAM,EAAE,gBAAgB,CAAC,wBAAwB,CAAC,GAAG,iBAAiB;IAchF;;;;OAIG;IACH,OAAO,CAAC,aAAa;IAQrB;;;;OAIG;IACH,OAAO,CAAC,gBAAgB;IAcxB;;;;;OAKG;IACH,OAAO,CAAC,eAAe;IAuEvB;;;;;;;OAOG;IACH,OAAO,CAAC,0BAA0B;IAmClC;;;;;;;;OAQG;IACH,OAAO,CAAC,cAAc;IAuFtB;;;;;OAKG;IACH,OAAO,CAAC,uBAAuB;IA8D/B;;;;OAIG;IACH,OAAO,CAAC,yBAAyB;IAqBjC;;;;;OAKG;IACH,OAAO,CAAC,kBAAkB;IA2C1B;;;;OAIG;IACH,OAAO,CAAC,kBAAkB;IAqB1B;;;;OAIG;IACH,OAAO,CAAC,mBAAmB;IAM3B;;;OAGG;IACH,OAAO,CAAC,YAAY;IA0BpB;;;OAGG;IACH,OAAO,CAAC,oBAAoB;IAS5B;;;OAGG;IACH,OAAO,CAAC,QAAQ;IAMhB;;;OAGG;IACH,OAAO,CAAC,SAAS;CAiBjB"}
|