ppef 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +172 -0
- package/bin/ppef.mjs +20 -0
- package/dist/__tests__/framework-pipeline.integration.test.d.ts +7 -0
- package/dist/__tests__/framework-pipeline.integration.test.d.ts.map +1 -0
- package/dist/__tests__/framework-pipeline.integration.test.js +415 -0
- package/dist/__tests__/framework-pipeline.integration.test.js.map +1 -0
- package/dist/__tests__/index-exports.unit.test.d.ts +8 -0
- package/dist/__tests__/index-exports.unit.test.d.ts.map +1 -0
- package/dist/__tests__/index-exports.unit.test.js +127 -0
- package/dist/__tests__/index-exports.unit.test.js.map +1 -0
- package/dist/__tests__/registry-executor.integration.test.d.ts +5 -0
- package/dist/__tests__/registry-executor.integration.test.d.ts.map +1 -0
- package/dist/__tests__/registry-executor.integration.test.js +352 -0
- package/dist/__tests__/registry-executor.integration.test.js.map +1 -0
- package/dist/__tests__/test-helpers.d.ts +94 -0
- package/dist/__tests__/test-helpers.d.ts.map +1 -0
- package/dist/__tests__/test-helpers.js +271 -0
- package/dist/__tests__/test-helpers.js.map +1 -0
- package/dist/aggregation/__tests__/aggregators.unit.test.d.ts +7 -0
- package/dist/aggregation/__tests__/aggregators.unit.test.d.ts.map +1 -0
- package/dist/aggregation/__tests__/aggregators.unit.test.js +350 -0
- package/dist/aggregation/__tests__/aggregators.unit.test.js.map +1 -0
- package/dist/aggregation/__tests__/pipeline.unit.test.d.ts +7 -0
- package/dist/aggregation/__tests__/pipeline.unit.test.d.ts.map +1 -0
- package/dist/aggregation/__tests__/pipeline.unit.test.js +213 -0
- package/dist/aggregation/__tests__/pipeline.unit.test.js.map +1 -0
- package/dist/aggregation/aggregators.d.ts +63 -0
- package/dist/aggregation/aggregators.d.ts.map +1 -0
- package/dist/aggregation/aggregators.js +228 -0
- package/dist/aggregation/aggregators.js.map +1 -0
- package/dist/aggregation/index.d.ts +8 -0
- package/dist/aggregation/index.d.ts.map +1 -0
- package/dist/aggregation/index.js +8 -0
- package/dist/aggregation/index.js.map +1 -0
- package/dist/aggregation/pipeline.d.ts +38 -0
- package/dist/aggregation/pipeline.d.ts.map +1 -0
- package/dist/aggregation/pipeline.js +198 -0
- package/dist/aggregation/pipeline.js.map +1 -0
- package/dist/claims/__tests__/evaluator.unit.test.d.ts +12 -0
- package/dist/claims/__tests__/evaluator.unit.test.d.ts.map +1 -0
- package/dist/claims/__tests__/evaluator.unit.test.js +801 -0
- package/dist/claims/__tests__/evaluator.unit.test.js.map +1 -0
- package/dist/claims/evaluator.d.ts +33 -0
- package/dist/claims/evaluator.d.ts.map +1 -0
- package/dist/claims/evaluator.js +174 -0
- package/dist/claims/evaluator.js.map +1 -0
- package/dist/claims/index.d.ts +7 -0
- package/dist/claims/index.d.ts.map +1 -0
- package/dist/claims/index.js +7 -0
- package/dist/claims/index.js.map +1 -0
- package/dist/cli/__tests__/aggregate.command.unit.test.d.ts +7 -0
- package/dist/cli/__tests__/aggregate.command.unit.test.d.ts.map +1 -0
- package/dist/cli/__tests__/aggregate.command.unit.test.js +396 -0
- package/dist/cli/__tests__/aggregate.command.unit.test.js.map +1 -0
- package/dist/cli/__tests__/commands.unit.test.d.ts +10 -0
- package/dist/cli/__tests__/commands.unit.test.d.ts.map +1 -0
- package/dist/cli/__tests__/commands.unit.test.js +217 -0
- package/dist/cli/__tests__/commands.unit.test.js.map +1 -0
- package/dist/cli/__tests__/index.unit.test.d.ts +10 -0
- package/dist/cli/__tests__/index.unit.test.d.ts.map +1 -0
- package/dist/cli/__tests__/index.unit.test.js +65 -0
- package/dist/cli/__tests__/index.unit.test.js.map +1 -0
- package/dist/cli/__tests__/logger.unit.test.d.ts +11 -0
- package/dist/cli/__tests__/logger.unit.test.d.ts.map +1 -0
- package/dist/cli/__tests__/logger.unit.test.js +180 -0
- package/dist/cli/__tests__/logger.unit.test.js.map +1 -0
- package/dist/cli/__tests__/module-loader.unit.test.d.ts +11 -0
- package/dist/cli/__tests__/module-loader.unit.test.d.ts.map +1 -0
- package/dist/cli/__tests__/module-loader.unit.test.js +262 -0
- package/dist/cli/__tests__/module-loader.unit.test.js.map +1 -0
- package/dist/cli/__tests__/output-writer.unit.test.d.ts +10 -0
- package/dist/cli/__tests__/output-writer.unit.test.d.ts.map +1 -0
- package/dist/cli/__tests__/output-writer.unit.test.js +216 -0
- package/dist/cli/__tests__/output-writer.unit.test.js.map +1 -0
- package/dist/cli/__tests__/plan.command.unit.test.d.ts +7 -0
- package/dist/cli/__tests__/plan.command.unit.test.d.ts.map +1 -0
- package/dist/cli/__tests__/plan.command.unit.test.js +289 -0
- package/dist/cli/__tests__/plan.command.unit.test.js.map +1 -0
- package/dist/cli/__tests__/run.command.unit.test.d.ts +7 -0
- package/dist/cli/__tests__/run.command.unit.test.d.ts.map +1 -0
- package/dist/cli/__tests__/run.command.unit.test.js +422 -0
- package/dist/cli/__tests__/run.command.unit.test.js.map +1 -0
- package/dist/cli/__tests__/validate.command.unit.test.d.ts +7 -0
- package/dist/cli/__tests__/validate.command.unit.test.d.ts.map +1 -0
- package/dist/cli/__tests__/validate.command.unit.test.js +226 -0
- package/dist/cli/__tests__/validate.command.unit.test.js.map +1 -0
- package/dist/cli/command-deps.d.ts +125 -0
- package/dist/cli/command-deps.d.ts.map +1 -0
- package/dist/cli/command-deps.js +7 -0
- package/dist/cli/command-deps.js.map +1 -0
- package/dist/cli/commands/aggregate.d.ts +35 -0
- package/dist/cli/commands/aggregate.d.ts.map +1 -0
- package/dist/cli/commands/aggregate.js +121 -0
- package/dist/cli/commands/aggregate.js.map +1 -0
- package/dist/cli/commands/plan.d.ts +36 -0
- package/dist/cli/commands/plan.d.ts.map +1 -0
- package/dist/cli/commands/plan.js +109 -0
- package/dist/cli/commands/plan.js.map +1 -0
- package/dist/cli/commands/run.d.ts +33 -0
- package/dist/cli/commands/run.d.ts.map +1 -0
- package/dist/cli/commands/run.js +185 -0
- package/dist/cli/commands/run.js.map +1 -0
- package/dist/cli/commands/validate.d.ts +27 -0
- package/dist/cli/commands/validate.d.ts.map +1 -0
- package/dist/cli/commands/validate.js +88 -0
- package/dist/cli/commands/validate.js.map +1 -0
- package/dist/cli/config-loader.d.ts +30 -0
- package/dist/cli/config-loader.d.ts.map +1 -0
- package/dist/cli/config-loader.js +181 -0
- package/dist/cli/config-loader.js.map +1 -0
- package/dist/cli/index.d.ts +26 -0
- package/dist/cli/index.d.ts.map +1 -0
- package/dist/cli/index.js +58 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/cli/logger.d.ts +75 -0
- package/dist/cli/logger.d.ts.map +1 -0
- package/dist/cli/logger.js +131 -0
- package/dist/cli/logger.js.map +1 -0
- package/dist/cli/module-loader.d.ts +46 -0
- package/dist/cli/module-loader.d.ts.map +1 -0
- package/dist/cli/module-loader.js +116 -0
- package/dist/cli/module-loader.js.map +1 -0
- package/dist/cli/output-writer.d.ts +51 -0
- package/dist/cli/output-writer.d.ts.map +1 -0
- package/dist/cli/output-writer.js +65 -0
- package/dist/cli/output-writer.js.map +1 -0
- package/dist/cli/types.d.ts +174 -0
- package/dist/cli/types.d.ts.map +1 -0
- package/dist/cli/types.js +7 -0
- package/dist/cli/types.js.map +1 -0
- package/dist/collector/__tests__/result-collector.unit.test.d.ts +7 -0
- package/dist/collector/__tests__/result-collector.unit.test.d.ts.map +1 -0
- package/dist/collector/__tests__/result-collector.unit.test.js +1021 -0
- package/dist/collector/__tests__/result-collector.unit.test.js.map +1 -0
- package/dist/collector/__tests__/schema.unit.test.d.ts +7 -0
- package/dist/collector/__tests__/schema.unit.test.d.ts.map +1 -0
- package/dist/collector/__tests__/schema.unit.test.js +360 -0
- package/dist/collector/__tests__/schema.unit.test.js.map +1 -0
- package/dist/collector/index.d.ts +8 -0
- package/dist/collector/index.d.ts.map +1 -0
- package/dist/collector/index.js +8 -0
- package/dist/collector/index.js.map +1 -0
- package/dist/collector/result-collector.d.ts +159 -0
- package/dist/collector/result-collector.d.ts.map +1 -0
- package/dist/collector/result-collector.js +213 -0
- package/dist/collector/result-collector.js.map +1 -0
- package/dist/collector/schema.d.ts +34 -0
- package/dist/collector/schema.d.ts.map +1 -0
- package/dist/collector/schema.js +145 -0
- package/dist/collector/schema.js.map +1 -0
- package/dist/executor/__tests__/checkpoint-hash-bug.diagnostic.test.d.ts +10 -0
- package/dist/executor/__tests__/checkpoint-hash-bug.diagnostic.test.d.ts.map +1 -0
- package/dist/executor/__tests__/checkpoint-hash-bug.diagnostic.test.js +122 -0
- package/dist/executor/__tests__/checkpoint-hash-bug.diagnostic.test.js.map +1 -0
- package/dist/executor/__tests__/checkpoint-manager.integration.test.d.ts +7 -0
- package/dist/executor/__tests__/checkpoint-manager.integration.test.d.ts.map +1 -0
- package/dist/executor/__tests__/checkpoint-manager.integration.test.js +330 -0
- package/dist/executor/__tests__/checkpoint-manager.integration.test.js.map +1 -0
- package/dist/executor/__tests__/checkpoint-manager.unit.test.d.ts +7 -0
- package/dist/executor/__tests__/checkpoint-manager.unit.test.d.ts.map +1 -0
- package/dist/executor/__tests__/checkpoint-manager.unit.test.js +531 -0
- package/dist/executor/__tests__/checkpoint-manager.unit.test.js.map +1 -0
- package/dist/executor/__tests__/checkpoint-merge-bug.diagnostic.test.d.ts +8 -0
- package/dist/executor/__tests__/checkpoint-merge-bug.diagnostic.test.d.ts.map +1 -0
- package/dist/executor/__tests__/checkpoint-merge-bug.diagnostic.test.js +493 -0
- package/dist/executor/__tests__/checkpoint-merge-bug.diagnostic.test.js.map +1 -0
- package/dist/executor/__tests__/checkpoint-merge-bug.unit.test.d.ts +8 -0
- package/dist/executor/__tests__/checkpoint-merge-bug.unit.test.d.ts.map +1 -0
- package/dist/executor/__tests__/checkpoint-merge-bug.unit.test.js +164 -0
- package/dist/executor/__tests__/checkpoint-merge-bug.unit.test.js.map +1 -0
- package/dist/executor/__tests__/checkpoint-storage.unit.test.d.ts +7 -0
- package/dist/executor/__tests__/checkpoint-storage.unit.test.d.ts.map +1 -0
- package/dist/executor/__tests__/checkpoint-storage.unit.test.js +490 -0
- package/dist/executor/__tests__/checkpoint-storage.unit.test.js.map +1 -0
- package/dist/executor/__tests__/executor.unit.test.d.ts +7 -0
- package/dist/executor/__tests__/executor.unit.test.d.ts.map +1 -0
- package/dist/executor/__tests__/executor.unit.test.js +202 -0
- package/dist/executor/__tests__/executor.unit.test.js.map +1 -0
- package/dist/executor/__tests__/memory-monitor.unit.test.d.ts +7 -0
- package/dist/executor/__tests__/memory-monitor.unit.test.d.ts.map +1 -0
- package/dist/executor/__tests__/memory-monitor.unit.test.js +285 -0
- package/dist/executor/__tests__/memory-monitor.unit.test.js.map +1 -0
- package/dist/executor/__tests__/parallel-checkpoint-merge.integration.test.d.ts +12 -0
- package/dist/executor/__tests__/parallel-checkpoint-merge.integration.test.d.ts.map +1 -0
- package/dist/executor/__tests__/parallel-checkpoint-merge.integration.test.js +196 -0
- package/dist/executor/__tests__/parallel-checkpoint-merge.integration.test.js.map +1 -0
- package/dist/executor/__tests__/parallel-executor.integration.test.d.ts +7 -0
- package/dist/executor/__tests__/parallel-executor.integration.test.d.ts.map +1 -0
- package/dist/executor/__tests__/parallel-executor.integration.test.js +249 -0
- package/dist/executor/__tests__/parallel-executor.integration.test.js.map +1 -0
- package/dist/executor/__tests__/parallel-executor.unit.test.d.ts +8 -0
- package/dist/executor/__tests__/parallel-executor.unit.test.d.ts.map +1 -0
- package/dist/executor/__tests__/parallel-executor.unit.test.js +473 -0
- package/dist/executor/__tests__/parallel-executor.unit.test.js.map +1 -0
- package/dist/executor/__tests__/run-id.unit.test.d.ts +8 -0
- package/dist/executor/__tests__/run-id.unit.test.d.ts.map +1 -0
- package/dist/executor/__tests__/run-id.unit.test.js +156 -0
- package/dist/executor/__tests__/run-id.unit.test.js.map +1 -0
- package/dist/executor/__tests__/worker-entry.integration.test.d.ts +24 -0
- package/dist/executor/__tests__/worker-entry.integration.test.d.ts.map +1 -0
- package/dist/executor/__tests__/worker-entry.integration.test.js +82 -0
- package/dist/executor/__tests__/worker-entry.integration.test.js.map +1 -0
- package/dist/executor/__tests__/worker-entry.unit.test.d.ts +7 -0
- package/dist/executor/__tests__/worker-entry.unit.test.d.ts.map +1 -0
- package/dist/executor/__tests__/worker-entry.unit.test.js +364 -0
- package/dist/executor/__tests__/worker-entry.unit.test.js.map +1 -0
- package/dist/executor/checkpoint-manager.d.ts +231 -0
- package/dist/executor/checkpoint-manager.d.ts.map +1 -0
- package/dist/executor/checkpoint-manager.js +395 -0
- package/dist/executor/checkpoint-manager.js.map +1 -0
- package/dist/executor/checkpoint-storage.d.ts +230 -0
- package/dist/executor/checkpoint-storage.d.ts.map +1 -0
- package/dist/executor/checkpoint-storage.js +370 -0
- package/dist/executor/checkpoint-storage.js.map +1 -0
- package/dist/executor/checkpoint-types.d.ts +48 -0
- package/dist/executor/checkpoint-types.d.ts.map +1 -0
- package/dist/executor/checkpoint-types.js +8 -0
- package/dist/executor/checkpoint-types.js.map +1 -0
- package/dist/executor/executor.d.ts +164 -0
- package/dist/executor/executor.d.ts.map +1 -0
- package/dist/executor/executor.js +408 -0
- package/dist/executor/executor.js.map +1 -0
- package/dist/executor/index.d.ts +11 -0
- package/dist/executor/index.d.ts.map +1 -0
- package/dist/executor/index.js +11 -0
- package/dist/executor/index.js.map +1 -0
- package/dist/executor/memory-monitor.d.ts +115 -0
- package/dist/executor/memory-monitor.d.ts.map +1 -0
- package/dist/executor/memory-monitor.js +168 -0
- package/dist/executor/memory-monitor.js.map +1 -0
- package/dist/executor/parallel-executor.d.ts +239 -0
- package/dist/executor/parallel-executor.d.ts.map +1 -0
- package/dist/executor/parallel-executor.js +329 -0
- package/dist/executor/parallel-executor.js.map +1 -0
- package/dist/executor/run-id.d.ts +71 -0
- package/dist/executor/run-id.d.ts.map +1 -0
- package/dist/executor/run-id.js +74 -0
- package/dist/executor/run-id.js.map +1 -0
- package/dist/executor/worker-entry.d.ts +10 -0
- package/dist/executor/worker-entry.d.ts.map +1 -0
- package/dist/executor/worker-entry.js +42 -0
- package/dist/executor/worker-entry.js.map +1 -0
- package/dist/executor/worker-executor.d.ts +156 -0
- package/dist/executor/worker-executor.d.ts.map +1 -0
- package/dist/executor/worker-executor.js +88 -0
- package/dist/executor/worker-executor.js.map +1 -0
- package/dist/index.cjs +11 -0
- package/dist/index.d.ts +15 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +24 -0
- package/dist/index.js.map +1 -0
- package/dist/registry/case-registry.d.ts +113 -0
- package/dist/registry/case-registry.d.ts.map +1 -0
- package/dist/registry/case-registry.js +160 -0
- package/dist/registry/case-registry.js.map +1 -0
- package/dist/registry/index.d.ts +8 -0
- package/dist/registry/index.d.ts.map +1 -0
- package/dist/registry/index.js +8 -0
- package/dist/registry/index.js.map +1 -0
- package/dist/registry/sut-registry.d.ts +96 -0
- package/dist/registry/sut-registry.d.ts.map +1 -0
- package/dist/registry/sut-registry.js +126 -0
- package/dist/registry/sut-registry.js.map +1 -0
- package/dist/renderers/index.d.ts +10 -0
- package/dist/renderers/index.d.ts.map +1 -0
- package/dist/renderers/index.js +9 -0
- package/dist/renderers/index.js.map +1 -0
- package/dist/renderers/latex-renderer.d.ts +84 -0
- package/dist/renderers/latex-renderer.d.ts.map +1 -0
- package/dist/renderers/latex-renderer.js +208 -0
- package/dist/renderers/latex-renderer.js.map +1 -0
- package/dist/renderers/types.d.ts +106 -0
- package/dist/renderers/types.d.ts.map +1 -0
- package/dist/renderers/types.js +23 -0
- package/dist/renderers/types.js.map +1 -0
- package/dist/robustness/__tests__/analyzer.unit.test.d.ts +11 -0
- package/dist/robustness/__tests__/analyzer.unit.test.d.ts.map +1 -0
- package/dist/robustness/__tests__/analyzer.unit.test.js +455 -0
- package/dist/robustness/__tests__/analyzer.unit.test.js.map +1 -0
- package/dist/robustness/__tests__/perturbations.unit.test.d.ts +11 -0
- package/dist/robustness/__tests__/perturbations.unit.test.d.ts.map +1 -0
- package/dist/robustness/__tests__/perturbations.unit.test.js +284 -0
- package/dist/robustness/__tests__/perturbations.unit.test.js.map +1 -0
- package/dist/robustness/analyzer.d.ts +61 -0
- package/dist/robustness/analyzer.d.ts.map +1 -0
- package/dist/robustness/analyzer.js +191 -0
- package/dist/robustness/analyzer.js.map +1 -0
- package/dist/robustness/index.d.ts +8 -0
- package/dist/robustness/index.d.ts.map +1 -0
- package/dist/robustness/index.js +8 -0
- package/dist/robustness/index.js.map +1 -0
- package/dist/robustness/perturbations.d.ts +46 -0
- package/dist/robustness/perturbations.d.ts.map +1 -0
- package/dist/robustness/perturbations.js +184 -0
- package/dist/robustness/perturbations.js.map +1 -0
- package/dist/statistical/__tests__/mann-whitney-u.unit.test.d.ts +7 -0
- package/dist/statistical/__tests__/mann-whitney-u.unit.test.d.ts.map +1 -0
- package/dist/statistical/__tests__/mann-whitney-u.unit.test.js +185 -0
- package/dist/statistical/__tests__/mann-whitney-u.unit.test.js.map +1 -0
- package/dist/statistical/index.d.ts +8 -0
- package/dist/statistical/index.d.ts.map +1 -0
- package/dist/statistical/index.js +8 -0
- package/dist/statistical/index.js.map +1 -0
- package/dist/statistical/mann-whitney-u.d.ts +62 -0
- package/dist/statistical/mann-whitney-u.d.ts.map +1 -0
- package/dist/statistical/mann-whitney-u.js +127 -0
- package/dist/statistical/mann-whitney-u.js.map +1 -0
- package/dist/types/aggregate.d.ts +124 -0
- package/dist/types/aggregate.d.ts.map +1 -0
- package/dist/types/aggregate.js +9 -0
- package/dist/types/aggregate.js.map +1 -0
- package/dist/types/case.d.ts +105 -0
- package/dist/types/case.d.ts.map +1 -0
- package/dist/types/case.js +10 -0
- package/dist/types/case.js.map +1 -0
- package/dist/types/claims.d.ts +122 -0
- package/dist/types/claims.d.ts.map +1 -0
- package/dist/types/claims.js +14 -0
- package/dist/types/claims.js.map +1 -0
- package/dist/types/index.d.ts +12 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +7 -0
- package/dist/types/index.js.map +1 -0
- package/dist/types/perturbation.d.ts +105 -0
- package/dist/types/perturbation.d.ts.map +1 -0
- package/dist/types/perturbation.js +9 -0
- package/dist/types/perturbation.js.map +1 -0
- package/dist/types/result.d.ts +150 -0
- package/dist/types/result.d.ts.map +1 -0
- package/dist/types/result.js +12 -0
- package/dist/types/result.js.map +1 -0
- package/dist/types/sut.d.ts +128 -0
- package/dist/types/sut.d.ts.map +1 -0
- package/dist/types/sut.js +12 -0
- package/dist/types/sut.js.map +1 -0
- package/package.json +290 -7
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Aggregated Result Type Definitions
|
|
3
|
+
*
|
|
4
|
+
* Aggregated results summarise multiple evaluation runs with statistical
|
|
5
|
+
* measures. This is the intermediate format between raw results and
|
|
6
|
+
* final rendered output (LaTeX tables, etc.).
|
|
7
|
+
*/
|
|
8
|
+
import type { Primitive } from "./case.js";
|
|
9
|
+
import type { SutRole } from "./sut.js";
|
|
10
|
+
/**
|
|
11
|
+
* Summary statistics for a numeric metric.
|
|
12
|
+
*/
|
|
13
|
+
export interface SummaryStats {
|
|
14
|
+
/** Number of observations */
|
|
15
|
+
n: number;
|
|
16
|
+
/** Arithmetic mean */
|
|
17
|
+
mean: number;
|
|
18
|
+
/** Median (50th percentile) */
|
|
19
|
+
median: number;
|
|
20
|
+
/** Minimum value */
|
|
21
|
+
min: number;
|
|
22
|
+
/** Maximum value */
|
|
23
|
+
max: number;
|
|
24
|
+
/** Standard deviation (sample) */
|
|
25
|
+
std?: number;
|
|
26
|
+
/** 95% confidence interval [lower, upper] */
|
|
27
|
+
confidence95?: [number, number];
|
|
28
|
+
/** Sum of all values */
|
|
29
|
+
sum?: number;
|
|
30
|
+
/** 25th percentile */
|
|
31
|
+
p25?: number;
|
|
32
|
+
/** 75th percentile */
|
|
33
|
+
p75?: number;
|
|
34
|
+
}
|
|
35
|
+
/**
|
|
36
|
+
* Comparison metrics between primary and baseline SUTs.
|
|
37
|
+
*/
|
|
38
|
+
export interface ComparisonMetrics {
|
|
39
|
+
/** Absolute deltas (primary - baseline) */
|
|
40
|
+
deltas: Record<string, number>;
|
|
41
|
+
/** Ratios (primary / baseline) */
|
|
42
|
+
ratios: Record<string, number>;
|
|
43
|
+
/** Win rate (% of cases where primary beats baseline) */
|
|
44
|
+
betterRate?: number;
|
|
45
|
+
/** Mann-Whitney U statistic */
|
|
46
|
+
uStatistic?: number;
|
|
47
|
+
/** Statistical significance (p-value) */
|
|
48
|
+
pValue?: number;
|
|
49
|
+
/** Effect size (Cohen's d) */
|
|
50
|
+
effectSize?: number;
|
|
51
|
+
}
|
|
52
|
+
/**
|
|
53
|
+
* Coverage information for the aggregation.
|
|
54
|
+
*/
|
|
55
|
+
export interface CoverageMetrics {
|
|
56
|
+
/** Fraction of cases covered */
|
|
57
|
+
caseCoverage: number;
|
|
58
|
+
/** Metric availability (metric name -> coverage fraction) */
|
|
59
|
+
metricCoverage: Record<string, number>;
|
|
60
|
+
/** Missing case IDs */
|
|
61
|
+
missingCases?: string[];
|
|
62
|
+
}
|
|
63
|
+
/**
|
|
64
|
+
* Aggregated result for a SUT (optionally grouped by case class).
|
|
65
|
+
*/
|
|
66
|
+
export interface AggregatedResult {
|
|
67
|
+
/** SUT identifier */
|
|
68
|
+
sut: string;
|
|
69
|
+
/** SUT role */
|
|
70
|
+
sutRole: SutRole;
|
|
71
|
+
/** Case class (if grouped) */
|
|
72
|
+
caseClass?: string;
|
|
73
|
+
/** Grouping information */
|
|
74
|
+
group: {
|
|
75
|
+
/** Number of runs in this aggregate */
|
|
76
|
+
runCount: number;
|
|
77
|
+
/** Number of unique cases */
|
|
78
|
+
caseCount: number;
|
|
79
|
+
/** Hash of configuration (for homogeneity check) */
|
|
80
|
+
configHash?: string;
|
|
81
|
+
};
|
|
82
|
+
/** Correctness summary */
|
|
83
|
+
correctness: {
|
|
84
|
+
/** Fraction of runs that produced valid output */
|
|
85
|
+
validRate: number;
|
|
86
|
+
/** Fraction of runs that produced any output */
|
|
87
|
+
producedOutputRate: number;
|
|
88
|
+
/** Fraction of runs matching expected (if oracle available) */
|
|
89
|
+
matchesExpectedRate?: number;
|
|
90
|
+
/** Breakdown of failure types */
|
|
91
|
+
failureBreakdown?: Record<string, number>;
|
|
92
|
+
};
|
|
93
|
+
/** Aggregated metrics (metric name -> summary stats) */
|
|
94
|
+
metrics: Record<string, SummaryStats>;
|
|
95
|
+
/** Comparisons with baselines (baseline SUT id -> comparison) */
|
|
96
|
+
comparisons?: Record<string, ComparisonMetrics>;
|
|
97
|
+
/** Coverage information */
|
|
98
|
+
coverage?: CoverageMetrics;
|
|
99
|
+
/** Additional metadata */
|
|
100
|
+
metadata?: Record<string, Primitive>;
|
|
101
|
+
}
|
|
102
|
+
/**
|
|
103
|
+
* Complete aggregation output.
|
|
104
|
+
*/
|
|
105
|
+
export interface AggregationOutput {
|
|
106
|
+
/** Schema version */
|
|
107
|
+
version: string;
|
|
108
|
+
/** Generation timestamp */
|
|
109
|
+
timestamp: string;
|
|
110
|
+
/** Aggregated results */
|
|
111
|
+
aggregates: AggregatedResult[];
|
|
112
|
+
/** Global metadata */
|
|
113
|
+
metadata?: {
|
|
114
|
+
/** Total runs processed */
|
|
115
|
+
totalRuns: number;
|
|
116
|
+
/** Total unique cases */
|
|
117
|
+
totalCases: number;
|
|
118
|
+
/** SUTs included */
|
|
119
|
+
sutsIncluded: string[];
|
|
120
|
+
/** Case classes included */
|
|
121
|
+
caseClassesIncluded?: string[];
|
|
122
|
+
};
|
|
123
|
+
}
|
|
124
|
+
//# sourceMappingURL=aggregate.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"aggregate.d.ts","sourceRoot":"","sources":["../../src/types/aggregate.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAC3C,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,UAAU,CAAC;AAExC;;GAEG;AACH,MAAM,WAAW,YAAY;IAC5B,6BAA6B;IAC7B,CAAC,EAAE,MAAM,CAAC;IAEV,sBAAsB;IACtB,IAAI,EAAE,MAAM,CAAC;IAEb,+BAA+B;IAC/B,MAAM,EAAE,MAAM,CAAC;IAEf,oBAAoB;IACpB,GAAG,EAAE,MAAM,CAAC;IAEZ,oBAAoB;IACpB,GAAG,EAAE,MAAM,CAAC;IAEZ,kCAAkC;IAClC,GAAG,CAAC,EAAE,MAAM,CAAC;IAEb,6CAA6C;IAC7C,YAAY,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAEhC,wBAAwB;IACxB,GAAG,CAAC,EAAE,MAAM,CAAC;IAEb,sBAAsB;IACtB,GAAG,CAAC,EAAE,MAAM,CAAC;IAEb,sBAAsB;IACtB,GAAG,CAAC,EAAE,MAAM,CAAC;CACb;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IACjC,2CAA2C;IAC3C,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAE/B,kCAAkC;IAClC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAE/B,yDAAyD;IACzD,UAAU,CAAC,EAAE,MAAM,CAAC;IAEpB,+BAA+B;IAC/B,UAAU,CAAC,EAAE,MAAM,CAAC;IAEpB,yCAAyC;IACzC,MAAM,CAAC,EAAE,MAAM,CAAC;IAEhB,8BAA8B;IAC9B,UAAU,CAAC,EAAE,MAAM,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC/B,gCAAgC;IAChC,YAAY,EAAE,MAAM,CAAC;IAErB,6DAA6D;IAC7D,cAAc,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAEvC,uBAAuB;IACvB,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;CACxB;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAChC,qBAAqB;IACrB,GAAG,EAAE,MAAM,CAAC;IAEZ,eAAe;IACf,OAAO,EAAE,OAAO,CAAC;IAEjB,8BAA8B;IAC9B,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB,2BAA2B;IAC3B,KAAK,EAAE;QACN,uCAAuC;QACvC,QAAQ,EAAE,MAAM,CAAC;QAEjB,6BAA6B;QAC7B,SAAS,EAAE,MAAM,CAAC;QAElB,oDAAoD;QACpD,UAAU,CAAC,EAAE,MAAM,CAAC;KACpB,CAAC;IAEF,0BAA0B;IAC1B,WAAW,EAAE;QACZ,kDAAkD;QAClD,SAAS,EAAE,MAAM,CAAC;QAElB,gDAAgD;QAChD,kBAAkB,EAAE,MAAM,CAAC;QAE3B,+DAA+D;QAC/D,mBAAmB,CAAC,EAAE,MAAM,CAAC;QAE7B,iCAAiC;QACjC,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KAC1C,CAAC;IAEF,wDAAwD;IACxD,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,YAAY,CAAC,CAAC;IAEtC,iEAAiE;IACjE,WAAW,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,iBAAiB,CAAC,CAAC;IAEhD,2BAA2B;IAC3B,QAAQ,CAAC,EAAE,eAAe,CAAC;IAE3B,0BAA0B;IAC1B,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,
SAAS,CAAC,CAAC;CACrC;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IACjC,qBAAqB;IACrB,OAAO,EAAE,MAAM,CAAC;IAEhB,2BAA2B;IAC3B,SAAS,EAAE,MAAM,CAAC;IAElB,yBAAyB;IACzB,UAAU,EAAE,gBAAgB,EAAE,CAAC;IAE/B,sBAAsB;IACtB,QAAQ,CAAC,EAAE;QACV,2BAA2B;QAC3B,SAAS,EAAE,MAAM,CAAC;QAElB,yBAAyB;QACzB,UAAU,EAAE,MAAM,CAAC;QAEnB,oBAAoB;QACpB,YAAY,EAAE,MAAM,EAAE,CAAC;QAEvB,4BAA4B;QAC5B,mBAAmB,CAAC,EAAE,MAAM,EAAE,CAAC;KAC/B,CAAC;CACF"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Aggregated Result Type Definitions
|
|
3
|
+
*
|
|
4
|
+
* Aggregated results summarise multiple evaluation runs with statistical
|
|
5
|
+
* measures. This is the intermediate format between raw results and
|
|
6
|
+
* final rendered output (LaTeX tables, etc.).
|
|
7
|
+
*/
|
|
8
|
+
export {};
|
|
9
|
+
//# sourceMappingURL=aggregate.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"aggregate.js","sourceRoot":"","sources":["../../src/types/aggregate.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG"}
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Evaluation Case Type Definitions
|
|
3
|
+
*
|
|
4
|
+
* A case represents a single evaluation scenario with:
|
|
5
|
+
* - Deterministic inputs (graph, seeds, etc.)
|
|
6
|
+
* - Expected behavior or ground truth (if applicable)
|
|
7
|
+
* - Grouping metadata for aggregation
|
|
8
|
+
*/
|
|
9
|
+
/**
|
|
10
|
+
* Primitive types allowed in case summaries.
|
|
11
|
+
*/
|
|
12
|
+
export type Primitive = string | number | boolean | null;
|
|
13
|
+
/**
|
|
14
|
+
* Reference to an external artefact (graph file, path set, etc.).
|
|
15
|
+
*/
|
|
16
|
+
export interface ArtefactReference {
|
|
17
|
+
/** Type of artefact */
|
|
18
|
+
type: "graph" | "path-set" | "subgraph" | "embedding" | "other";
|
|
19
|
+
/** URI or path to artefact */
|
|
20
|
+
uri: string;
|
|
21
|
+
/** Content hash for integrity verification */
|
|
22
|
+
hash?: string;
|
|
23
|
+
/** Optional metadata */
|
|
24
|
+
metadata?: Record<string, Primitive>;
|
|
25
|
+
}
|
|
26
|
+
/**
|
|
27
|
+
* Input specification for an evaluation case.
|
|
28
|
+
*/
|
|
29
|
+
export interface CaseInputs {
|
|
30
|
+
/** Summary values: primitives or arrays of primitives (e.g., { nodes: 100, seeds: ["a", "b"] }) */
|
|
31
|
+
summary?: Record<string, Primitive | Primitive[]>;
|
|
32
|
+
/** References to external artefacts */
|
|
33
|
+
artefacts?: ArtefactReference[];
|
|
34
|
+
}
|
|
35
|
+
/**
|
|
36
|
+
* A single evaluation case.
|
|
37
|
+
*
|
|
38
|
+
* The caseId should be a deterministic hash of the canonical inputs
|
|
39
|
+
* to ensure reproducibility across runs.
|
|
40
|
+
*/
|
|
41
|
+
export interface EvaluationCase {
|
|
42
|
+
/** Deterministic ID (SHA-256 of canonical inputs) */
|
|
43
|
+
caseId: string;
|
|
44
|
+
/** Human-readable name */
|
|
45
|
+
name?: string;
|
|
46
|
+
/** Grouping label for aggregation (e.g., "scale-free", "bidirectional") */
|
|
47
|
+
caseClass?: string;
|
|
48
|
+
/** Input specification */
|
|
49
|
+
inputs: CaseInputs;
|
|
50
|
+
/** Optional expected output for oracle-based evaluation */
|
|
51
|
+
expectedOutput?: {
|
|
52
|
+
/** Expected summary values */
|
|
53
|
+
summary?: Record<string, Primitive | Primitive[]>;
|
|
54
|
+
/** Expected labels */
|
|
55
|
+
labels?: Record<string, Primitive>;
|
|
56
|
+
/** Expected ranking (for ranking tasks) */
|
|
57
|
+
ranking?: {
|
|
58
|
+
itemId: string;
|
|
59
|
+
score: number;
|
|
60
|
+
}[];
|
|
61
|
+
};
|
|
62
|
+
/** Version of this case definition */
|
|
63
|
+
version?: string;
|
|
64
|
+
/** Tags for filtering */
|
|
65
|
+
tags?: readonly string[];
|
|
66
|
+
}
|
|
67
|
+
/**
|
|
68
|
+
* Complete case definition with universal input factories.
|
|
69
|
+
*
|
|
70
|
+
* The framework doesn't need to know what "expander" or "seeds" mean.
|
|
71
|
+
* It only needs:
|
|
72
|
+
* 1. getInput() - Load whatever resource the algorithm needs (graph, dataset, API client, etc.)
|
|
73
|
+
* 2. getInputs() - Get algorithm-specific inputs from the case
|
|
74
|
+
*
|
|
75
|
+
* @template TInput - The resource type (e.g., Graph, Dataset, API client)
|
|
76
|
+
* @template TInputs - The algorithm inputs type
|
|
77
|
+
*/
|
|
78
|
+
export interface CaseDefinition<TInput = unknown, TInputs = unknown> {
|
|
79
|
+
/** The case specification */
|
|
80
|
+
case: EvaluationCase;
|
|
81
|
+
/**
|
|
82
|
+
* Load the primary resource needed by the algorithm.
|
|
83
|
+
* Called once per case; the result is cached.
|
|
84
|
+
*
|
|
85
|
+
* Examples:
|
|
86
|
+
* - Expansion: Load a benchmark graph
|
|
87
|
+
* - Ranking: Load a graph with source/target metadata
|
|
88
|
+
* - ML: Load training dataset
|
|
89
|
+
*
|
|
90
|
+
* @returns Promise resolving to the resource
|
|
91
|
+
*/
|
|
92
|
+
getInput(): Promise<TInput>;
|
|
93
|
+
/**
|
|
94
|
+
* Get algorithm-specific inputs for this case.
|
|
95
|
+
*
|
|
96
|
+
* Examples:
|
|
97
|
+
* - Expansion: { seeds: ["node1", "node2"] }
|
|
98
|
+
* - Ranking: { source: "node1", target: "node2", maxPaths: 10 }
|
|
99
|
+
* - Classification: { labels: ["cat", "dog"], threshold: 0.5 }
|
|
100
|
+
*
|
|
101
|
+
* @returns Algorithm inputs
|
|
102
|
+
*/
|
|
103
|
+
getInputs(): TInputs;
|
|
104
|
+
}
|
|
105
|
+
//# sourceMappingURL=case.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"case.d.ts","sourceRoot":"","sources":["../../src/types/case.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH;;GAEG;AACH,MAAM,MAAM,SAAS,GAAG,MAAM,GAAG,MAAM,GAAG,OAAO,GAAG,IAAI,CAAC;AAEzD;;GAEG;AACH,MAAM,WAAW,iBAAiB;IACjC,uBAAuB;IACvB,IAAI,EAAE,OAAO,GAAG,UAAU,GAAG,UAAU,GAAG,WAAW,GAAG,OAAO,CAAC;IAEhE,8BAA8B;IAC9B,GAAG,EAAE,MAAM,CAAC;IAEZ,8CAA8C;IAC9C,IAAI,CAAC,EAAE,MAAM,CAAC;IAEd,wBAAwB;IACxB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;CACrC;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IAC1B,sEAAsE;IACtE,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,GAAG,SAAS,EAAE,CAAC,CAAC;IAElD,uCAAuC;IACvC,SAAS,CAAC,EAAE,iBAAiB,EAAE,CAAC;CAChC;AAED;;;;;GAKG;AACH,MAAM,WAAW,cAAc;IAC9B,qDAAqD;IACrD,MAAM,EAAE,MAAM,CAAC;IAEf,0BAA0B;IAC1B,IAAI,CAAC,EAAE,MAAM,CAAC;IAEd,2EAA2E;IAC3E,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB,0BAA0B;IAC1B,MAAM,EAAE,UAAU,CAAC;IAEnB,2DAA2D;IAC3D,cAAc,CAAC,EAAE;QAChB,8BAA8B;QAC9B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,GAAG,SAAS,EAAE,CAAC,CAAC;QAElD,sBAAsB;QACtB,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;QAEnC,2CAA2C;QAC3C,OAAO,CAAC,EAAE;YAAE,MAAM,EAAE,MAAM,CAAC;YAAC,KAAK,EAAE,MAAM,CAAA;SAAE,EAAE,CAAC;KAC9C,CAAC;IAEF,sCAAsC;IACtC,OAAO,CAAC,EAAE,MAAM,CAAC;IAEjB,yBAAyB;IACzB,IAAI,CAAC,EAAE,SAAS,MAAM,EAAE,CAAC;CACzB;AAED;;;;;;;;;;GAUG;AACH,MAAM,WAAW,cAAc,CAAC,MAAM,GAAG,OAAO,EAAE,OAAO,GAAG,OAAO;IAClE,6BAA6B;IAC7B,IAAI,EAAE,cAAc,CAAC;IAErB;;;;;;;;;;OAUG;IACH,QAAQ,IAAI,OAAO,CAAC,MAAM,CAAC,CAAC;IAE5B;;;;;;;;;OASG;IACH,SAAS,IAAI,OAAO,CAAC;CACrB"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Evaluation Case Type Definitions
|
|
3
|
+
*
|
|
4
|
+
* A case represents a single evaluation scenario with:
|
|
5
|
+
* - Deterministic inputs (graph, seeds, etc.)
|
|
6
|
+
* - Expected behavior or ground truth (if applicable)
|
|
7
|
+
* - Grouping metadata for aggregation
|
|
8
|
+
*/
|
|
9
|
+
export {};
|
|
10
|
+
//# sourceMappingURL=case.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"case.js","sourceRoot":"","sources":["../../src/types/case.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG"}
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Evaluation Claims Type Definitions
|
|
3
|
+
*
|
|
4
|
+
* Claims represent explicit hypotheses to be tested. Each claim specifies:
|
|
5
|
+
* - Which SUTs are being compared
|
|
6
|
+
* - Which metric is being evaluated
|
|
7
|
+
* - The expected relationship (greater, less, equal)
|
|
8
|
+
* - The scope of validity
|
|
9
|
+
*
|
|
10
|
+
* This enables claim-driven evaluation where experiments are designed
|
|
11
|
+
* to test specific hypotheses rather than collect arbitrary metrics.
|
|
12
|
+
*/
|
|
13
|
+
import type { Primitive } from "./case.js";
|
|
14
|
+
/**
|
|
15
|
+
* Scope of claim validity.
|
|
16
|
+
*
|
|
17
|
+
* - `global`: Claim should hold across all cases
|
|
18
|
+
* - `caseClass`: Claim holds within specific case classes
|
|
19
|
+
* - `parameterRange`: Claim holds for specific parameter ranges
|
|
20
|
+
* - `localStructure`: Claim depends on local graph structure
|
|
21
|
+
*/
|
|
22
|
+
export type ValidityScope = "global" | "caseClass" | "parameterRange" | "localStructure";
|
|
23
|
+
/**
|
|
24
|
+
* Direction of comparison.
|
|
25
|
+
*/
|
|
26
|
+
export type ComparisonDirection = "greater" | "less" | "equal";
|
|
27
|
+
/**
|
|
28
|
+
* An evaluation claim (hypothesis).
|
|
29
|
+
*/
|
|
30
|
+
export interface EvaluationClaim {
|
|
31
|
+
/** Unique identifier for this claim */
|
|
32
|
+
claimId: string;
|
|
33
|
+
/** Human-readable description */
|
|
34
|
+
description: string;
|
|
35
|
+
/** Primary SUT being evaluated */
|
|
36
|
+
sut: string;
|
|
37
|
+
/** Baseline SUT for comparison */
|
|
38
|
+
baseline: string;
|
|
39
|
+
/** Metric being compared */
|
|
40
|
+
metric: string;
|
|
41
|
+
/** Expected direction of difference */
|
|
42
|
+
direction: ComparisonDirection;
|
|
43
|
+
/** Optional threshold for the difference */
|
|
44
|
+
threshold?: number;
|
|
45
|
+
/** Scope of validity */
|
|
46
|
+
scope: ValidityScope;
|
|
47
|
+
/** Scope constraints (e.g., { caseClass: "scale-free" }) */
|
|
48
|
+
scopeConstraints?: Record<string, Primitive | Primitive[]>;
|
|
49
|
+
/** Required significance level (default: 0.05) */
|
|
50
|
+
significanceLevel?: number;
|
|
51
|
+
/** Minimum effect size (Cohen's d) */
|
|
52
|
+
minEffectSize?: number;
|
|
53
|
+
/** Tags for filtering */
|
|
54
|
+
tags?: readonly string[];
|
|
55
|
+
/** Citation/reference for the claim */
|
|
56
|
+
citation?: string;
|
|
57
|
+
}
|
|
58
|
+
/**
|
|
59
|
+
* Status of a claim evaluation.
|
|
60
|
+
*/
|
|
61
|
+
export type ClaimStatus = "satisfied" | "violated" | "inconclusive";
|
|
62
|
+
/**
|
|
63
|
+
* Evidence supporting a claim evaluation.
|
|
64
|
+
*/
|
|
65
|
+
export interface ClaimEvidence {
|
|
66
|
+
/** Primary SUT metric value */
|
|
67
|
+
primaryValue: number;
|
|
68
|
+
/** Baseline SUT metric value */
|
|
69
|
+
baselineValue: number;
|
|
70
|
+
/** Absolute delta (primary - baseline) */
|
|
71
|
+
delta: number;
|
|
72
|
+
/** Ratio (primary / baseline) */
|
|
73
|
+
ratio: number;
|
|
74
|
+
/** P-value from statistical test */
|
|
75
|
+
pValue?: number;
|
|
76
|
+
/** Effect size (Cohen's d) */
|
|
77
|
+
effectSize?: number;
|
|
78
|
+
/** Number of observations */
|
|
79
|
+
n?: number;
|
|
80
|
+
/** 95% confidence interval for delta */
|
|
81
|
+
deltaCI95?: [number, number];
|
|
82
|
+
}
|
|
83
|
+
/**
|
|
84
|
+
* Result of evaluating a single claim.
|
|
85
|
+
*/
|
|
86
|
+
export interface ClaimEvaluation {
|
|
87
|
+
/** The claim being evaluated */
|
|
88
|
+
claim: EvaluationClaim;
|
|
89
|
+
/** Evaluation status */
|
|
90
|
+
status: ClaimStatus;
|
|
91
|
+
/** Supporting evidence */
|
|
92
|
+
evidence: ClaimEvidence;
|
|
93
|
+
/** Reason for inconclusive status (if applicable) */
|
|
94
|
+
inconclusiveReason?: string;
|
|
95
|
+
/** Additional notes */
|
|
96
|
+
notes?: string[];
|
|
97
|
+
}
|
|
98
|
+
/**
|
|
99
|
+
* Summary of all claim evaluations.
|
|
100
|
+
*/
|
|
101
|
+
export interface ClaimEvaluationSummary {
|
|
102
|
+
/** Schema version */
|
|
103
|
+
version: string;
|
|
104
|
+
/** Generation timestamp */
|
|
105
|
+
timestamp: string;
|
|
106
|
+
/** Individual claim evaluations */
|
|
107
|
+
evaluations: ClaimEvaluation[];
|
|
108
|
+
/** Summary statistics */
|
|
109
|
+
summary: {
|
|
110
|
+
/** Total claims evaluated */
|
|
111
|
+
total: number;
|
|
112
|
+
/** Claims satisfied */
|
|
113
|
+
satisfied: number;
|
|
114
|
+
/** Claims violated */
|
|
115
|
+
violated: number;
|
|
116
|
+
/** Claims inconclusive */
|
|
117
|
+
inconclusive: number;
|
|
118
|
+
/** Satisfaction rate (satisfied / (satisfied + violated)) */
|
|
119
|
+
satisfactionRate: number;
|
|
120
|
+
};
|
|
121
|
+
}
|
|
122
|
+
//# sourceMappingURL=claims.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"claims.d.ts","sourceRoot":"","sources":["../../src/types/claims.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAE3C;;;;;;;GAOG;AACH,MAAM,MAAM,aAAa,GAAG,QAAQ,GAAG,WAAW,GAAG,gBAAgB,GAAG,gBAAgB,CAAC;AAEzF;;GAEG;AACH,MAAM,MAAM,mBAAmB,GAAG,SAAS,GAAG,MAAM,GAAG,OAAO,CAAC;AAE/D;;GAEG;AACH,MAAM,WAAW,eAAe;IAC/B,uCAAuC;IACvC,OAAO,EAAE,MAAM,CAAC;IAEhB,iCAAiC;IACjC,WAAW,EAAE,MAAM,CAAC;IAEpB,kCAAkC;IAClC,GAAG,EAAE,MAAM,CAAC;IAEZ,kCAAkC;IAClC,QAAQ,EAAE,MAAM,CAAC;IAEjB,4BAA4B;IAC5B,MAAM,EAAE,MAAM,CAAC;IAEf,uCAAuC;IACvC,SAAS,EAAE,mBAAmB,CAAC;IAE/B,4CAA4C;IAC5C,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB,wBAAwB;IACxB,KAAK,EAAE,aAAa,CAAC;IAErB,4DAA4D;IAC5D,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,GAAG,SAAS,EAAE,CAAC,CAAC;IAE3D,kDAAkD;IAClD,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAE3B,sCAAsC;IACtC,aAAa,CAAC,EAAE,MAAM,CAAC;IAEvB,yBAAyB;IACzB,IAAI,CAAC,EAAE,SAAS,MAAM,EAAE,CAAC;IAEzB,uCAAuC;IACvC,QAAQ,CAAC,EAAE,MAAM,CAAC;CAClB;AAED;;GAEG;AACH,MAAM,MAAM,WAAW,GAAG,WAAW,GAAG,UAAU,GAAG,cAAc,CAAC;AAEpE;;GAEG;AACH,MAAM,WAAW,aAAa;IAC7B,+BAA+B;IAC/B,YAAY,EAAE,MAAM,CAAC;IAErB,gCAAgC;IAChC,aAAa,EAAE,MAAM,CAAC;IAEtB,0CAA0C;IAC1C,KAAK,EAAE,MAAM,CAAC;IAEd,iCAAiC;IACjC,KAAK,EAAE,MAAM,CAAC;IAEd,oCAAoC;IACpC,MAAM,CAAC,EAAE,MAAM,CAAC;IAEhB,8BAA8B;IAC9B,UAAU,CAAC,EAAE,MAAM,CAAC;IAEpB,6BAA6B;IAC7B,CAAC,CAAC,EAAE,MAAM,CAAC;IAEX,wCAAwC;IACxC,SAAS,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAC7B;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC/B,gCAAgC;IAChC,KAAK,EAAE,eAAe,CAAC;IAEvB,wBAAwB;IACxB,MAAM,EAAE,WAAW,CAAC;IAEpB,0BAA0B;IAC1B,QAAQ,EAAE,aAAa,CAAC;IAExB,qDAAqD;IACrD,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAE5B,uBAAuB;IACvB,KAAK,CAAC,EAAE,MAAM,EAAE,CAAC;CACjB;AAED;;GAEG;AACH,MAAM,WAAW,sBAAsB;IACtC,qBAAqB;IACrB,OAAO,EAAE,MAAM,CAAC;IAEhB,2BAA2B;IAC3B,SAAS,EAAE,MAAM,CAAC;IAElB,mCAAmC;IACnC,WAAW,EAAE,eAAe,EAAE,CAAC;IAE/B,yBAAyB;IACzB,OAAO,EAAE;QACR,6BAA6B;QAC7B,KAAK,EAAE,MAAM,CAAC;QAEd,uBAAuB;QACvB,SAAS,EAAE,MAAM,CAAC;QAElB,sBAAsB;QACtB,QAAQ,EAAE,MAAM,CAAC;QAEjB,0BAA0B;QAC1B,YAAY,EAAE,MAAM,CAA
C;QAErB,6DAA6D;QAC7D,gBAAgB,EAAE,MAAM,CAAC;KACzB,CAAC;CACF"}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Evaluation Claims Type Definitions
|
|
3
|
+
*
|
|
4
|
+
* Claims represent explicit hypotheses to be tested. Each claim specifies:
|
|
5
|
+
* - Which SUTs are being compared
|
|
6
|
+
* - Which metric is being evaluated
|
|
7
|
+
* - The expected relationship (greater, less, equal)
|
|
8
|
+
* - The scope of validity
|
|
9
|
+
*
|
|
10
|
+
* This enables claim-driven evaluation where experiments are designed
|
|
11
|
+
* to test specific hypotheses rather than collect arbitrary metrics.
|
|
12
|
+
*/
|
|
13
|
+
export {};
|
|
14
|
+
//# sourceMappingURL=claims.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"claims.js","sourceRoot":"","sources":["../../src/types/claims.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG"}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Framework Type Definitions
|
|
3
|
+
*
|
|
4
|
+
* Re-exports all canonical types for the evaluation framework.
|
|
5
|
+
*/
|
|
6
|
+
export type { SutDefinition, SutFactory, SutRegistration, SutRole } from "./sut.js";
|
|
7
|
+
export type { ArtefactReference, CaseDefinition, CaseInputs, EvaluationCase, Primitive, } from "./case.js";
|
|
8
|
+
export type { CorrectnessResult, EvaluationResult, FailureType, Provenance, RankedItem, ResultBatch, ResultMetrics, ResultOutputs, RunContext, } from "./result.js";
|
|
9
|
+
export type { AggregatedResult, AggregationOutput, ComparisonMetrics, CoverageMetrics, SummaryStats, } from "./aggregate.js";
|
|
10
|
+
export type { ClaimEvaluation, ClaimEvaluationSummary, ClaimEvidence, ClaimStatus, ComparisonDirection, EvaluationClaim, ValidityScope, } from "./claims.js";
|
|
11
|
+
export type { Perturbation, PerturbationConfig, PerturbationType, RobustnessAnalysisOutput, RobustnessAnalysisResult, RobustnessMetrics, } from "./perturbation.js";
|
|
12
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/types/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAGH,YAAY,EAAE,aAAa,EAAE,UAAU,EAAE,eAAe,EAAE,OAAO,EAAE,MAAM,UAAU,CAAC;AAGpF,YAAY,EACX,iBAAiB,EACjB,cAAc,EACd,UAAU,EACV,cAAc,EACd,SAAS,GACT,MAAM,WAAW,CAAC;AAGnB,YAAY,EACX,iBAAiB,EACjB,gBAAgB,EAChB,WAAW,EACX,UAAU,EACV,UAAU,EACV,WAAW,EACX,aAAa,EACb,aAAa,EACb,UAAU,GACV,MAAM,aAAa,CAAC;AAGrB,YAAY,EACX,gBAAgB,EAChB,iBAAiB,EACjB,iBAAiB,EACjB,eAAe,EACf,YAAY,GACZ,MAAM,gBAAgB,CAAC;AAGxB,YAAY,EACX,eAAe,EACf,sBAAsB,EACtB,aAAa,EACb,WAAW,EACX,mBAAmB,EACnB,eAAe,EACf,aAAa,GACb,MAAM,aAAa,CAAC;AAGrB,YAAY,EACX,YAAY,EACZ,kBAAkB,EAClB,gBAAgB,EAChB,wBAAwB,EACxB,wBAAwB,EACxB,iBAAiB,GACjB,MAAM,mBAAmB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/types/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG"}
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Perturbation Type Definitions
|
|
3
|
+
*
|
|
4
|
+
* Perturbations modify evaluation cases to test algorithm robustness.
|
|
5
|
+
* They enable sensitivity analysis by measuring how metrics change
|
|
6
|
+
* under controlled variations of the input.
|
|
7
|
+
*/
|
|
8
|
+
import type { EvaluationCase, Primitive } from "./case.js";
|
|
9
|
+
/**
|
|
10
|
+
* A perturbation that modifies an evaluation case.
|
|
11
|
+
*/
|
|
12
|
+
export interface Perturbation {
|
|
13
|
+
/** Unique identifier */
|
|
14
|
+
id: string;
|
|
15
|
+
/** Human-readable name */
|
|
16
|
+
name: string;
|
|
17
|
+
/** Description of what the perturbation does */
|
|
18
|
+
description: string;
|
|
19
|
+
/** Type of perturbation */
|
|
20
|
+
type: "structural" | "seed" | "noise" | "parameter";
|
|
21
|
+
/** Perturbation intensity (0-1 scale) */
|
|
22
|
+
intensity?: number;
|
|
23
|
+
/** Apply the perturbation to a case */
|
|
24
|
+
apply(evaluationCase: EvaluationCase, seed?: number): EvaluationCase;
|
|
25
|
+
}
|
|
26
|
+
/**
|
|
27
|
+
* Built-in perturbation types.
|
|
28
|
+
*/
|
|
29
|
+
export type PerturbationType = "edge-removal" | "edge-addition" | "seed-shift" | "node-removal" | "degree-rewiring" | "weight-noise";
|
|
30
|
+
/**
|
|
31
|
+
* Configuration for a perturbation.
|
|
32
|
+
*/
|
|
33
|
+
export interface PerturbationConfig {
|
|
34
|
+
/** Perturbation type */
|
|
35
|
+
type: PerturbationType;
|
|
36
|
+
/** Intensity (e.g., fraction of edges to remove) */
|
|
37
|
+
intensity: number;
|
|
38
|
+
/** Random seed for reproducibility */
|
|
39
|
+
seed?: number;
|
|
40
|
+
/** Additional type-specific parameters */
|
|
41
|
+
params?: Record<string, Primitive>;
|
|
42
|
+
}
|
|
43
|
+
/**
|
|
44
|
+
* Metrics for robustness analysis.
|
|
45
|
+
*/
|
|
46
|
+
export interface RobustnessMetrics {
|
|
47
|
+
/** Variance of metric under perturbation */
|
|
48
|
+
varianceUnderPerturbation: number;
|
|
49
|
+
/** Standard deviation */
|
|
50
|
+
stdUnderPerturbation: number;
|
|
51
|
+
/** Coefficient of variation */
|
|
52
|
+
coefficientOfVariation: number;
|
|
53
|
+
/** Ranking stability (Kendall's tau between perturbed rankings) */
|
|
54
|
+
rankingStability?: number;
|
|
55
|
+
/** Degradation curve: metric value at each perturbation level */
|
|
56
|
+
degradationCurve?: {
|
|
57
|
+
perturbationLevel: number;
|
|
58
|
+
metricValue: number;
|
|
59
|
+
stdDev?: number;
|
|
60
|
+
}[];
|
|
61
|
+
/** Breakpoint: intensity where metric degrades significantly */
|
|
62
|
+
breakpoint?: number;
|
|
63
|
+
}
|
|
64
|
+
/**
|
|
65
|
+
* Result of robustness analysis for a single SUT.
|
|
66
|
+
*/
|
|
67
|
+
export interface RobustnessAnalysisResult {
|
|
68
|
+
/** SUT identifier */
|
|
69
|
+
sut: string;
|
|
70
|
+
/** Case class (if grouped) */
|
|
71
|
+
caseClass?: string;
|
|
72
|
+
/** Perturbation applied */
|
|
73
|
+
perturbation: string;
|
|
74
|
+
/** Metric being analyzed */
|
|
75
|
+
metric: string;
|
|
76
|
+
/** Robustness metrics */
|
|
77
|
+
robustness: RobustnessMetrics;
|
|
78
|
+
/** Baseline (unperturbed) value */
|
|
79
|
+
baselineValue: number;
|
|
80
|
+
/** Number of perturbation runs */
|
|
81
|
+
runCount: number;
|
|
82
|
+
}
|
|
83
|
+
/**
|
|
84
|
+
* Complete robustness analysis output.
|
|
85
|
+
*/
|
|
86
|
+
export interface RobustnessAnalysisOutput {
|
|
87
|
+
/** Schema version */
|
|
88
|
+
version: string;
|
|
89
|
+
/** Generation timestamp */
|
|
90
|
+
timestamp: string;
|
|
91
|
+
/** Individual analysis results */
|
|
92
|
+
results: RobustnessAnalysisResult[];
|
|
93
|
+
/** Configuration used */
|
|
94
|
+
config: {
|
|
95
|
+
/** Perturbations applied */
|
|
96
|
+
perturbations: string[];
|
|
97
|
+
/** Metrics analyzed */
|
|
98
|
+
metrics: string[];
|
|
99
|
+
/** Intensity levels tested */
|
|
100
|
+
intensityLevels?: number[];
|
|
101
|
+
/** Runs per perturbation level */
|
|
102
|
+
runsPerLevel: number;
|
|
103
|
+
};
|
|
104
|
+
}
|
|
105
|
+
//# sourceMappingURL=perturbation.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"perturbation.d.ts","sourceRoot":"","sources":["../../src/types/perturbation.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,KAAK,EAAE,cAAc,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAE3D;;GAEG;AACH,MAAM,WAAW,YAAY;IAC5B,wBAAwB;IACxB,EAAE,EAAE,MAAM,CAAC;IAEX,0BAA0B;IAC1B,IAAI,EAAE,MAAM,CAAC;IAEb,gDAAgD;IAChD,WAAW,EAAE,MAAM,CAAC;IAEpB,2BAA2B;IAC3B,IAAI,EAAE,YAAY,GAAG,MAAM,GAAG,OAAO,GAAG,WAAW,CAAC;IAEpD,yCAAyC;IACzC,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB,uCAAuC;IACvC,KAAK,CAAC,cAAc,EAAE,cAAc,EAAE,IAAI,CAAC,EAAE,MAAM,GAAG,cAAc,CAAC;CACrE;AAED;;GAEG;AACH,MAAM,MAAM,gBAAgB,GACzB,cAAc,GACd,eAAe,GACf,YAAY,GACZ,cAAc,GACd,iBAAiB,GACjB,cAAc,CAAC;AAElB;;GAEG;AACH,MAAM,WAAW,kBAAkB;IAClC,wBAAwB;IACxB,IAAI,EAAE,gBAAgB,CAAC;IAEvB,oDAAoD;IACpD,SAAS,EAAE,MAAM,CAAC;IAElB,sCAAsC;IACtC,IAAI,CAAC,EAAE,MAAM,CAAC;IAEd,0CAA0C;IAC1C,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;CACnC;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IACjC,4CAA4C;IAC5C,yBAAyB,EAAE,MAAM,CAAC;IAElC,yBAAyB;IACzB,oBAAoB,EAAE,MAAM,CAAC;IAE7B,+BAA+B;IAC/B,sBAAsB,EAAE,MAAM,CAAC;IAE/B,mEAAmE;IACnE,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAE1B,iEAAiE;IACjE,gBAAgB,CAAC,EAAE;QAClB,iBAAiB,EAAE,MAAM,CAAC;QAC1B,WAAW,EAAE,MAAM,CAAC;QACpB,MAAM,CAAC,EAAE,MAAM,CAAC;KAChB,EAAE,CAAC;IAEJ,gEAAgE;IAChE,UAAU,CAAC,EAAE,MAAM,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,wBAAwB;IACxC,qBAAqB;IACrB,GAAG,EAAE,MAAM,CAAC;IAEZ,8BAA8B;IAC9B,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB,2BAA2B;IAC3B,YAAY,EAAE,MAAM,CAAC;IAErB,4BAA4B;IAC5B,MAAM,EAAE,MAAM,CAAC;IAEf,yBAAyB;IACzB,UAAU,EAAE,iBAAiB,CAAC;IAE9B,mCAAmC;IACnC,aAAa,EAAE,MAAM,CAAC;IAEtB,kCAAkC;IAClC,QAAQ,EAAE,MAAM,CAAC;CACjB;AAED;;GAEG;AACH,MAAM,WAAW,wBAAwB;IACxC,qBAAqB;IACrB,OAAO,EAAE,MAAM,CAAC;IAEhB,2BAA2B;IAC3B,SAAS,EAAE,MAAM,CAAC;IAElB,kCAAkC;IAClC,OAAO,EAAE,wBAAwB,EAAE,CAAC;IAEpC,yBAAyB;IACzB,MAAM,EAAE;QACP,4BAA4B;QAC5B,aAAa,EAAE,MAAM,EAAE,CAAC;QAExB,uBAAuB;QACvB,OAAO,EAAE,MAAM,EAAE,CAAC;QAElB,8BAA8B;QAC9B,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;QAE3B,kCAAkC;QAClC,YAAY,EAAE,MAAM,CAAC;KACrB,CAAC;CACF"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Perturbation Type Definitions
|
|
3
|
+
*
|
|
4
|
+
* Perturbations modify evaluation cases to test algorithm robustness.
|
|
5
|
+
* They enable sensitivity analysis by measuring how metrics change
|
|
6
|
+
* under controlled variations of the input.
|
|
7
|
+
*/
|
|
8
|
+
export {};
|
|
9
|
+
//# sourceMappingURL=perturbation.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"perturbation.js","sourceRoot":"","sources":["../../src/types/perturbation.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG"}
|