ppef 1.1.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +76 -125
- package/dist/__tests__/cli/evaluate-command.integration.test.d.ts +8 -0
- package/dist/__tests__/cli/evaluate-command.integration.test.d.ts.map +1 -0
- package/dist/__tests__/cli/evaluate-command.integration.test.js +308 -0
- package/dist/__tests__/cli/evaluate-command.integration.test.js.map +1 -0
- package/dist/__tests__/evaluators/claims-evaluator.unit.test.d.ts +8 -0
- package/dist/__tests__/evaluators/claims-evaluator.unit.test.d.ts.map +1 -0
- package/dist/__tests__/evaluators/claims-evaluator.unit.test.js +405 -0
- package/dist/__tests__/evaluators/claims-evaluator.unit.test.js.map +1 -0
- package/dist/__tests__/evaluators/metrics-evaluator.unit.test.d.ts +8 -0
- package/dist/__tests__/evaluators/metrics-evaluator.unit.test.d.ts.map +1 -0
- package/dist/__tests__/evaluators/metrics-evaluator.unit.test.js +424 -0
- package/dist/__tests__/evaluators/metrics-evaluator.unit.test.js.map +1 -0
- package/dist/__tests__/evaluators/registry.unit.test.d.ts +7 -0
- package/dist/__tests__/evaluators/registry.unit.test.d.ts.map +1 -0
- package/dist/__tests__/evaluators/registry.unit.test.js +173 -0
- package/dist/__tests__/evaluators/registry.unit.test.js.map +1 -0
- package/dist/__tests__/evaluators/robustness-evaluator.unit.test.d.ts +8 -0
- package/dist/__tests__/evaluators/robustness-evaluator.unit.test.d.ts.map +1 -0
- package/dist/__tests__/evaluators/robustness-evaluator.unit.test.js +260 -0
- package/dist/__tests__/evaluators/robustness-evaluator.unit.test.js.map +1 -0
- package/dist/__tests__/framework-pipeline.integration.test.js +36 -9
- package/dist/__tests__/framework-pipeline.integration.test.js.map +1 -1
- package/dist/__tests__/index-exports.unit.test.js +9 -12
- package/dist/__tests__/index-exports.unit.test.js.map +1 -1
- package/dist/aggregation/pipeline.d.ts.map +1 -1
- package/dist/aggregation/pipeline.js +40 -3
- package/dist/aggregation/pipeline.js.map +1 -1
- package/dist/claims/index.d.ts +6 -3
- package/dist/claims/index.d.ts.map +1 -1
- package/dist/claims/index.js +6 -3
- package/dist/claims/index.js.map +1 -1
- package/dist/cli/__tests__/aggregate.command.unit.test.js +3 -0
- package/dist/cli/__tests__/aggregate.command.unit.test.js.map +1 -1
- package/dist/cli/__tests__/binary-sut.integration.test.d.ts +8 -0
- package/dist/cli/__tests__/binary-sut.integration.test.d.ts.map +1 -0
- package/dist/cli/__tests__/binary-sut.integration.test.js +165 -0
- package/dist/cli/__tests__/binary-sut.integration.test.js.map +1 -0
- package/dist/cli/__tests__/config-loader.unit.test.d.ts +7 -0
- package/dist/cli/__tests__/config-loader.unit.test.d.ts.map +1 -0
- package/dist/cli/__tests__/config-loader.unit.test.js +611 -0
- package/dist/cli/__tests__/config-loader.unit.test.js.map +1 -0
- package/dist/cli/command-deps.d.ts +13 -1
- package/dist/cli/command-deps.d.ts.map +1 -1
- package/dist/cli/commands/aggregate.d.ts.map +1 -1
- package/dist/cli/commands/aggregate.js +3 -0
- package/dist/cli/commands/aggregate.js.map +1 -1
- package/dist/cli/commands/evaluate.d.ts +41 -0
- package/dist/cli/commands/evaluate.d.ts.map +1 -0
- package/dist/cli/commands/evaluate.js +287 -0
- package/dist/cli/commands/evaluate.js.map +1 -0
- package/dist/cli/commands/run.d.ts.map +1 -1
- package/dist/cli/commands/run.js +93 -1
- package/dist/cli/commands/run.js.map +1 -1
- package/dist/cli/index.d.ts +2 -1
- package/dist/cli/index.d.ts.map +1 -1
- package/dist/cli/index.js +3 -1
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/module-loader.d.ts +23 -1
- package/dist/cli/module-loader.d.ts.map +1 -1
- package/dist/cli/module-loader.js +19 -1
- package/dist/cli/module-loader.js.map +1 -1
- package/dist/cli/types.d.ts +19 -0
- package/dist/cli/types.d.ts.map +1 -1
- package/dist/evaluators/claims-evaluator.d.ts +87 -0
- package/dist/evaluators/claims-evaluator.d.ts.map +1 -0
- package/dist/evaluators/claims-evaluator.js +289 -0
- package/dist/evaluators/claims-evaluator.js.map +1 -0
- package/dist/evaluators/exploratory-evaluator.d.ts +136 -0
- package/dist/evaluators/exploratory-evaluator.d.ts.map +1 -0
- package/dist/evaluators/exploratory-evaluator.js +545 -0
- package/dist/evaluators/exploratory-evaluator.js.map +1 -0
- package/dist/evaluators/index.d.ts +13 -0
- package/dist/evaluators/index.d.ts.map +1 -0
- package/dist/evaluators/index.js +14 -0
- package/dist/evaluators/index.js.map +1 -0
- package/dist/evaluators/metrics-evaluator.d.ts +114 -0
- package/dist/evaluators/metrics-evaluator.d.ts.map +1 -0
- package/dist/evaluators/metrics-evaluator.js +433 -0
- package/dist/evaluators/metrics-evaluator.js.map +1 -0
- package/dist/evaluators/registry.d.ts +106 -0
- package/dist/evaluators/registry.d.ts.map +1 -0
- package/dist/evaluators/registry.js +148 -0
- package/dist/evaluators/registry.js.map +1 -0
- package/dist/evaluators/robustness-evaluator.d.ts +57 -0
- package/dist/evaluators/robustness-evaluator.d.ts.map +1 -0
- package/dist/evaluators/robustness-evaluator.js +186 -0
- package/dist/evaluators/robustness-evaluator.js.map +1 -0
- package/dist/executor/__tests__/binary-sut.unit.test.d.ts +8 -0
- package/dist/executor/__tests__/binary-sut.unit.test.d.ts.map +1 -0
- package/dist/executor/__tests__/binary-sut.unit.test.js +313 -0
- package/dist/executor/__tests__/binary-sut.unit.test.js.map +1 -0
- package/dist/executor/__tests__/checkpoint-storage.unit.test.js +43 -0
- package/dist/executor/__tests__/checkpoint-storage.unit.test.js.map +1 -1
- package/dist/executor/__tests__/executor.unit.test.js +56 -9
- package/dist/executor/__tests__/executor.unit.test.js.map +1 -1
- package/dist/executor/__tests__/resource-calculator.unit.test.d.ts +10 -0
- package/dist/executor/__tests__/resource-calculator.unit.test.d.ts.map +1 -0
- package/dist/executor/__tests__/resource-calculator.unit.test.js +104 -0
- package/dist/executor/__tests__/resource-calculator.unit.test.js.map +1 -0
- package/dist/executor/__tests__/worker-threads-executor.unit.test.d.ts +8 -0
- package/dist/executor/__tests__/worker-threads-executor.unit.test.d.ts.map +1 -0
- package/dist/executor/__tests__/worker-threads-executor.unit.test.js +276 -0
- package/dist/executor/__tests__/worker-threads-executor.unit.test.js.map +1 -0
- package/dist/executor/binary-sut.d.ts +105 -0
- package/dist/executor/binary-sut.d.ts.map +1 -0
- package/dist/executor/binary-sut.js +174 -0
- package/dist/executor/binary-sut.js.map +1 -0
- package/dist/executor/checkpoint-storage.d.ts.map +1 -1
- package/dist/executor/checkpoint-storage.js +6 -4
- package/dist/executor/checkpoint-storage.js.map +1 -1
- package/dist/executor/executor.d.ts +28 -0
- package/dist/executor/executor.d.ts.map +1 -1
- package/dist/executor/executor.js +85 -24
- package/dist/executor/executor.js.map +1 -1
- package/dist/executor/index.d.ts +4 -0
- package/dist/executor/index.d.ts.map +1 -1
- package/dist/executor/index.js +4 -0
- package/dist/executor/index.js.map +1 -1
- package/dist/executor/resource-calculator.d.ts +49 -0
- package/dist/executor/resource-calculator.d.ts.map +1 -0
- package/dist/executor/resource-calculator.js +129 -0
- package/dist/executor/resource-calculator.js.map +1 -0
- package/dist/executor/worker-entry.js +26 -10
- package/dist/executor/worker-entry.js.map +1 -1
- package/dist/executor/worker-executor.d.ts +104 -3
- package/dist/executor/worker-executor.d.ts.map +1 -1
- package/dist/executor/worker-executor.js +224 -4
- package/dist/executor/worker-executor.js.map +1 -1
- package/dist/executor/worker-threads-executor.d.ts +245 -0
- package/dist/executor/worker-threads-executor.d.ts.map +1 -0
- package/dist/executor/worker-threads-executor.js +332 -0
- package/dist/executor/worker-threads-executor.js.map +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +4 -2
- package/dist/index.js.map +1 -1
- package/dist/renderers/latex-renderer.d.ts +60 -0
- package/dist/renderers/latex-renderer.d.ts.map +1 -1
- package/dist/renderers/latex-renderer.js +299 -0
- package/dist/renderers/latex-renderer.js.map +1 -1
- package/dist/renderers/types.d.ts +9 -0
- package/dist/renderers/types.d.ts.map +1 -1
- package/dist/renderers/types.js.map +1 -1
- package/dist/robustness/index.d.ts +5 -2
- package/dist/robustness/index.d.ts.map +1 -1
- package/dist/robustness/index.js +4 -2
- package/dist/robustness/index.js.map +1 -1
- package/dist/types/evaluator.d.ts +449 -0
- package/dist/types/evaluator.d.ts.map +1 -0
- package/dist/types/evaluator.js +9 -0
- package/dist/types/evaluator.js.map +1 -0
- package/dist/types/result.d.ts +2 -0
- package/dist/types/result.d.ts.map +1 -1
- package/package.json +1 -1
- package/dist/claims/__tests__/evaluator.unit.test.d.ts +0 -12
- package/dist/claims/__tests__/evaluator.unit.test.d.ts.map +0 -1
- package/dist/claims/__tests__/evaluator.unit.test.js +0 -801
- package/dist/claims/__tests__/evaluator.unit.test.js.map +0 -1
- package/dist/claims/evaluator.d.ts +0 -33
- package/dist/claims/evaluator.d.ts.map +0 -1
- package/dist/claims/evaluator.js +0 -174
- package/dist/claims/evaluator.js.map +0 -1
- package/dist/robustness/__tests__/analyzer.unit.test.d.ts +0 -11
- package/dist/robustness/__tests__/analyzer.unit.test.d.ts.map +0 -1
- package/dist/robustness/__tests__/analyzer.unit.test.js +0 -455
- package/dist/robustness/__tests__/analyzer.unit.test.js.map +0 -1
- package/dist/robustness/analyzer.d.ts +0 -61
- package/dist/robustness/analyzer.d.ts.map +0 -1
- package/dist/robustness/analyzer.js +0 -191
- package/dist/robustness/analyzer.js.map +0 -1
|
@@ -0,0 +1,449 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Evaluator Type Definitions
|
|
3
|
+
*
|
|
4
|
+
* Core abstractions for the extensible evaluation system.
|
|
5
|
+
* All evaluators implement the Evaluator interface, enabling
|
|
6
|
+
* a unified API for claims, robustness, metrics, and custom evaluations.
|
|
7
|
+
*/
|
|
8
|
+
import type { AggregatedResult } from "./aggregate.js";
|
|
9
|
+
import type { EvaluationResult } from "./result.js";
|
|
10
|
+
/**
|
|
11
|
+
* Built-in evaluation types.
|
|
12
|
+
* Custom types can be registered at runtime.
|
|
13
|
+
*/
|
|
14
|
+
export type EvaluationType = "claims" | "robustness" | "metrics" | "exploratory" | "custom";
|
|
15
|
+
/**
|
|
16
|
+
* Base configuration for any evaluator.
|
|
17
|
+
*/
|
|
18
|
+
export interface EvaluatorConfig {
|
|
19
|
+
/** Human-readable name for this evaluator */
|
|
20
|
+
name?: string;
|
|
21
|
+
/** Optional description */
|
|
22
|
+
description?: string;
|
|
23
|
+
/** Additional evaluator-specific options */
|
|
24
|
+
options?: Record<string, unknown>;
|
|
25
|
+
}
|
|
26
|
+
/**
|
|
27
|
+
* Result of validating evaluator configuration.
|
|
28
|
+
*/
|
|
29
|
+
export interface ValidationResult {
|
|
30
|
+
/** Whether the configuration is valid */
|
|
31
|
+
valid: boolean;
|
|
32
|
+
/** Error messages if invalid */
|
|
33
|
+
errors?: string[];
|
|
34
|
+
/** Warnings for non-fatal issues */
|
|
35
|
+
warnings?: string[];
|
|
36
|
+
}
|
|
37
|
+
/**
|
|
38
|
+
* Abstract evaluator interface for registry storage and collections.
|
|
39
|
+
*
|
|
40
|
+
* This non-generic interface uses `unknown` to accommodate any evaluator type.
|
|
41
|
+
* It enables evaluators to be stored in collections without type parameter conflicts.
|
|
42
|
+
*
|
|
43
|
+
* For type-safe operations, use the `Evaluator<TConfig, TInput, TOutput>` type instead.
|
|
44
|
+
*/
|
|
45
|
+
export interface IEvaluator {
|
|
46
|
+
/** Unique type identifier for this evaluator */
|
|
47
|
+
readonly type: EvaluationType;
|
|
48
|
+
/**
|
|
49
|
+
* Validate configuration before evaluation.
|
|
50
|
+
*
|
|
51
|
+
* @param config - Configuration to validate (unknown for compatibility)
|
|
52
|
+
* @returns Validation result
|
|
53
|
+
*/
|
|
54
|
+
validateConfig(config: unknown): ValidationResult;
|
|
55
|
+
/**
|
|
56
|
+
* Perform evaluation.
|
|
57
|
+
*
|
|
58
|
+
* @param config - Evaluator configuration (unknown for compatibility)
|
|
59
|
+
* @param input - Input data for evaluation (unknown for compatibility)
|
|
60
|
+
* @returns Evaluation output
|
|
61
|
+
*/
|
|
62
|
+
evaluate(config: unknown, input: unknown): EvaluationOutput<unknown>;
|
|
63
|
+
/**
|
|
64
|
+
* Create a summary of evaluation results.
|
|
65
|
+
*
|
|
66
|
+
* @param output - Evaluation output to summarize
|
|
67
|
+
* @returns Summary statistics
|
|
68
|
+
*/
|
|
69
|
+
summarize(output: EvaluationOutput<unknown>): EvaluationSummary;
|
|
70
|
+
}
|
|
71
|
+
/**
|
|
72
|
+
* Generic evaluator type for type-safe evaluator implementations.
|
|
73
|
+
*
|
|
74
|
+
* This intersection type combines `IEvaluator` with type-specific method signatures.
|
|
75
|
+
* Classes that implement both interfaces (via structural typing) automatically satisfy
|
|
76
|
+
* this type - no code duplication needed.
|
|
77
|
+
*
|
|
78
|
+
* @template TConfig - Configuration type for this evaluator
|
|
79
|
+
* @template TInput - Input type for evaluation
|
|
80
|
+
* @template TOutput - Output type for evaluation results
|
|
81
|
+
*
|
|
82
|
+
* @example
|
|
83
|
+
* ```ts
|
|
84
|
+
* class MyEvaluator implements Evaluator<MyConfig, MyInput, MyOutput>, IEvaluator {
|
|
85
|
+
* readonly type = "custom" as const;
|
|
86
|
+
*
|
|
87
|
+
* // Generic type-safe methods (for Evaluator<>)
|
|
88
|
+
* validateConfig(config: MyConfig): ValidationResult { ... }
|
|
89
|
+
* evaluate(config: MyConfig, input: MyInput): EvaluationOutput<MyOutput> { ... }
|
|
90
|
+
* summarize(output: EvaluationOutput<MyOutput>): EvaluationSummary { ... }
|
|
91
|
+
*
|
|
92
|
+
* // Unknown methods are automatically satisfied by structural typing
|
|
93
|
+
* // No need to duplicate - TypeScript treats MyConfig as assignable to unknown
|
|
94
|
+
* }
|
|
95
|
+
* ```
|
|
96
|
+
*/
|
|
97
|
+
export type Evaluator<TConfig extends EvaluatorConfig, TInput, TOutput> = IEvaluator & {
|
|
98
|
+
validateConfig(config: TConfig): ValidationResult;
|
|
99
|
+
evaluate(config: TConfig, input: TInput): EvaluationOutput<TOutput>;
|
|
100
|
+
summarize(output: EvaluationOutput<TOutput>): EvaluationSummary;
|
|
101
|
+
};
|
|
102
|
+
/**
|
|
103
|
+
* Generic evaluation output wrapper.
|
|
104
|
+
*
|
|
105
|
+
* All evaluators produce output in this format, providing
|
|
106
|
+
* a consistent shape for the renderer layer.
|
|
107
|
+
*
|
|
108
|
+
* @template T - Type-specific data
|
|
109
|
+
*/
|
|
110
|
+
export interface EvaluationOutput<T> {
|
|
111
|
+
/** Evaluator type that produced this output */
|
|
112
|
+
type: EvaluationType;
|
|
113
|
+
/** Schema version for this output format */
|
|
114
|
+
version: string;
|
|
115
|
+
/** Generation timestamp */
|
|
116
|
+
timestamp: string;
|
|
117
|
+
/** Type-specific evaluation data */
|
|
118
|
+
data: T;
|
|
119
|
+
/** Optional metadata */
|
|
120
|
+
metadata?: {
|
|
121
|
+
/** Input file or source */
|
|
122
|
+
inputSource?: string;
|
|
123
|
+
/** Configuration used */
|
|
124
|
+
config?: EvaluatorConfig;
|
|
125
|
+
/** Additional metadata */
|
|
126
|
+
[key: string]: unknown;
|
|
127
|
+
};
|
|
128
|
+
}
|
|
129
|
+
/**
|
|
130
|
+
* Generic evaluation summary.
|
|
131
|
+
*/
|
|
132
|
+
export interface EvaluationSummary {
|
|
133
|
+
/** Total number of items evaluated */
|
|
134
|
+
total: number;
|
|
135
|
+
/** Number of items that passed criteria */
|
|
136
|
+
passed?: number;
|
|
137
|
+
/** Number of items that failed criteria */
|
|
138
|
+
failed?: number;
|
|
139
|
+
/** Number of items with inconclusive results */
|
|
140
|
+
inconclusive?: number;
|
|
141
|
+
/** Pass rate (passed / total) */
|
|
142
|
+
passRate?: number;
|
|
143
|
+
/** Additional summary metrics */
|
|
144
|
+
additional?: Record<string, number | string>;
|
|
145
|
+
}
|
|
146
|
+
/**
|
|
147
|
+
* Evaluation context for evaluators that work with aggregates.
|
|
148
|
+
*/
|
|
149
|
+
export interface EvaluationContext {
|
|
150
|
+
/** Aggregated results */
|
|
151
|
+
aggregates: AggregatedResult[];
|
|
152
|
+
/** Optional raw results for more detailed analysis */
|
|
153
|
+
rawResults?: EvaluationResult[];
|
|
154
|
+
/** Metadata about the evaluation */
|
|
155
|
+
metadata?: {
|
|
156
|
+
/** Source of the aggregates */
|
|
157
|
+
source?: string;
|
|
158
|
+
/** Configuration hash for reproducibility */
|
|
159
|
+
configHash?: string;
|
|
160
|
+
/** Timestamp when aggregates were generated */
|
|
161
|
+
aggregatesTimestamp?: string;
|
|
162
|
+
/** Additional metadata */
|
|
163
|
+
[key: string]: unknown;
|
|
164
|
+
};
|
|
165
|
+
}
|
|
166
|
+
import type { ClaimEvaluationSummary, EvaluationClaim } from "./claims.js";
|
|
167
|
+
/**
|
|
168
|
+
* Configuration for the claims evaluator.
|
|
169
|
+
*/
|
|
170
|
+
export interface ClaimsEvaluatorConfig extends EvaluatorConfig {
|
|
171
|
+
/** Claims to evaluate */
|
|
172
|
+
claims: EvaluationClaim[];
|
|
173
|
+
/** Optional significance level override */
|
|
174
|
+
significanceLevel?: number;
|
|
175
|
+
/** Optional minimum effect size override */
|
|
176
|
+
minEffectSize?: number;
|
|
177
|
+
}
|
|
178
|
+
/**
|
|
179
|
+
* Data type for claims evaluator output.
|
|
180
|
+
*/
|
|
181
|
+
export type ClaimsEvaluatorData = ClaimEvaluationSummary;
|
|
182
|
+
import type { RobustnessAnalysisOutput } from "./perturbation.js";
|
|
183
|
+
/**
|
|
184
|
+
* Configuration for the robustness evaluator.
|
|
185
|
+
*/
|
|
186
|
+
export interface RobustnessEvaluatorConfig extends EvaluatorConfig {
|
|
187
|
+
/** Metrics to analyze */
|
|
188
|
+
metrics: string[];
|
|
189
|
+
/** Perturbations applied */
|
|
190
|
+
perturbations: string[];
|
|
191
|
+
/** Intensity levels tested (if applicable) */
|
|
192
|
+
intensityLevels?: number[];
|
|
193
|
+
/** Number of runs per perturbation level */
|
|
194
|
+
runsPerLevel?: number;
|
|
195
|
+
}
|
|
196
|
+
/**
|
|
197
|
+
* Data type for robustness evaluator output.
|
|
198
|
+
*/
|
|
199
|
+
export type RobustnessEvaluatorData = RobustnessAnalysisOutput;
|
|
200
|
+
/**
|
|
201
|
+
* Criterion type for metrics evaluation.
|
|
202
|
+
*/
|
|
203
|
+
export type MetricsCriterionType = "threshold" | "baseline" | "target-range";
|
|
204
|
+
/**
|
|
205
|
+
* A single metrics evaluation criterion.
|
|
206
|
+
*/
|
|
207
|
+
export interface MetricsCriterion {
|
|
208
|
+
/** Unique identifier */
|
|
209
|
+
criterionId: string;
|
|
210
|
+
/** Human-readable description */
|
|
211
|
+
description: string;
|
|
212
|
+
/** Type of criterion */
|
|
213
|
+
type: MetricsCriterionType;
|
|
214
|
+
/** Metric to evaluate */
|
|
215
|
+
metric: string;
|
|
216
|
+
/** SUT to evaluate (or "*" for all SUTs) */
|
|
217
|
+
sut: string;
|
|
218
|
+
/** For threshold: operator and value */
|
|
219
|
+
threshold?: {
|
|
220
|
+
operator: "gt" | "gte" | "lt" | "lte" | "eq";
|
|
221
|
+
value: number;
|
|
222
|
+
};
|
|
223
|
+
/** For baseline: baseline SUT and comparison */
|
|
224
|
+
baseline?: {
|
|
225
|
+
sut: string;
|
|
226
|
+
operator: "gt" | "gte" | "lt" | "lte" | "eq";
|
|
227
|
+
};
|
|
228
|
+
/** For target-range: min and max values */
|
|
229
|
+
targetRange?: {
|
|
230
|
+
min?: number;
|
|
231
|
+
max?: number;
|
|
232
|
+
minInclusive?: boolean;
|
|
233
|
+
maxInclusive?: boolean;
|
|
234
|
+
};
|
|
235
|
+
/** Optional scope constraints */
|
|
236
|
+
scopeConstraints?: {
|
|
237
|
+
caseClass?: string | string[];
|
|
238
|
+
};
|
|
239
|
+
/** Tags for filtering */
|
|
240
|
+
tags?: readonly string[];
|
|
241
|
+
}
|
|
242
|
+
/**
|
|
243
|
+
* Result of evaluating a single metrics criterion.
|
|
244
|
+
*/
|
|
245
|
+
export interface MetricsCriterionResult {
|
|
246
|
+
/** The criterion being evaluated */
|
|
247
|
+
criterion: MetricsCriterion;
|
|
248
|
+
/** Pass/fail status */
|
|
249
|
+
status: "pass" | "fail" | "inconclusive";
|
|
250
|
+
/** Actual value(s) observed */
|
|
251
|
+
observed: {
|
|
252
|
+
sut: string;
|
|
253
|
+
value: number;
|
|
254
|
+
}[];
|
|
255
|
+
/** Expected value or range */
|
|
256
|
+
expected: {
|
|
257
|
+
type: MetricsCriterionType;
|
|
258
|
+
threshold?: number;
|
|
259
|
+
baselineValue?: number;
|
|
260
|
+
targetRange?: {
|
|
261
|
+
min?: number;
|
|
262
|
+
max?: number;
|
|
263
|
+
};
|
|
264
|
+
};
|
|
265
|
+
/** Reason for inconclusive status (if applicable) */
|
|
266
|
+
inconclusiveReason?: string;
|
|
267
|
+
}
|
|
268
|
+
/**
|
|
269
|
+
* Summary of metrics evaluation.
|
|
270
|
+
*/
|
|
271
|
+
export interface MetricsEvaluationSummary {
|
|
272
|
+
/** Schema version */
|
|
273
|
+
version: string;
|
|
274
|
+
/** Generation timestamp */
|
|
275
|
+
timestamp: string;
|
|
276
|
+
/** Individual criterion results */
|
|
277
|
+
results: MetricsCriterionResult[];
|
|
278
|
+
/** Summary statistics */
|
|
279
|
+
summary: {
|
|
280
|
+
/** Total criteria evaluated */
|
|
281
|
+
total: number;
|
|
282
|
+
/** Criteria passed */
|
|
283
|
+
passed: number;
|
|
284
|
+
/** Criteria failed */
|
|
285
|
+
failed: number;
|
|
286
|
+
/** Criteria inconclusive */
|
|
287
|
+
inconclusive: number;
|
|
288
|
+
/** Overall pass rate */
|
|
289
|
+
passRate: number;
|
|
290
|
+
/** Pass rate by SUT */
|
|
291
|
+
passRateBySut: Record<string, number>;
|
|
292
|
+
};
|
|
293
|
+
}
|
|
294
|
+
/**
|
|
295
|
+
* Configuration for the metrics evaluator.
|
|
296
|
+
*/
|
|
297
|
+
export interface MetricsEvaluatorConfig extends EvaluatorConfig {
|
|
298
|
+
/** Criteria to evaluate */
|
|
299
|
+
criteria: MetricsCriterion[];
|
|
300
|
+
}
|
|
301
|
+
/**
|
|
302
|
+
* Data type for metrics evaluator output.
|
|
303
|
+
*/
|
|
304
|
+
export type MetricsEvaluatorData = MetricsEvaluationSummary;
|
|
305
|
+
/**
|
|
306
|
+
* Configuration for a custom evaluator.
|
|
307
|
+
* Users can extend this with their own properties.
|
|
308
|
+
*/
|
|
309
|
+
export interface CustomEvaluatorConfig extends EvaluatorConfig {
|
|
310
|
+
/** Custom evaluator type name */
|
|
311
|
+
customType: string;
|
|
312
|
+
/** Additional custom properties */
|
|
313
|
+
[key: string]: unknown;
|
|
314
|
+
}
|
|
315
|
+
/**
|
|
316
|
+
* Data type for custom evaluator output.
|
|
317
|
+
* Users can define their own output structure.
|
|
318
|
+
*/
|
|
319
|
+
export type CustomEvaluatorData = Record<string, unknown>;
|
|
320
|
+
/**
|
|
321
|
+
* Metric direction for ranking interpretation.
|
|
322
|
+
*/
|
|
323
|
+
export type MetricDirection = "higher-better" | "lower-better";
|
|
324
|
+
/**
|
|
325
|
+
* Ranking of a SUT for a specific metric.
|
|
326
|
+
*/
|
|
327
|
+
export interface SutMetricRanking {
|
|
328
|
+
/** SUT identifier */
|
|
329
|
+
sut: string;
|
|
330
|
+
/** Mean value for this metric */
|
|
331
|
+
mean: number;
|
|
332
|
+
/** Median value for this metric */
|
|
333
|
+
median: number;
|
|
334
|
+
/** Standard deviation */
|
|
335
|
+
std?: number;
|
|
336
|
+
/** Rank (1 = best based on metric direction) */
|
|
337
|
+
rank: number;
|
|
338
|
+
/** Number of observations */
|
|
339
|
+
n: number;
|
|
340
|
+
}
|
|
341
|
+
/**
|
|
342
|
+
* Pairwise comparison between two SUTs.
|
|
343
|
+
*/
|
|
344
|
+
export interface PairwiseComparison {
|
|
345
|
+
/** First SUT identifier */
|
|
346
|
+
sutA: string;
|
|
347
|
+
/** Second SUT identifier */
|
|
348
|
+
sutB: string;
|
|
349
|
+
/** Metric being compared */
|
|
350
|
+
metric: string;
|
|
351
|
+
/** Difference (sutA - sutB) */
|
|
352
|
+
delta: number;
|
|
353
|
+
/** Ratio (sutA / sutB) */
|
|
354
|
+
ratio: number;
|
|
355
|
+
/** p-value from statistical test */
|
|
356
|
+
pValue?: number;
|
|
357
|
+
/** Effect size (Cohen's d or similar) */
|
|
358
|
+
effectSize?: number;
|
|
359
|
+
/** Whether the difference is statistically significant */
|
|
360
|
+
significant: boolean;
|
|
361
|
+
}
|
|
362
|
+
/**
|
|
363
|
+
* Effect of a case class on SUT performance.
|
|
364
|
+
*/
|
|
365
|
+
export interface CaseClassEffect {
|
|
366
|
+
/** Case class identifier */
|
|
367
|
+
caseClass: string;
|
|
368
|
+
/** SUT identifier */
|
|
369
|
+
sut: string;
|
|
370
|
+
/** Metric being analyzed */
|
|
371
|
+
metric: string;
|
|
372
|
+
/** Deviation from the SUT's overall mean for this metric */
|
|
373
|
+
deviationFromMean: number;
|
|
374
|
+
/** Percentage deviation from mean */
|
|
375
|
+
percentageDeviation?: number;
|
|
376
|
+
/** Whether the effect is statistically significant */
|
|
377
|
+
significant: boolean;
|
|
378
|
+
}
|
|
379
|
+
/**
|
|
380
|
+
* Correlation between two metrics.
|
|
381
|
+
*/
|
|
382
|
+
export interface MetricCorrelation {
|
|
383
|
+
/** First metric */
|
|
384
|
+
metricA: string;
|
|
385
|
+
/** Second metric */
|
|
386
|
+
metricB: string;
|
|
387
|
+
/** Pearson correlation coefficient */
|
|
388
|
+
pearsonR: number;
|
|
389
|
+
/** Spearman rank correlation coefficient */
|
|
390
|
+
spearmanRho?: number;
|
|
391
|
+
/** Human-readable interpretation */
|
|
392
|
+
interpretation: string;
|
|
393
|
+
}
|
|
394
|
+
/**
|
|
395
|
+
* Configuration for the exploratory evaluator.
|
|
396
|
+
*/
|
|
397
|
+
export interface ExploratoryEvaluatorConfig extends EvaluatorConfig {
|
|
398
|
+
/** Metrics to analyze (if not specified, all available metrics are used) */
|
|
399
|
+
metrics?: string[];
|
|
400
|
+
/** SUTs to include (if not specified, all available SUTs are used) */
|
|
401
|
+
suts?: string[];
|
|
402
|
+
/** Metric directions for ranking interpretation */
|
|
403
|
+
metricDirections?: Record<string, MetricDirection>;
|
|
404
|
+
/** Significance level for statistical tests (default: 0.05) */
|
|
405
|
+
significanceLevel?: number;
|
|
406
|
+
/** Minimum effect size to consider meaningful */
|
|
407
|
+
minEffectSize?: number;
|
|
408
|
+
/** Whether to compute metric correlations */
|
|
409
|
+
computeCorrelations?: boolean;
|
|
410
|
+
/** Whether to analyze case-class effects */
|
|
411
|
+
analyzeCaseClassEffects?: boolean;
|
|
412
|
+
}
|
|
413
|
+
/**
|
|
414
|
+
* Summary of exploratory evaluation results.
|
|
415
|
+
*/
|
|
416
|
+
export interface ExploratoryEvaluationSummary {
|
|
417
|
+
/** Schema version */
|
|
418
|
+
version: string;
|
|
419
|
+
/** Generation timestamp */
|
|
420
|
+
timestamp: string;
|
|
421
|
+
/** SUT rankings per metric */
|
|
422
|
+
rankings: Record<string, SutMetricRanking[]>;
|
|
423
|
+
/** Pairwise comparisons between SUTs */
|
|
424
|
+
pairwiseComparisons: PairwiseComparison[];
|
|
425
|
+
/** Case-class effects (if analyzed) */
|
|
426
|
+
caseClassEffects?: CaseClassEffect[];
|
|
427
|
+
/** Metric correlations (if computed) */
|
|
428
|
+
metricCorrelations?: MetricCorrelation[];
|
|
429
|
+
/** Summary statistics */
|
|
430
|
+
summary: {
|
|
431
|
+
/** Number of SUTs analyzed */
|
|
432
|
+
sutsAnalyzed: number;
|
|
433
|
+
/** Number of metrics analyzed */
|
|
434
|
+
metricsAnalyzed: number;
|
|
435
|
+
/** Number of pairwise comparisons */
|
|
436
|
+
pairwiseComparisonsCount: number;
|
|
437
|
+
/** Number of significant differences found */
|
|
438
|
+
significantDifferences: number;
|
|
439
|
+
/** Number of case classes analyzed */
|
|
440
|
+
caseClassesAnalyzed?: number;
|
|
441
|
+
/** Best SUT per metric */
|
|
442
|
+
bestSutPerMetric: Record<string, string>;
|
|
443
|
+
};
|
|
444
|
+
}
|
|
445
|
+
/**
|
|
446
|
+
* Data type for exploratory evaluator output.
|
|
447
|
+
*/
|
|
448
|
+
export type ExploratoryEvaluatorData = ExploratoryEvaluationSummary;
|
|
449
|
+
//# sourceMappingURL=evaluator.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"evaluator.d.ts","sourceRoot":"","sources":["../../src/types/evaluator.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,gBAAgB,CAAC;AACvD,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAEpD;;;GAGG;AACH,MAAM,MAAM,cAAc,GAAG,QAAQ,GAAG,YAAY,GAAG,SAAS,GAAG,aAAa,GAAG,QAAQ,CAAC;AAE5F;;GAEG;AACH,MAAM,WAAW,eAAe;IAC/B,6CAA6C;IAC7C,IAAI,CAAC,EAAE,MAAM,CAAC;IAEd,2BAA2B;IAC3B,WAAW,CAAC,EAAE,MAAM,CAAC;IAErB,4CAA4C;IAC5C,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAClC;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAChC,yCAAyC;IACzC,KAAK,EAAE,OAAO,CAAC;IAEf,gCAAgC;IAChC,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;IAElB,oCAAoC;IACpC,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;CACpB;AAED;;;;;;;GAOG;AACH,MAAM,WAAW,UAAU;IAC1B,gDAAgD;IAChD,QAAQ,CAAC,IAAI,EAAE,cAAc,CAAC;IAE9B;;;;;OAKG;IACH,cAAc,CAAC,MAAM,EAAE,OAAO,GAAG,gBAAgB,CAAC;IAElD;;;;;;OAMG;IACH,QAAQ,CAAC,MAAM,EAAE,OAAO,EAAE,KAAK,EAAE,OAAO,GAAG,gBAAgB,CAAC,OAAO,CAAC,CAAC;IAErE;;;;;OAKG;IACH,SAAS,CAAC,MAAM,EAAE,gBAAgB,CAAC,OAAO,CAAC,GAAG,iBAAiB,CAAC;CAChE;AAED;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AACH,MAAM,MAAM,SAAS,CAAC,OAAO,SAAS,eAAe,EAAE,MAAM,EAAE,OAAO,IAAI,UAAU,GAAG;IACtF,cAAc,CAAC,MAAM,EAAE,OAAO,GAAG,gBAAgB,CAAC;IAClD,QAAQ,CAAC,MAAM,EAAE,OAAO,EAAE,KAAK,EAAE,MAAM,GAAG,gBAAgB,CAAC,OAAO,CAAC,CAAC;IACpE,SAAS,CAAC,MAAM,EAAE,gBAAgB,CAAC,OAAO,CAAC,GAAG,iBAAiB,CAAC;CAChE,CAAC;AAEF;;;;;;;GAOG;AACH,MAAM,WAAW,gBAAgB,CAAC,CAAC;IAClC,+CAA+C;IAC/C,IAAI,EAAE,cAAc,CAAC;IAErB,4CAA4C;IAC5C,OAAO,EAAE,MAAM,CAAC;IAEhB,2BAA2B;IAC3B,SAAS,EAAE,MAAM,CAAC;IAElB,oCAAoC;IACpC,IAAI,EAAE,CAAC,CAAC;IAER,wBAAwB;IACxB,QAAQ,CAAC,EAAE;QACV,2BAA2B;QAC3B,WAAW,CAAC,EAAE,MAAM,CAAC;QAErB,yBAAyB;QACzB,MAAM,CAAC,EAAE,eAAe,CAAC;QAEzB,0BAA0B;QAC1B,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;KACvB,CAAC;CACF;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IACjC,sCAAsC;IACtC,KAAK,EAAE,MAAM,CAAC;IAEd,2CAA2C;IAC3C,MAAM,CAAC,EAAE,MAAM,CAAC;IAEhB,2CAA2C;IAC3C,MAAM,CAAC,EAAE,MAAM,CAAC;IAEhB,gDAAgD;IAChD,YAAY,CAAC,EAAE,MAAM,CAAC;IAEtB,iCAAiC;IACjC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAElB,iCAAiC;IACjC,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,CAAC;CAC7C;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IACjC,yBAAyB;IACzB,UAAU,EAAE,gBAAgB,EAAE,CAAC;IAE/B,sDAAsD;IACtD,UAAU,CAAC,EAAE,gBAAgB,EAAE,CAAC;IAEhC,oCAAoC;IACpC,QAAQ,CAAC,EAAE;QACV,+BAA+B;QAC/B,MAAM,CAAC,EAAE,MAAM,CAAC;QAEhB,6CAA6C;QAC7C,UAAU,CAAC,EAAE,MAAM,CAAC;QAEpB,+CAA+C;QAC/C,mBAAmB,CAAC,EAAE,MAAM,CAAC;QAE7B,0BAA0B;QAC1B,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;KACvB,CAAC;CACF;AAMD,OAAO,KAAK,EAAE,sBAAsB,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE3E;;GAEG;AACH,MAAM,WAAW,qBAAsB,SAAQ,eAAe;IAC7D,yBAAyB;IACzB,MAAM,EAAE,eAAe,EAAE,CAAC;IAE1B,2CAA2C;IAC3C,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAE3B,4CAA4C;IAC5C,aAAa,CAAC,EAAE,MAAM,CAAC;CACvB;AAED;;GAEG;AACH,MAAM,MAAM,mBAAmB,GAAG,sBAAsB,CAAC;AAMzD,OAAO,KAAK,EAAE,wBAAwB,EAAE,MAAM,mBAAmB,CAAC;AAElE;;GAEG;AACH,MAAM,WAAW,yBAA0B,SAAQ,eAAe;IACjE,yBAAyB;IACzB,OAAO,EAAE,MAAM,EAAE,CAAC;IAElB,4BAA4B;IAC5B,aAAa,EAAE,MAAM,EAAE,CAAC;IAExB,8CAA8C;IAC9C,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAE3B,4CAA4C;IAC5C,YAAY,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;GAEG;AACH,MAAM,MAAM,uBAAuB,GAAG,wBAAwB,CAAC;AAM/D;;GAEG;AACH,MAAM,MAAM,oBAAoB,GAAG,WAAW,GAAG,UAAU,GAAG,cAAc,CAAC;AAE7E;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAChC,wBAAwB;IACxB,WAAW,EAAE,MAAM,CAAC;IAEpB,iCAAiC;IACjC,WAAW,EAAE,MAAM,CAAC;IAEpB,wBAAwB;IACxB,IAAI,EAAE,oBAAoB,CAAC;IAE3B,yBAAyB;IACzB,MAAM,EAAE,MAAM,CAAC;IAEf,4CAA4C;IAC5C,GAAG,EAAE,MAAM,CAAC;IAEZ,wCAAwC;IACxC,SAAS,CAAC,EAAE;QACX,QAAQ,EAAE,IAAI,GAAG,KAAK,GAAG,IAAI,GAAG,KAAK,GAAG,IAAI,CAAC;QAC7C,KAAK,EAAE,MAAM,CAAC;KACd,CAAC;IAEF,gDAAgD;IAChD,QAAQ,CAAC,EAAE;QACV,GAAG,EAAE,MAAM,CAAC;QACZ,QAAQ,EAAE,IAAI,GAAG,KAAK,GAAG,IAAI,GAAG,KAAK,GAAG,IAAI,CAAC;KAC7C,CAAC;IAEF,2CAA2C;IAC3C,WAAW,CAAC,EAAE;QACb,GAAG,CAAC,EAAE,MAAM,CAAC;QACb,GAAG,CAAC,EAAE,MAAM,CAAC;QACb,YAAY,CAAC,EAAE,OAAO,CAAC;QACvB,YAAY,CAAC,EAAE,OAAO,CAAC;KACvB,CAAC;IAEF,iCAAiC;IACjC,gBAAgB,CAAC,EAAE;QAClB,SAAS,CAAC,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;KAC9B,CAAC;IAEF,yBAAyB;IACzB,IAAI,CAAC,EAAE,SAAS,MAAM,EAAE,CAAC;CACzB;AAED;;GAEG;AACH,MAAM,WAAW,sBAAsB;IACtC,oCAAoC;IACpC,SAAS,EAAE,gBAAgB,CAAC;IAE5B,uBAAuB;IACvB,MAAM,EAAE,MAAM,GAAG,MAAM,GAAG,cAAc,CAAC;IAEzC,+BAA+B;IAC/B,QAAQ,EAAE;QACT,GAAG,EAAE,MAAM,CAAC;QACZ,KAAK,EAAE,MAAM,CAAC;KACd,EAAE,CAAC;IAEJ,8BAA8B;IAC9B,QAAQ,EAAE;QACT,IAAI,EAAE,oBAAoB,CAAC;QAC3B,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,aAAa,CAAC,EAAE,MAAM,CAAC;QACvB,WAAW,CAAC,EAAE;YAAE,GAAG,CAAC,EAAE,MAAM,CAAC;YAAC,GAAG,CAAC,EAAE,MAAM,CAAA;SAAE,CAAC;KAC7C,CAAC;IAEF,qDAAqD;IACrD,kBAAkB,CAAC,EAAE,MAAM,CAAC;CAC5B;AAED;;GAEG;AACH,MAAM,WAAW,wBAAwB;IACxC,qBAAqB;IACrB,OAAO,EAAE,MAAM,CAAC;IAEhB,2BAA2B;IAC3B,SAAS,EAAE,MAAM,CAAC;IAElB,mCAAmC;IACnC,OAAO,EAAE,sBAAsB,EAAE,CAAC;IAElC,yBAAyB;IACzB,OAAO,EAAE;QACR,+BAA+B;QAC/B,KAAK,EAAE,MAAM,CAAC;QAEd,sBAAsB;QACtB,MAAM,EAAE,MAAM,CAAC;QAEf,sBAAsB;QACtB,MAAM,EAAE,MAAM,CAAC;QAEf,4BAA4B;QAC5B,YAAY,EAAE,MAAM,CAAC;QAErB,wBAAwB;QACxB,QAAQ,EAAE,MAAM,CAAC;QAEjB,uBAAuB;QACvB,aAAa,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KACtC,CAAC;CACF;AAED;;GAEG;AACH,MAAM,WAAW,sBAAuB,SAAQ,eAAe;IAC9D,2BAA2B;IAC3B,QAAQ,EAAE,gBAAgB,EAAE,CAAC;CAC7B;AAED;;GAEG;AACH,MAAM,MAAM,oBAAoB,GAAG,wBAAwB,CAAC;AAM5D;;;GAGG;AACH,MAAM,WAAW,qBAAsB,SAAQ,eAAe;IAC7D,iCAAiC;IACjC,UAAU,EAAE,MAAM,CAAC;IAEnB,mCAAmC;IACnC,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;CACvB;AAED;;;GAGG;AACH,MAAM,MAAM,mBAAmB,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;AAM1D;;GAEG;AACH,MAAM,MAAM,eAAe,GAAG,eAAe,GAAG,cAAc,CAAC;AAE/D;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAChC,qBAAqB;IACrB,GAAG,EAAE,MAAM,CAAC;IAEZ,iCAAiC;IACjC,IAAI,EAAE,MAAM,CAAC;IAEb,mCAAmC;IACnC,MAAM,EAAE,MAAM,CAAC;IAEf,yBAAyB;IACzB,GAAG,CAAC,EAAE,MAAM,CAAC;IAEb,gDAAgD;IAChD,IAAI,EAAE,MAAM,CAAC;IAEb,6BAA6B;IAC7B,CAAC,EAAE,MAAM,CAAC;CACV;AAED;;GAEG;AACH,MAAM,WAAW,kBAAkB;IAClC,2BAA2B;IAC3B,IAAI,EAAE,MAAM,CAAC;IAEb,4BAA4B;IAC5B,IAAI,EAAE,MAAM,CAAC;IAEb,4BAA4B;IAC5B,MAAM,EAAE,MAAM,CAAC;IAEf,+BAA+B;IAC/B,KAAK,EAAE,MAAM,CAAC;IAEd,0BAA0B;IAC1B,KAAK,EAAE,MAAM,CAAC;IAEd,oCAAoC;IACpC,MAAM,CAAC,EAAE,MAAM,CAAC;IAEhB,yCAAyC;IACzC,UAAU,CAAC,EAAE,MAAM,CAAC;IAEpB,0DAA0D;IAC1D,WAAW,EAAE,OAAO,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC/B,4BAA4B;IAC5B,SAAS,EAAE,MAAM,CAAC;IAElB,qBAAqB;IACrB,GAAG,EAAE,MAAM,CAAC;IAEZ,4BAA4B;IAC5B,MAAM,EAAE,MAAM,CAAC;IAEf,4DAA4D;IAC5D,iBAAiB,EAAE,MAAM,CAAC;IAE1B,qCAAqC;IACrC,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAE7B,sDAAsD;IACtD,WAAW,EAAE,OAAO,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IACjC,mBAAmB;IACnB,OAAO,EAAE,MAAM,CAAC;IAEhB,oBAAoB;IACpB,OAAO,EAAE,MAAM,CAAC;IAEhB,sCAAsC;IACtC,QAAQ,EAAE,MAAM,CAAC;IAEjB,4CAA4C;IAC5C,WAAW,CAAC,EAAE,MAAM,CAAC;IAErB,oCAAoC;IACpC,cAAc,EAAE,MAAM,CAAC;CACvB;AAED;;GAEG;AACH,MAAM,WAAW,0BAA2B,SAAQ,eAAe;IAClE,4EAA4E;IAC5E,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IAEnB,sEAAsE;IACtE,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAEhB,mDAAmD;IACnD,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,eAAe,CAAC,CAAC;IAEnD,+DAA+D;IAC/D,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAE3B,iDAAiD;IACjD,aAAa,CAAC,EAAE,MAAM,CAAC;IAEvB,6CAA6C;IAC7C,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAE9B,4CAA4C;IAC5C,uBAAuB,CAAC,EAAE,OAAO,CAAC;CAClC;AAED;;GAEG;AACH,MAAM,WAAW,4BAA4B;IAC5C,qBAAqB;IACrB,OAAO,EAAE,MAAM,CAAC;IAEhB,2BAA2B;IAC3B,SAAS,EAAE,MAAM,CAAC;IAElB,8BAA8B;IAC9B,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,gBAAgB,EAAE,CAAC,CAAC;IAE7C,wCAAwC;IACxC,mBAAmB,EAAE,kBAAkB,EAAE,CAAC;IAE1C,uCAAuC;IACvC,gBAAgB,CAAC,EAAE,eAAe,EAAE,CAAC;IAErC,wCAAwC;IACxC,kBAAkB,CAAC,EAAE,iBAAiB,EAAE,CAAC;IAEzC,yBAAyB;IACzB,OAAO,EAAE;QACR,8BAA8B;QAC9B,YAAY,EAAE,MAAM,CAAC;QAErB,iCAAiC;QACjC,eAAe,EAAE,MAAM,CAAC;QAExB,qCAAqC;QACrC,wBAAwB,EAAE,MAAM,CAAC;QAEjC,8CAA8C;QAC9C,sBAAsB,EAAE,MAAM,CAAC;QAE/B,sCAAsC;QACtC,mBAAmB,CAAC,EAAE,MAAM,CAAC;QAE7B,0BAA0B;QAC1B,gBAAgB,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KACzC,CAAC;CACF;AAED;;GAEG;AACH,MAAM,MAAM,wBAAwB,GAAG,4BAA4B,CAAC"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Evaluator Type Definitions
|
|
3
|
+
*
|
|
4
|
+
* Core abstractions for the extensible evaluation system.
|
|
5
|
+
* All evaluators implement the Evaluator interface, enabling
|
|
6
|
+
* a unified API for claims, robustness, metrics, and custom evaluations.
|
|
7
|
+
*/
|
|
8
|
+
export {};
|
|
9
|
+
//# sourceMappingURL=evaluator.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"evaluator.js","sourceRoot":"","sources":["../../src/types/evaluator.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG"}
|
package/dist/types/result.d.ts
CHANGED
|
@@ -88,6 +88,8 @@ export interface ResultMetrics {
|
|
|
88
88
|
numeric: Record<string, number>;
|
|
89
89
|
/** Additional metrics (overflow) */
|
|
90
90
|
extra?: Record<string, number>;
|
|
91
|
+
/** Allow arbitrary number properties for scenario-specific metrics */
|
|
92
|
+
[key: string]: number | Record<string, number> | undefined;
|
|
91
93
|
}
|
|
92
94
|
/**
|
|
93
95
|
* Provenance information for reproducibility.
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"result.d.ts","sourceRoot":"","sources":["../../src/types/result.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,KAAK,EAAE,iBAAiB,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAC9D,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,UAAU,CAAC;AAExC;;GAEG;AACH,MAAM,MAAM,WAAW,GACpB,WAAW,GACX,mBAAmB,GACnB,sBAAsB,GACtB,WAAW,GACX,iBAAiB,GACjB,SAAS,CAAC;AAEb;;GAEG;AACH,MAAM,WAAW,UAAU;IAC1B,4CAA4C;IAC5C,KAAK,EAAE,MAAM,CAAC;IAEd,qBAAqB;IACrB,GAAG,EAAE,MAAM,CAAC;IAEZ,yCAAyC;IACzC,OAAO,EAAE,OAAO,CAAC;IAEjB,sCAAsC;IACtC,UAAU,CAAC,EAAE,MAAM,CAAC;IAEpB,sBAAsB;IACtB,MAAM,EAAE,MAAM,CAAC;IAEf,8BAA8B;IAC9B,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB,2CAA2C;IAC3C,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;IAEnC,gCAAgC;IAChC,IAAI,CAAC,EAAE,MAAM,CAAC;IAEd,6CAA6C;IAC7C,UAAU,CAAC,EAAE,MAAM,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IACjC,wDAAwD;IACxD,cAAc,EAAE,OAAO,CAAC;IAExB,0CAA0C;IAC1C,cAAc,EAAE,OAAO,CAAC;IAExB,2CAA2C;IAC3C,KAAK,EAAE,OAAO,CAAC;IAEf,0DAA0D;IAC1D,eAAe,EAAE,OAAO,GAAG,IAAI,CAAC;IAEhC,2CAA2C;IAC3C,WAAW,CAAC,EAAE,WAAW,CAAC;IAE1B,mCAAmC;IACnC,KAAK,CAAC,EAAE,MAAM,EAAE,CAAC;CACjB;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IAC1B,sBAAsB;IACtB,MAAM,EAAE,MAAM,CAAC;IAEf,0BAA0B;IAC1B,KAAK,EAAE,MAAM,CAAC;IAEd,mCAAmC;IACnC,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;CACrC;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC7B,4BAA4B;IAC5B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,GAAG,SAAS,EAAE,CAAC,CAAC;IAElD,4BAA4B;IAC5B,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;IAEnC,sBAAsB;IACtB,OAAO,CAAC,EAAE,UAAU,EAAE,CAAC;IAEvB,wCAAwC;IACxC,SAAS,CAAC,EAAE,iBAAiB,EAAE,CAAC;IAEhC,iCAAiC;IACjC,KAAK,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAChC;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC7B,8BAA8B;IAC9B,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAEhC,oCAAoC;IACpC,KAAK,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;
|
|
1
|
+
{"version":3,"file":"result.d.ts","sourceRoot":"","sources":["../../src/types/result.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,KAAK,EAAE,iBAAiB,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAC9D,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,UAAU,CAAC;AAExC;;GAEG;AACH,MAAM,MAAM,WAAW,GACpB,WAAW,GACX,mBAAmB,GACnB,sBAAsB,GACtB,WAAW,GACX,iBAAiB,GACjB,SAAS,CAAC;AAEb;;GAEG;AACH,MAAM,WAAW,UAAU;IAC1B,4CAA4C;IAC5C,KAAK,EAAE,MAAM,CAAC;IAEd,qBAAqB;IACrB,GAAG,EAAE,MAAM,CAAC;IAEZ,yCAAyC;IACzC,OAAO,EAAE,OAAO,CAAC;IAEjB,sCAAsC;IACtC,UAAU,CAAC,EAAE,MAAM,CAAC;IAEpB,sBAAsB;IACtB,MAAM,EAAE,MAAM,CAAC;IAEf,8BAA8B;IAC9B,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB,2CAA2C;IAC3C,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;IAEnC,gCAAgC;IAChC,IAAI,CAAC,EAAE,MAAM,CAAC;IAEd,6CAA6C;IAC7C,UAAU,CAAC,EAAE,MAAM,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IACjC,wDAAwD;IACxD,cAAc,EAAE,OAAO,CAAC;IAExB,0CAA0C;IAC1C,cAAc,EAAE,OAAO,CAAC;IAExB,2CAA2C;IAC3C,KAAK,EAAE,OAAO,CAAC;IAEf,0DAA0D;IAC1D,eAAe,EAAE,OAAO,GAAG,IAAI,CAAC;IAEhC,2CAA2C;IAC3C,WAAW,CAAC,EAAE,WAAW,CAAC;IAE1B,mCAAmC;IACnC,KAAK,CAAC,EAAE,MAAM,EAAE,CAAC;CACjB;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IAC1B,sBAAsB;IACtB,MAAM,EAAE,MAAM,CAAC;IAEf,0BAA0B;IAC1B,KAAK,EAAE,MAAM,CAAC;IAEd,mCAAmC;IACnC,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;CACrC;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC7B,4BAA4B;IAC5B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,GAAG,SAAS,EAAE,CAAC,CAAC;IAElD,4BAA4B;IAC5B,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;IAEnC,sBAAsB;IACtB,OAAO,CAAC,EAAE,UAAU,EAAE,CAAC;IAEvB,wCAAwC;IACxC,SAAS,CAAC,EAAE,iBAAiB,EAAE,CAAC;IAEhC,iCAAiC;IACjC,KAAK,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAChC;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC7B,8BAA8B;IAC9B,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAEhC,oCAAoC;IACpC,KAAK,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAE/B,sEAAsE;IACtE,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,GAAG,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,SAAS,CAAC;CAC3D;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IAC1B,4BAA4B;IAC5B,OAAO,EAAE;QACR,QAAQ,EAAE,MAAM,CAAC;QACjB,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,EAAE,MAAM,CAAC;KACpB,CAAC;IAEF,sBAAsB;IACtB,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB,wDAAwD;IACxD,KAAK,CAAC,EAAE,OAAO,CAAC;IAEhB,uDAAuD;IACvD,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAE5B,2CAA2C;IAC3C,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IAExB,0BAA0B;IAC1B,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB,gDAAgD;IAChD,eAAe,CAAC,EAAE,MAAM,CAAC;IAEzB,iDAAiD;IACjD,eAAe,CAAC,EAAE,MAAM,CAAC;IAEzB,yCAAyC;IACzC,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC1B;AAED;;;;;GAKG;AACH,MAAM,WAAW,gBAAgB;IAChC,+BAA+B;IAC/B,GAAG,EAAE,UAAU,CAAC;IAEhB,6BAA6B;IAC7B,WAAW,EAAE,iBAAiB,CAAC;IAE/B,qCAAqC;IACrC,OAAO,EAAE,aAAa,CAAC;IAEvB,sBAAsB;IACtB,OAAO,EAAE,aAAa,CAAC;IAEvB,qCAAqC;IACrC,UAAU,EAAE,UAAU,CAAC;CACvB;AAED;;GAEG;AACH,MAAM,WAAW,WAAW;IAC3B,qBAAqB;IACrB,OAAO,EAAE,MAAM,CAAC;IAEhB,2BAA2B;IAC3B,SAAS,EAAE,MAAM,CAAC;IAElB,gCAAgC;IAChC,OAAO,EAAE,gBAAgB,EAAE,CAAC;IAE5B,oCAAoC;IACpC,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;CACrC"}
|
package/package.json
CHANGED
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Unit tests for Claims Evaluator
|
|
3
|
-
*
|
|
4
|
-
* Tests the evaluateClaim function and related functionality including:
|
|
5
|
-
* - Satisfied claims with various directions
|
|
6
|
-
* - Violated claims
|
|
7
|
-
* - Inconclusive results from missing data
|
|
8
|
-
* - Evidence computation (delta, ratio, pValue, effectSize)
|
|
9
|
-
* - Scope filtering with caseClass constraints
|
|
10
|
-
*/
|
|
11
|
-
export {};
|
|
12
|
-
//# sourceMappingURL=evaluator.unit.test.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"evaluator.unit.test.d.ts","sourceRoot":"","sources":["../../../src/claims/__tests__/evaluator.unit.test.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG"}
|