ppef 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. package/README.md +270 -6
  2. package/dist/__tests__/cli/evaluate-command.integration.test.js +60 -0
  3. package/dist/__tests__/cli/evaluate-command.integration.test.js.map +1 -1
  4. package/dist/__tests__/examples.integration.test.d.ts +8 -0
  5. package/dist/__tests__/examples.integration.test.d.ts.map +1 -0
  6. package/dist/__tests__/examples.integration.test.js +236 -0
  7. package/dist/__tests__/examples.integration.test.js.map +1 -0
  8. package/dist/cli/__tests__/commands.unit.test.js +12 -5
  9. package/dist/cli/__tests__/commands.unit.test.js.map +1 -1
  10. package/dist/cli/__tests__/config-loader.unit.test.js +30 -25
  11. package/dist/cli/__tests__/config-loader.unit.test.js.map +1 -1
  12. package/dist/cli/__tests__/evaluator-schemas.unit.test.d.ts +9 -0
  13. package/dist/cli/__tests__/evaluator-schemas.unit.test.d.ts.map +1 -0
  14. package/dist/cli/__tests__/evaluator-schemas.unit.test.js +334 -0
  15. package/dist/cli/__tests__/evaluator-schemas.unit.test.js.map +1 -0
  16. package/dist/cli/commands/aggregate.d.ts.map +1 -1
  17. package/dist/cli/commands/aggregate.js +20 -12
  18. package/dist/cli/commands/aggregate.js.map +1 -1
  19. package/dist/cli/commands/evaluate.d.ts.map +1 -1
  20. package/dist/cli/commands/evaluate.js +130 -24
  21. package/dist/cli/commands/evaluate.js.map +1 -1
  22. package/dist/cli/commands/plan.d.ts.map +1 -1
  23. package/dist/cli/commands/plan.js +40 -6
  24. package/dist/cli/commands/plan.js.map +1 -1
  25. package/dist/cli/commands/run.d.ts +9 -0
  26. package/dist/cli/commands/run.d.ts.map +1 -1
  27. package/dist/cli/commands/run.js +71 -12
  28. package/dist/cli/commands/run.js.map +1 -1
  29. package/dist/cli/commands/validate.d.ts.map +1 -1
  30. package/dist/cli/commands/validate.js +55 -0
  31. package/dist/cli/commands/validate.js.map +1 -1
  32. package/dist/cli/config-loader.d.ts +6 -3
  33. package/dist/cli/config-loader.d.ts.map +1 -1
  34. package/dist/cli/config-loader.js +31 -106
  35. package/dist/cli/config-loader.js.map +1 -1
  36. package/dist/cli/evaluator-schemas.d.ts +395 -0
  37. package/dist/cli/evaluator-schemas.d.ts.map +1 -0
  38. package/dist/cli/evaluator-schemas.js +285 -0
  39. package/dist/cli/evaluator-schemas.js.map +1 -0
  40. package/dist/cli/index.d.ts.map +1 -1
  41. package/dist/cli/index.js +11 -1
  42. package/dist/cli/index.js.map +1 -1
  43. package/dist/cli/module-loader.d.ts.map +1 -1
  44. package/dist/cli/module-loader.js +38 -20
  45. package/dist/cli/module-loader.js.map +1 -1
  46. package/dist/cli/type-utils.d.ts +31 -0
  47. package/dist/cli/type-utils.d.ts.map +1 -0
  48. package/dist/cli/type-utils.js +38 -0
  49. package/dist/cli/type-utils.js.map +1 -0
  50. package/dist/cli/types.d.ts +284 -94
  51. package/dist/cli/types.d.ts.map +1 -1
  52. package/dist/cli/types.js +177 -1
  53. package/dist/cli/types.js.map +1 -1
  54. package/dist/collector/schema.js.map +1 -1
  55. package/dist/evaluators/claims-evaluator.d.ts.map +1 -1
  56. package/dist/evaluators/claims-evaluator.js +1 -1
  57. package/dist/evaluators/claims-evaluator.js.map +1 -1
  58. package/dist/evaluators/exploratory-evaluator.js.map +1 -1
  59. package/dist/executor/__tests__/worker-entry.integration.test.d.ts.map +1 -1
  60. package/dist/executor/__tests__/worker-entry.integration.test.js +19 -4
  61. package/dist/executor/__tests__/worker-entry.integration.test.js.map +1 -1
  62. package/dist/executor/binary-sut.d.ts.map +1 -1
  63. package/dist/executor/binary-sut.js +2 -1
  64. package/dist/executor/binary-sut.js.map +1 -1
  65. package/dist/executor/checkpoint-storage.d.ts.map +1 -1
  66. package/dist/executor/checkpoint-storage.js +13 -4
  67. package/dist/executor/checkpoint-storage.js.map +1 -1
  68. package/dist/executor/executor.d.ts +22 -0
  69. package/dist/executor/executor.d.ts.map +1 -1
  70. package/dist/executor/executor.js +133 -6
  71. package/dist/executor/executor.js.map +1 -1
  72. package/dist/executor/parallel-executor.d.ts.map +1 -1
  73. package/dist/executor/parallel-executor.js +9 -2
  74. package/dist/executor/parallel-executor.js.map +1 -1
  75. package/dist/executor/worker-entry.js +3 -1
  76. package/dist/executor/worker-entry.js.map +1 -1
  77. package/dist/executor/worker-executor.d.ts +9 -0
  78. package/dist/executor/worker-executor.d.ts.map +1 -1
  79. package/dist/executor/worker-executor.js +88 -9
  80. package/dist/executor/worker-executor.js.map +1 -1
  81. package/dist/executor/worker-threads-executor.d.ts.map +1 -1
  82. package/dist/executor/worker-threads-executor.js +52 -18
  83. package/dist/executor/worker-threads-executor.js.map +1 -1
  84. package/dist/index.cjs +1 -1
  85. package/dist/index.d.ts +1 -0
  86. package/dist/index.d.ts.map +1 -1
  87. package/dist/index.js +2 -0
  88. package/dist/index.js.map +1 -1
  89. package/dist/renderers/latex-renderer.d.ts.map +1 -1
  90. package/dist/renderers/latex-renderer.js +20 -12
  91. package/dist/renderers/latex-renderer.js.map +1 -1
  92. package/dist/schemas/__tests__/json-schema-validator.unit.test.d.ts +8 -0
  93. package/dist/schemas/__tests__/json-schema-validator.unit.test.d.ts.map +1 -0
  94. package/dist/schemas/__tests__/json-schema-validator.unit.test.js +170 -0
  95. package/dist/schemas/__tests__/json-schema-validator.unit.test.js.map +1 -0
  96. package/dist/schemas/index.d.ts +7 -0
  97. package/dist/schemas/index.d.ts.map +1 -0
  98. package/dist/schemas/index.js +7 -0
  99. package/dist/schemas/index.js.map +1 -0
  100. package/dist/schemas/json-schema-validator.d.ts +59 -0
  101. package/dist/schemas/json-schema-validator.d.ts.map +1 -0
  102. package/dist/schemas/json-schema-validator.js +67 -0
  103. package/dist/schemas/json-schema-validator.js.map +1 -0
  104. package/dist/types/case.d.ts +4 -0
  105. package/dist/types/case.d.ts.map +1 -1
  106. package/dist/types/result.d.ts +2 -0
  107. package/dist/types/result.d.ts.map +1 -1
  108. package/dist/types/sut.d.ts +4 -0
  109. package/dist/types/sut.d.ts.map +1 -1
  110. package/package.json +16 -6
  111. package/ppef.schema.json +1178 -0
@@ -0,0 +1,395 @@
1
+ /**
2
+ * Evaluator Configuration Zod Schemas
3
+ *
4
+ * Runtime-validated schemas for all evaluator config types.
5
+ * Existing interfaces in types/evaluator.ts remain for class implementations;
6
+ * these schemas are used by the CLI for config validation and JSON schema generation.
7
+ */
8
+ import { z } from "zod";
/**
 * Zod enum schema over the five evaluation type names:
 * "metrics", "claims", "robustness", "exploratory" and "custom".
 */
export declare const EvaluationTypeSchema: z.ZodEnum<{
    metrics: "metrics";
    claims: "claims";
    robustness: "robustness";
    exploratory: "exploratory";
    custom: "custom";
}>;
/** String-literal union inferred from the schema of the same name. */
export type EvaluationTypeSchema = z.infer<typeof EvaluationTypeSchema>;
/**
 * Zod enum schema for the comparison direction of a claim:
 * "greater", "less" or "equal".
 */
export declare const ComparisonDirectionSchema: z.ZodEnum<{
    greater: "greater";
    less: "less";
    equal: "equal";
}>;
/** String-literal union inferred from the schema of the same name. */
export type ComparisonDirectionSchema = z.infer<typeof ComparisonDirectionSchema>;
/**
 * Zod enum schema for the validity scope of a claim:
 * "global", "caseClass", "parameterRange" or "localStructure".
 */
export declare const ValidityScopeSchema: z.ZodEnum<{
    global: "global";
    caseClass: "caseClass";
    parameterRange: "parameterRange";
    localStructure: "localStructure";
}>;
/** String-literal union inferred from the schema of the same name. */
export type ValidityScopeSchema = z.infer<typeof ValidityScopeSchema>;
/**
 * Zod enum schema stating how a metric should be read when ranking:
 * "higher-better" or "lower-better".
 */
export declare const MetricDirectionSchema: z.ZodEnum<{
    "higher-better": "higher-better";
    "lower-better": "lower-better";
}>;
/** String-literal union inferred from the schema of the same name. */
export type MetricDirectionSchema = z.infer<typeof MetricDirectionSchema>;
/**
 * Zod enum schema for the kind of a metrics criterion:
 * "baseline", "threshold" or "target-range".
 */
export declare const MetricsCriterionTypeSchema: z.ZodEnum<{
    baseline: "baseline";
    threshold: "threshold";
    "target-range": "target-range";
}>;
/** String-literal union inferred from the schema of the same name. */
export type MetricsCriterionTypeSchema = z.infer<typeof MetricsCriterionTypeSchema>;
/**
 * Zod enum schema for comparison operators:
 * "gt", "gte", "lt", "lte" or "eq".
 */
export declare const ThresholdOperatorSchema: z.ZodEnum<{
    gt: "gt";
    gte: "gte";
    lt: "lt";
    lte: "lte";
    eq: "eq";
}>;
/** String-literal union inferred from the schema of the same name. */
export type ThresholdOperatorSchema = z.infer<typeof ThresholdOperatorSchema>;
/**
 * Object schema with the fields common to every evaluator configuration:
 * an optional `name`, an optional `description`, and an optional
 * string-keyed `options` record whose values are unknown.
 */
export declare const EvaluatorConfigBase: z.ZodObject<{
    name: z.ZodOptional<z.ZodString>;
    description: z.ZodOptional<z.ZodString>;
    options: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
}, z.core.$strip>;
/**
 * Object schema for one evaluation claim (hypothesis).
 *
 * `direction` and `scope` reuse the enum schemas declared above. Each
 * `scopeConstraints` value is either a primitive (string, number, boolean
 * or null) or an array of such primitives.
 */
export declare const EvaluationClaimSchema: z.ZodObject<{
    claimId: z.ZodString;
    description: z.ZodString;
    sut: z.ZodString;
    baseline: z.ZodString;
    metric: z.ZodString;
    direction: typeof ComparisonDirectionSchema;
    threshold: z.ZodOptional<z.ZodNumber>;
    scope: typeof ValidityScopeSchema;
    scopeConstraints: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnion<readonly [
        z.ZodUnion<readonly [z.ZodString, z.ZodNumber, z.ZodBoolean, z.ZodNull]>,
        z.ZodArray<z.ZodUnion<readonly [z.ZodString, z.ZodNumber, z.ZodBoolean, z.ZodNull]>>
    ]>>>;
    significanceLevel: z.ZodOptional<z.ZodNumber>;
    minEffectSize: z.ZodOptional<z.ZodNumber>;
    tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
    citation: z.ZodOptional<z.ZodString>;
}, z.core.$strip>;
/** Object type inferred from the schema of the same name. */
export type EvaluationClaimSchema = z.infer<typeof EvaluationClaimSchema>;
/**
 * Object schema for the claims evaluator configuration: the shared base
 * fields, an array of claims, and optional config-level
 * `significanceLevel` / `minEffectSize` numbers.
 */
export declare const ClaimsEvaluatorConfigSchema: z.ZodObject<{
    name: z.ZodOptional<z.ZodString>;
    description: z.ZodOptional<z.ZodString>;
    options: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
    claims: z.ZodArray<typeof EvaluationClaimSchema>;
    significanceLevel: z.ZodOptional<z.ZodNumber>;
    minEffectSize: z.ZodOptional<z.ZodNumber>;
}, z.core.$strip>;
/** Object type inferred from the schema of the same name. */
export type ClaimsEvaluatorConfigSchema = z.infer<typeof ClaimsEvaluatorConfigSchema>;
/**
 * Object schema for one metrics evaluation criterion.
 *
 * The declared type marks `threshold`, `baseline` and `targetRange` as
 * optional. The requirement that the one matching `type` is present is
 * enforced at runtime by a superRefine check and is not visible here.
 */
export declare const MetricsCriterionSchema: z.ZodObject<{
    criterionId: z.ZodString;
    description: z.ZodString;
    type: typeof MetricsCriterionTypeSchema;
    metric: z.ZodString;
    sut: z.ZodString;
    threshold: z.ZodOptional<z.ZodObject<{
        operator: typeof ThresholdOperatorSchema;
        value: z.ZodNumber;
    }, z.core.$strip>>;
    baseline: z.ZodOptional<z.ZodObject<{
        sut: z.ZodString;
        operator: typeof ThresholdOperatorSchema;
    }, z.core.$strip>>;
    targetRange: z.ZodOptional<z.ZodObject<{
        min: z.ZodOptional<z.ZodNumber>;
        max: z.ZodOptional<z.ZodNumber>;
        minInclusive: z.ZodOptional<z.ZodBoolean>;
        maxInclusive: z.ZodOptional<z.ZodBoolean>;
    }, z.core.$strip>>;
    scopeConstraints: z.ZodOptional<z.ZodObject<{
        caseClass: z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodArray<z.ZodString>]>>;
    }, z.core.$strip>>;
    tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
}, z.core.$strip>;
/** Object type inferred from the schema of the same name. */
export type MetricsCriterionSchema = z.infer<typeof MetricsCriterionSchema>;
/**
 * Object schema for the metrics evaluator configuration: the shared base
 * fields plus an array of metrics criteria.
 */
export declare const MetricsEvaluatorConfigSchema: z.ZodObject<{
    name: z.ZodOptional<z.ZodString>;
    description: z.ZodOptional<z.ZodString>;
    options: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
    criteria: z.ZodArray<typeof MetricsCriterionSchema>;
}, z.core.$strip>;
/** Object type inferred from the schema of the same name. */
export type MetricsEvaluatorConfigSchema = z.infer<typeof MetricsEvaluatorConfigSchema>;
/**
 * Object schema for the robustness evaluator configuration: the shared
 * base fields, required `metrics` and `perturbations` string arrays, and
 * optional `intensityLevels` / `runsPerLevel`.
 */
export declare const RobustnessEvaluatorConfigSchema: z.ZodObject<{
    name: z.ZodOptional<z.ZodString>;
    description: z.ZodOptional<z.ZodString>;
    options: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
    metrics: z.ZodArray<z.ZodString>;
    perturbations: z.ZodArray<z.ZodString>;
    intensityLevels: z.ZodOptional<z.ZodArray<z.ZodNumber>>;
    runsPerLevel: z.ZodOptional<z.ZodNumber>;
}, z.core.$strip>;
/** Object type inferred from the schema of the same name. */
export type RobustnessEvaluatorConfigSchema = z.infer<typeof RobustnessEvaluatorConfigSchema>;
/**
 * Object schema for the exploratory evaluator configuration. Every field,
 * including the shared base fields, is optional.
 */
export declare const ExploratoryEvaluatorConfigSchema: z.ZodObject<{
    name: z.ZodOptional<z.ZodString>;
    description: z.ZodOptional<z.ZodString>;
    options: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
    metrics: z.ZodOptional<z.ZodArray<z.ZodString>>;
    suts: z.ZodOptional<z.ZodArray<z.ZodString>>;
    metricDirections: z.ZodOptional<z.ZodRecord<z.ZodString, typeof MetricDirectionSchema>>;
    significanceLevel: z.ZodOptional<z.ZodNumber>;
    minEffectSize: z.ZodOptional<z.ZodNumber>;
    computeCorrelations: z.ZodOptional<z.ZodBoolean>;
    analyzeCaseClassEffects: z.ZodOptional<z.ZodBoolean>;
}, z.core.$strip>;
/** Object type inferred from the schema of the same name. */
export type ExploratoryEvaluatorConfigSchema = z.infer<typeof ExploratoryEvaluatorConfigSchema>;
/**
 * Object schema for a custom evaluator configuration: the shared base
 * fields plus a required `customType` string. Unlike the other config
 * schemas it is declared with a catchall of unknown values, so additional
 * properties are part of its type.
 */
export declare const CustomEvaluatorConfigSchema: z.ZodObject<{
    name: z.ZodOptional<z.ZodString>;
    description: z.ZodOptional<z.ZodString>;
    options: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
    customType: z.ZodString;
}, z.core.$catchall<z.ZodUnknown>>;
/** Object type inferred from the schema of the same name. */
export type CustomEvaluatorConfigSchema = z.infer<typeof CustomEvaluatorConfigSchema>;
/**
 * Object schema for one evaluator entry: a `type` drawn from the
 * evaluation type enum and a `config` that is a union over the five
 * evaluator config schemas, in the order listed.
 *
 * `config` is declared as a plain union, not a discriminated union, so the
 * type does not tie a given `type` value to a particular config shape.
 */
export declare const EvaluatorEntrySchema: z.ZodObject<{
    type: typeof EvaluationTypeSchema;
    config: z.ZodUnion<readonly [
        typeof ClaimsEvaluatorConfigSchema,
        typeof MetricsEvaluatorConfigSchema,
        typeof RobustnessEvaluatorConfigSchema,
        typeof ExploratoryEvaluatorConfigSchema,
        typeof CustomEvaluatorConfigSchema
    ]>;
}, z.core.$strip>;
/** Object type inferred from the schema of the same name. */
export type EvaluatorEntrySchema = z.infer<typeof EvaluatorEntrySchema>;
395
+ //# sourceMappingURL=evaluator-schemas.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"evaluator-schemas.d.ts","sourceRoot":"","sources":["../../src/cli/evaluator-schemas.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAMxB;;GAEG;AACH,eAAO,MAAM,oBAAoB;;;;;;EAEJ,CAAC;AAC9B,MAAM,MAAM,oBAAoB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,oBAAoB,CAAC,CAAC;AAExE;;GAEG;AACH,eAAO,MAAM,yBAAyB;;;;EAEQ,CAAC;AAC/C,MAAM,MAAM,yBAAyB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,yBAAyB,CAAC,CAAC;AAElF;;GAEG;AACH,eAAO,MAAM,mBAAmB;;;;;EAEK,CAAC;AACtC,MAAM,MAAM,mBAAmB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,mBAAmB,CAAC,CAAC;AAEtE;;GAEG;AACH,eAAO,MAAM,qBAAqB;;;EAEQ,CAAC;AAC3C,MAAM,MAAM,qBAAqB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,qBAAqB,CAAC,CAAC;AAE1E;;GAEG;AACH,eAAO,MAAM,0BAA0B;;;;EAEA,CAAC;AACxC,MAAM,MAAM,0BAA0B,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,0BAA0B,CAAC,CAAC;AAEpF;;GAEG;AACH,eAAO,MAAM,uBAAuB;;;;;;EAEH,CAAC;AAClC,MAAM,MAAM,uBAAuB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,uBAAuB,CAAC,CAAC;AAM9E;;GAEG;AACH,eAAO,MAAM,mBAAmB;;;;iBAO9B,CAAC;AAWH;;GAEG;AACH,eAAO,MAAM,qBAAqB;;;;;;;;;;;;;;;;;;;;;;;iBAwBmD,CAAC;AACtF,MAAM,MAAM,qBAAqB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,qBAAqB,CAAC,CAAC;AAE1E;;GAEG;AACH,eAAO,MAAM,2BAA2B;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;iBAYtC,CAAC;AACH,MAAM,MAAM,2BAA2B,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,2BAA2B,CAAC,CAAC;AAMtF;;;;;GAKG;AACH,eAAO,MAAM,sBAAsB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;iBAgEiD,CAAC;AACrF,MAAM,MAAM,sBAAsB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,sBAAsB,CAAC,CAAC;AAE5E;;GAEG;AACH,eAAO,MAAM,4BAA4B;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;iBAKvC,CAAC;AACH,MAAM,MAAM,4BAA4B,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,4BAA4B,CAAC,CAAC;AAMxF;;GAEG;AACH,eAAO,MAAM,+BAA+B;;;;;;;;iBAa1C,CAAC;AACH,MAAM,MAAM,+BAA+B,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,+BAA+B,CAAC,CAAC;AAM9F;;GAEG;AACH,eAAO,MAAM,gCAAgC;;;;;;;;;;;;;;iBA0B3C,CAAC;AACH,MAAM,MAAM,gCAAgC,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,gCAAgC,CAAC,CAAC;AAMhG;;;GAGG;AACH,eAAO,MAAM,2BAA2B;;;;;kCAOrC,CAAC;AACJ,MAAM,MAAM,2BAA2B,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,2BAA2B,CAAC,CAAC;AAMtF;;GAEG;AACH,eAAO,MAAM,oBAAoB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;iBAc9B,CAAC;AACJ,MAAM,MAAM,oBAAoB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,oBAAoB,CAAC,CAAC"}
@@ -0,0 +1,285 @@
1
+ /**
2
+ * Evaluator Configuration Zod Schemas
3
+ *
4
+ * Runtime-validated schemas for all evaluator config types.
5
+ * Existing interfaces in types/evaluator.ts remain for class implementations;
6
+ * these schemas are used by the CLI for config validation and JSON schema generation.
7
+ */
8
+ import { z } from "zod";
9
+ // ============================================================================
10
+ // Shared Enums
11
+ // ============================================================================
/**
 * Enum schema of the built-in evaluation type names.
 */
export const EvaluationTypeSchema = z.enum(
    ["claims", "robustness", "metrics", "exploratory", "custom"],
).describe("Evaluation type");
/**
 * Enum schema for the direction in which a claim compares two values.
 */
export const ComparisonDirectionSchema = z.enum(
    ["greater", "less", "equal"],
).describe("Expected direction of difference");
/**
 * Enum schema for the scope over which a claim is stated to hold.
 */
export const ValidityScopeSchema = z.enum(
    ["global", "caseClass", "parameterRange", "localStructure"],
).describe("Scope of claim validity");
/**
 * Enum schema telling whether larger or smaller metric values rank better.
 */
export const MetricDirectionSchema = z.enum(
    ["higher-better", "lower-better"],
).describe("Metric direction for ranking");
/**
 * Enum schema for the kind of check a metrics criterion performs.
 */
export const MetricsCriterionTypeSchema = z.enum(
    ["threshold", "baseline", "target-range"],
).describe("Type of metrics criterion");
/**
 * Enum schema for the comparison operators usable in threshold and
 * baseline criteria.
 */
export const ThresholdOperatorSchema = z.enum(
    ["gt", "gte", "lt", "lte", "eq"],
).describe("Comparison operator");
48
+ // ============================================================================
49
+ // Base Config
50
+ // ============================================================================
/**
 * Fields common to every evaluator configuration. All three are optional;
 * the per-evaluator schemas below are built by extending this object.
 */
export const EvaluatorConfigBase = (() => {
    const name = z.string().optional().describe("Human-readable evaluator name");
    const description = z.string().optional().describe("Evaluator description");
    // Free-form bag: any string key, values left unvalidated.
    const options = z
        .record(z.string(), z.unknown())
        .optional()
        .describe("Additional evaluator-specific options");
    return z.object({ name, description, options });
})();
62
+ // ============================================================================
63
+ // Claims Evaluator
64
+ // ============================================================================
/**
 * Module-private schema for a single scope-constraint primitive:
 * a string, a number, a boolean or null.
 */
const PrimitiveSchema = z.union([
    z.string(),
    z.number(),
    z.boolean(),
    z.null(),
]);
/**
 * Schema for one evaluation claim (hypothesis).
 *
 * The identifying text fields must be non-empty strings. `significanceLevel`
 * is limited to the inclusive range 0..1 and `minEffectSize` to values >= 0;
 * both are optional and the schema itself applies no default.
 */
export const EvaluationClaimSchema = (() => {
    // Required, non-empty string carrying the given description.
    const requiredText = (label) => z.string().min(1).describe(label);
    // A constraint value is one primitive or a list of primitives.
    const constraintValue = z.union([PrimitiveSchema, z.array(PrimitiveSchema)]);
    const shape = {
        claimId: requiredText("Unique claim identifier"),
        description: requiredText("Human-readable claim description"),
        sut: requiredText("Primary SUT being evaluated"),
        baseline: requiredText("Baseline SUT for comparison"),
        metric: requiredText("Metric being compared"),
        direction: ComparisonDirectionSchema,
        threshold: z.number().optional().describe("Optional threshold for the difference"),
        scope: ValidityScopeSchema,
        scopeConstraints: z.record(z.string(), constraintValue).optional().describe("Scope constraints"),
        significanceLevel: z.number().min(0).max(1).optional().describe("Required significance level (default: 0.05)"),
        minEffectSize: z.number().min(0).optional().describe("Minimum effect size (Cohen's d)"),
        tags: z.array(z.string()).optional().describe("Tags for filtering"),
        citation: z.string().optional().describe("Citation/reference for the claim"),
    };
    return z.object(shape).meta({
        title: "EvaluationClaim",
        description: "An evaluation claim (hypothesis)",
    });
})();
/**
 * Schema for the claims evaluator configuration: the shared base fields, at
 * least one claim, and optional config-level `significanceLevel` (0..1
 * inclusive) and `minEffectSize` (>= 0).
 */
export const ClaimsEvaluatorConfigSchema = (() => {
    const claimsFields = {
        claims: z.array(EvaluationClaimSchema).min(1).describe("Claims to evaluate"),
        significanceLevel: z.number().min(0).max(1).optional().describe("Global significance level override"),
        minEffectSize: z.number().min(0).optional().describe("Global minimum effect size override"),
    };
    return EvaluatorConfigBase.extend(claimsFields).meta({
        title: "ClaimsEvaluatorConfig",
        description: "Configuration for the claims evaluator",
    });
})();
113
+ // ============================================================================
114
+ // Metrics Evaluator
115
+ // ============================================================================
/**
 * Schema for one metrics evaluation criterion.
 *
 * The `threshold`, `baseline` and `targetRange` detail objects are each
 * optional on their own. A superRefine step then reports a custom issue
 * when the one matching the criterion's `type` is missing.
 */
export const MetricsCriterionSchema = (() => {
    // Required, non-empty string carrying the given description.
    const requiredText = (label) => z.string().min(1).describe(label);

    const thresholdDetail = z
        .object({
            operator: ThresholdOperatorSchema,
            value: z.number().describe("Threshold value"),
        })
        .optional()
        .describe("Threshold operator and value (required when type is threshold)");

    const baselineDetail = z
        .object({
            sut: requiredText("Baseline SUT identifier"),
            operator: ThresholdOperatorSchema,
        })
        .optional()
        .describe("Baseline comparison (required when type is baseline)");

    const targetRangeDetail = z
        .object({
            min: z.number().optional().describe("Minimum value"),
            max: z.number().optional().describe("Maximum value"),
            minInclusive: z.boolean().optional().describe("Whether min is inclusive"),
            maxInclusive: z.boolean().optional().describe("Whether max is inclusive"),
        })
        .optional()
        .describe("Target range (required when type is target-range)");

    const scopeDetail = z
        .object({
            caseClass: z.union([z.string(), z.array(z.string())]).optional().describe("Case class filter"),
        })
        .optional()
        .describe("Optional scope constraints");

    // [criterion type, field that must accompany it, issue message]
    const requiredDetailByType = [
        ["threshold", "threshold", 'Criterion type "threshold" requires the "threshold" field'],
        ["baseline", "baseline", 'Criterion type "baseline" requires the "baseline" field'],
        ["target-range", "targetRange", 'Criterion type "target-range" requires the "targetRange" field'],
    ];

    return z
        .object({
            criterionId: requiredText("Unique criterion identifier"),
            description: requiredText("Human-readable description"),
            type: MetricsCriterionTypeSchema,
            metric: requiredText("Metric to evaluate"),
            sut: requiredText('SUT to evaluate (or "*" for all SUTs)'),
            threshold: thresholdDetail,
            baseline: baselineDetail,
            targetRange: targetRangeDetail,
            scopeConstraints: scopeDetail,
            tags: z.array(z.string()).optional().describe("Tags for filtering"),
        })
        .superRefine((criterion, ctx) => {
            for (const [kind, field, message] of requiredDetailByType) {
                if (criterion.type === kind && !criterion[field]) {
                    ctx.addIssue({ code: "custom", message, path: [field] });
                }
            }
        })
        .meta({ title: "MetricsCriterion", description: "A metrics evaluation criterion" });
})();
/**
 * Schema for the metrics evaluator configuration: the shared base fields
 * plus a non-empty `criteria` array.
 */
export const MetricsEvaluatorConfigSchema = (() => {
    const criteria = z.array(MetricsCriterionSchema).min(1).describe("Criteria to evaluate");
    return EvaluatorConfigBase.extend({ criteria }).meta({
        title: "MetricsEvaluatorConfig",
        description: "Configuration for the metrics evaluator",
    });
})();
196
+ // ============================================================================
197
+ // Robustness Evaluator
198
+ // ============================================================================
/**
 * Schema for the robustness evaluator configuration.
 *
 * `metrics` and `perturbations` must each be a non-empty array of non-empty
 * strings. `intensityLevels` is an optional number array and `runsPerLevel`
 * an optional integer >= 1.
 */
export const RobustnessEvaluatorConfigSchema = (() => {
    // Non-empty list of non-empty names.
    const nameList = (label) => z.array(z.string().min(1)).min(1).describe(label);
    return EvaluatorConfigBase.extend({
        metrics: nameList("Metrics to analyze"),
        perturbations: nameList("Perturbations applied"),
        intensityLevels: z.array(z.number()).optional().describe("Intensity levels tested"),
        runsPerLevel: z.number().int().min(1).optional().describe("Number of runs per perturbation level"),
    }).meta({
        title: "RobustnessEvaluatorConfig",
        description: "Configuration for the robustness evaluator",
    });
})();
216
+ // ============================================================================
217
+ // Exploratory Evaluator
218
+ // ============================================================================
/**
 * Schema for the exploratory evaluator configuration.
 *
 * Every field is optional, so an empty object is a valid configuration.
 * `significanceLevel` is limited to 0..1 inclusive and `minEffectSize` to
 * values >= 0; the schema itself applies no defaults.
 */
export const ExploratoryEvaluatorConfigSchema = (() => {
    // Optional list of non-empty names.
    const optionalNames = (label) => z.array(z.string().min(1)).optional().describe(label);
    // Optional boolean switch.
    const optionalFlag = (label) => z.boolean().optional().describe(label);
    return EvaluatorConfigBase.extend({
        metrics: optionalNames("Metrics to analyze (all if not specified)"),
        suts: optionalNames("SUTs to include (all if not specified)"),
        metricDirections: z
            .record(z.string(), MetricDirectionSchema)
            .optional()
            .describe("Metric directions for ranking interpretation"),
        significanceLevel: z
            .number()
            .min(0)
            .max(1)
            .optional()
            .describe("Significance level for statistical tests (default: 0.05)"),
        minEffectSize: z.number().min(0).optional().describe("Minimum effect size to consider meaningful"),
        computeCorrelations: optionalFlag("Whether to compute metric correlations"),
        analyzeCaseClassEffects: optionalFlag("Whether to analyze case-class effects"),
    }).meta({
        title: "ExploratoryEvaluatorConfig",
        description: "Configuration for the exploratory evaluator",
    });
})();
249
+ // ============================================================================
250
+ // Custom Evaluator
251
+ // ============================================================================
/**
 * Schema for a custom evaluator configuration: the shared base fields plus
 * a required, non-empty `customType`. The catchall accepts any additional
 * property with an unvalidated value.
 */
export const CustomEvaluatorConfigSchema = (() => {
    const customType = z.string().min(1).describe("Custom evaluator type name");
    const withExtras = EvaluatorConfigBase.extend({ customType }).catchall(z.unknown());
    return withExtras.meta({
        title: "CustomEvaluatorConfig",
        description: "Configuration for a custom evaluator",
    });
})();
264
+ // ============================================================================
265
+ // Evaluator Entry (for ExperimentConfig.evaluators array)
266
+ // ============================================================================
/**
 * One evaluator entry: a `type` tag plus a `config` object.
 *
 * `config` is a plain `z.union`, which yields the result of the first option
 * that parses without issues. ExploratoryEvaluatorConfigSchema has no
 * required fields and strips unknown keys, so when it was tried before
 * CustomEvaluatorConfigSchema it also matched custom configs and returned
 * them with `customType` and every extra property removed. The custom schema
 * (which requires `customType`) is therefore listed ahead of the exploratory
 * one. Reordering does not change which inputs are accepted, only which
 * branch produces the output: a config with a non-empty `customType` that no
 * earlier option accepts now comes back with its extra keys intact.
 *
 * NOTE: `type` and `config` are validated independently; nothing here checks
 * that the config's shape corresponds to the declared `type`.
 */
export const EvaluatorEntrySchema = z
    .object({
        type: EvaluationTypeSchema,
        config: z.union([
            ClaimsEvaluatorConfigSchema,
            MetricsEvaluatorConfigSchema,
            RobustnessEvaluatorConfigSchema,
            // Must precede the all-optional exploratory schema (see above).
            CustomEvaluatorConfigSchema,
            ExploratoryEvaluatorConfigSchema,
        ]),
    })
    .meta({
        title: "EvaluatorEntry",
        description: "An evaluator configuration entry",
    });
285
+ //# sourceMappingURL=evaluator-schemas.js.map