ppef 1.2.1 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +270 -6
- package/dist/__tests__/cli/evaluate-command.integration.test.js +60 -0
- package/dist/__tests__/cli/evaluate-command.integration.test.js.map +1 -1
- package/dist/__tests__/examples.integration.test.d.ts +8 -0
- package/dist/__tests__/examples.integration.test.d.ts.map +1 -0
- package/dist/__tests__/examples.integration.test.js +236 -0
- package/dist/__tests__/examples.integration.test.js.map +1 -0
- package/dist/cli/__tests__/commands.unit.test.js +12 -5
- package/dist/cli/__tests__/commands.unit.test.js.map +1 -1
- package/dist/cli/__tests__/config-loader.unit.test.js +30 -25
- package/dist/cli/__tests__/config-loader.unit.test.js.map +1 -1
- package/dist/cli/__tests__/evaluator-schemas.unit.test.d.ts +9 -0
- package/dist/cli/__tests__/evaluator-schemas.unit.test.d.ts.map +1 -0
- package/dist/cli/__tests__/evaluator-schemas.unit.test.js +334 -0
- package/dist/cli/__tests__/evaluator-schemas.unit.test.js.map +1 -0
- package/dist/cli/commands/aggregate.d.ts.map +1 -1
- package/dist/cli/commands/aggregate.js +20 -12
- package/dist/cli/commands/aggregate.js.map +1 -1
- package/dist/cli/commands/evaluate.d.ts.map +1 -1
- package/dist/cli/commands/evaluate.js +130 -24
- package/dist/cli/commands/evaluate.js.map +1 -1
- package/dist/cli/commands/plan.d.ts.map +1 -1
- package/dist/cli/commands/plan.js +40 -6
- package/dist/cli/commands/plan.js.map +1 -1
- package/dist/cli/commands/run.d.ts +9 -0
- package/dist/cli/commands/run.d.ts.map +1 -1
- package/dist/cli/commands/run.js +71 -12
- package/dist/cli/commands/run.js.map +1 -1
- package/dist/cli/commands/validate.d.ts.map +1 -1
- package/dist/cli/commands/validate.js +55 -0
- package/dist/cli/commands/validate.js.map +1 -1
- package/dist/cli/config-loader.d.ts +6 -3
- package/dist/cli/config-loader.d.ts.map +1 -1
- package/dist/cli/config-loader.js +31 -106
- package/dist/cli/config-loader.js.map +1 -1
- package/dist/cli/evaluator-schemas.d.ts +395 -0
- package/dist/cli/evaluator-schemas.d.ts.map +1 -0
- package/dist/cli/evaluator-schemas.js +285 -0
- package/dist/cli/evaluator-schemas.js.map +1 -0
- package/dist/cli/index.d.ts.map +1 -1
- package/dist/cli/index.js +11 -1
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/module-loader.d.ts.map +1 -1
- package/dist/cli/module-loader.js +38 -20
- package/dist/cli/module-loader.js.map +1 -1
- package/dist/cli/type-utils.d.ts +31 -0
- package/dist/cli/type-utils.d.ts.map +1 -0
- package/dist/cli/type-utils.js +38 -0
- package/dist/cli/type-utils.js.map +1 -0
- package/dist/cli/types.d.ts +284 -94
- package/dist/cli/types.d.ts.map +1 -1
- package/dist/cli/types.js +177 -1
- package/dist/cli/types.js.map +1 -1
- package/dist/collector/schema.js.map +1 -1
- package/dist/evaluators/claims-evaluator.d.ts.map +1 -1
- package/dist/evaluators/claims-evaluator.js +1 -1
- package/dist/evaluators/claims-evaluator.js.map +1 -1
- package/dist/evaluators/exploratory-evaluator.js.map +1 -1
- package/dist/executor/__tests__/worker-entry.integration.test.d.ts.map +1 -1
- package/dist/executor/__tests__/worker-entry.integration.test.js +19 -4
- package/dist/executor/__tests__/worker-entry.integration.test.js.map +1 -1
- package/dist/executor/binary-sut.d.ts.map +1 -1
- package/dist/executor/binary-sut.js +2 -1
- package/dist/executor/binary-sut.js.map +1 -1
- package/dist/executor/checkpoint-storage.d.ts.map +1 -1
- package/dist/executor/checkpoint-storage.js +13 -4
- package/dist/executor/checkpoint-storage.js.map +1 -1
- package/dist/executor/executor.d.ts +22 -0
- package/dist/executor/executor.d.ts.map +1 -1
- package/dist/executor/executor.js +133 -6
- package/dist/executor/executor.js.map +1 -1
- package/dist/executor/parallel-executor.d.ts.map +1 -1
- package/dist/executor/parallel-executor.js +9 -2
- package/dist/executor/parallel-executor.js.map +1 -1
- package/dist/executor/worker-entry.js +3 -1
- package/dist/executor/worker-entry.js.map +1 -1
- package/dist/executor/worker-executor.d.ts +9 -0
- package/dist/executor/worker-executor.d.ts.map +1 -1
- package/dist/executor/worker-executor.js +88 -9
- package/dist/executor/worker-executor.js.map +1 -1
- package/dist/executor/worker-threads-executor.d.ts.map +1 -1
- package/dist/executor/worker-threads-executor.js +52 -18
- package/dist/executor/worker-threads-executor.js.map +1 -1
- package/dist/index.cjs +1 -1
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -0
- package/dist/index.js.map +1 -1
- package/dist/renderers/latex-renderer.d.ts.map +1 -1
- package/dist/renderers/latex-renderer.js +20 -12
- package/dist/renderers/latex-renderer.js.map +1 -1
- package/dist/schemas/__tests__/json-schema-validator.unit.test.d.ts +8 -0
- package/dist/schemas/__tests__/json-schema-validator.unit.test.d.ts.map +1 -0
- package/dist/schemas/__tests__/json-schema-validator.unit.test.js +170 -0
- package/dist/schemas/__tests__/json-schema-validator.unit.test.js.map +1 -0
- package/dist/schemas/index.d.ts +7 -0
- package/dist/schemas/index.d.ts.map +1 -0
- package/dist/schemas/index.js +7 -0
- package/dist/schemas/index.js.map +1 -0
- package/dist/schemas/json-schema-validator.d.ts +59 -0
- package/dist/schemas/json-schema-validator.d.ts.map +1 -0
- package/dist/schemas/json-schema-validator.js +67 -0
- package/dist/schemas/json-schema-validator.js.map +1 -0
- package/dist/types/case.d.ts +4 -0
- package/dist/types/case.d.ts.map +1 -1
- package/dist/types/result.d.ts +2 -0
- package/dist/types/result.d.ts.map +1 -1
- package/dist/types/sut.d.ts +4 -0
- package/dist/types/sut.d.ts.map +1 -1
- package/package.json +15 -5
- package/ppef.schema.json +1178 -0
package/ppef.schema.json
ADDED
|
@@ -0,0 +1,1178 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://ppef.dev/schemas/v1.2.1/ppef.schema.json",
|
|
4
|
+
"title": "ExperimentConfig",
|
|
5
|
+
"description": "PPEF experiment configuration",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"cases": {
|
|
9
|
+
"description": "Test cases to run",
|
|
10
|
+
"type": "array",
|
|
11
|
+
"items": {
|
|
12
|
+
"title": "CaseConfig",
|
|
13
|
+
"description": "Test case configuration",
|
|
14
|
+
"type": "object",
|
|
15
|
+
"properties": {
|
|
16
|
+
"exportName": {
|
|
17
|
+
"description": "Name of the export to use as case factory",
|
|
18
|
+
"type": "string",
|
|
19
|
+
"minLength": 1
|
|
20
|
+
},
|
|
21
|
+
"id": {
|
|
22
|
+
"description": "Unique case identifier",
|
|
23
|
+
"type": "string",
|
|
24
|
+
"minLength": 1
|
|
25
|
+
},
|
|
26
|
+
"inputSchema": {
|
|
27
|
+
"description": "Per-case input schema (overrides schemas.input)",
|
|
28
|
+
"type": "object",
|
|
29
|
+
"additionalProperties": {},
|
|
30
|
+
"propertyNames": {
|
|
31
|
+
"type": "string"
|
|
32
|
+
}
|
|
33
|
+
},
|
|
34
|
+
"module": {
|
|
35
|
+
"description": "Path to module file (relative to config file)",
|
|
36
|
+
"type": "string",
|
|
37
|
+
"minLength": 1
|
|
38
|
+
}
|
|
39
|
+
},
|
|
40
|
+
"required": [
|
|
41
|
+
"exportName",
|
|
42
|
+
"id",
|
|
43
|
+
"module"
|
|
44
|
+
],
|
|
45
|
+
"additionalProperties": false
|
|
46
|
+
}
|
|
47
|
+
},
|
|
48
|
+
"evaluators": {
|
|
49
|
+
"description": "Evaluator configurations to run after experiment completion",
|
|
50
|
+
"type": "array",
|
|
51
|
+
"items": {
|
|
52
|
+
"title": "EvaluatorEntry",
|
|
53
|
+
"description": "An evaluator configuration entry",
|
|
54
|
+
"type": "object",
|
|
55
|
+
"properties": {
|
|
56
|
+
"config": {
|
|
57
|
+
"$comment": "Discriminated by sibling 'type' field: claims -> ClaimsEvaluatorConfig, metrics -> MetricsEvaluatorConfig, robustness -> RobustnessEvaluatorConfig, exploratory -> ExploratoryEvaluatorConfig, custom -> CustomEvaluatorConfig",
|
|
58
|
+
"anyOf": [
|
|
59
|
+
{
|
|
60
|
+
"$ref": "#/$defs/ClaimsEvaluatorConfig"
|
|
61
|
+
},
|
|
62
|
+
{
|
|
63
|
+
"$ref": "#/$defs/MetricsEvaluatorConfig"
|
|
64
|
+
},
|
|
65
|
+
{
|
|
66
|
+
"$ref": "#/$defs/RobustnessEvaluatorConfig"
|
|
67
|
+
},
|
|
68
|
+
{
|
|
69
|
+
"$ref": "#/$defs/ExploratoryEvaluatorConfig"
|
|
70
|
+
},
|
|
71
|
+
{
|
|
72
|
+
"$ref": "#/$defs/CustomEvaluatorConfig"
|
|
73
|
+
}
|
|
74
|
+
]
|
|
75
|
+
},
|
|
76
|
+
"type": {
|
|
77
|
+
"description": "Evaluation type",
|
|
78
|
+
"type": "string",
|
|
79
|
+
"oneOf": [
|
|
80
|
+
{
|
|
81
|
+
"description": "Test explicit hypotheses with statistical significance",
|
|
82
|
+
"const": "claims"
|
|
83
|
+
},
|
|
84
|
+
{
|
|
85
|
+
"description": "Measure sensitivity under perturbations",
|
|
86
|
+
"const": "robustness"
|
|
87
|
+
},
|
|
88
|
+
{
|
|
89
|
+
"description": "Evaluate against thresholds, baselines, or target ranges",
|
|
90
|
+
"const": "metrics"
|
|
91
|
+
},
|
|
92
|
+
{
|
|
93
|
+
"description": "Hypothesis-free analysis: rankings, correlations",
|
|
94
|
+
"const": "exploratory"
|
|
95
|
+
},
|
|
96
|
+
{
|
|
97
|
+
"description": "User-defined evaluator loaded from a module",
|
|
98
|
+
"const": "custom"
|
|
99
|
+
}
|
|
100
|
+
]
|
|
101
|
+
}
|
|
102
|
+
},
|
|
103
|
+
"required": [
|
|
104
|
+
"config",
|
|
105
|
+
"type"
|
|
106
|
+
],
|
|
107
|
+
"additionalProperties": false
|
|
108
|
+
}
|
|
109
|
+
},
|
|
110
|
+
"executor": {
|
|
111
|
+
"title": "ExecutorConfig",
|
|
112
|
+
"description": "Executor configuration",
|
|
113
|
+
"type": "object",
|
|
114
|
+
"properties": {
|
|
115
|
+
"collectProvenance": {
|
|
116
|
+
"description": "Whether to collect provenance information",
|
|
117
|
+
"type": "boolean",
|
|
118
|
+
"default": true
|
|
119
|
+
},
|
|
120
|
+
"concurrency": {
|
|
121
|
+
"description": "Number of concurrent runs",
|
|
122
|
+
"type": "integer",
|
|
123
|
+
"minimum": 1,
|
|
124
|
+
"maximum": 256
|
|
125
|
+
},
|
|
126
|
+
"continueOnError": {
|
|
127
|
+
"description": "Continue execution if a single run fails",
|
|
128
|
+
"type": "boolean",
|
|
129
|
+
"default": true
|
|
130
|
+
},
|
|
131
|
+
"repetitions": {
|
|
132
|
+
"description": "Number of repetitions per case",
|
|
133
|
+
"type": "integer",
|
|
134
|
+
"minimum": 1,
|
|
135
|
+
"maximum": 10000,
|
|
136
|
+
"default": 1
|
|
137
|
+
},
|
|
138
|
+
"seedBase": {
|
|
139
|
+
"description": "Random seed base",
|
|
140
|
+
"type": "integer",
|
|
141
|
+
"minimum": 0,
|
|
142
|
+
"maximum": 2147483647,
|
|
143
|
+
"default": 42
|
|
144
|
+
},
|
|
145
|
+
"timeoutMs": {
|
|
146
|
+
"description": "Timeout per run in milliseconds (0 = no timeout)",
|
|
147
|
+
"type": "integer",
|
|
148
|
+
"minimum": 0,
|
|
149
|
+
"maximum": 86400000,
|
|
150
|
+
"default": 0
|
|
151
|
+
}
|
|
152
|
+
},
|
|
153
|
+
"additionalProperties": false
|
|
154
|
+
},
|
|
155
|
+
"experiment": {
|
|
156
|
+
"title": "ExperimentMeta",
|
|
157
|
+
"description": "Experiment metadata",
|
|
158
|
+
"type": "object",
|
|
159
|
+
"properties": {
|
|
160
|
+
"description": {
|
|
161
|
+
"description": "Experiment description",
|
|
162
|
+
"type": "string"
|
|
163
|
+
},
|
|
164
|
+
"name": {
|
|
165
|
+
"description": "Human-readable experiment name",
|
|
166
|
+
"type": "string",
|
|
167
|
+
"minLength": 1
|
|
168
|
+
},
|
|
169
|
+
"version": {
|
|
170
|
+
"description": "Experiment version string",
|
|
171
|
+
"type": "string"
|
|
172
|
+
}
|
|
173
|
+
},
|
|
174
|
+
"required": [
|
|
175
|
+
"name"
|
|
176
|
+
],
|
|
177
|
+
"additionalProperties": false
|
|
178
|
+
},
|
|
179
|
+
"metricsExtractor": {
|
|
180
|
+
"title": "MetricsExtractorConfig",
|
|
181
|
+
"description": "Metrics extractor configuration",
|
|
182
|
+
"type": "object",
|
|
183
|
+
"properties": {
|
|
184
|
+
"exportName": {
|
|
185
|
+
"description": "Name of the export to use as metrics extractor",
|
|
186
|
+
"type": "string",
|
|
187
|
+
"minLength": 1
|
|
188
|
+
},
|
|
189
|
+
"module": {
|
|
190
|
+
"description": "Path to module file (relative to config file)",
|
|
191
|
+
"type": "string",
|
|
192
|
+
"minLength": 1
|
|
193
|
+
}
|
|
194
|
+
},
|
|
195
|
+
"required": [
|
|
196
|
+
"exportName",
|
|
197
|
+
"module"
|
|
198
|
+
],
|
|
199
|
+
"additionalProperties": false
|
|
200
|
+
},
|
|
201
|
+
"output": {
|
|
202
|
+
"title": "OutputConfig",
|
|
203
|
+
"description": "Output configuration",
|
|
204
|
+
"type": "object",
|
|
205
|
+
"properties": {
|
|
206
|
+
"aggregate": {
|
|
207
|
+
"description": "Whether to aggregate results",
|
|
208
|
+
"type": "boolean",
|
|
209
|
+
"default": true
|
|
210
|
+
},
|
|
211
|
+
"format": {
|
|
212
|
+
"description": "Output format",
|
|
213
|
+
"type": "string",
|
|
214
|
+
"enum": [
|
|
215
|
+
"json",
|
|
216
|
+
"json-pretty"
|
|
217
|
+
],
|
|
218
|
+
"default": "json-pretty"
|
|
219
|
+
},
|
|
220
|
+
"path": {
|
|
221
|
+
"description": "Output directory path",
|
|
222
|
+
"type": "string"
|
|
223
|
+
}
|
|
224
|
+
},
|
|
225
|
+
"additionalProperties": false
|
|
226
|
+
},
|
|
227
|
+
"schemas": {
|
|
228
|
+
"description": "Optional JSON Schemas for input/output validation",
|
|
229
|
+
"type": "object",
|
|
230
|
+
"properties": {
|
|
231
|
+
"input": {
|
|
232
|
+
"description": "Schema for case inputs",
|
|
233
|
+
"type": "object",
|
|
234
|
+
"additionalProperties": {},
|
|
235
|
+
"propertyNames": {
|
|
236
|
+
"type": "string"
|
|
237
|
+
}
|
|
238
|
+
},
|
|
239
|
+
"output": {
|
|
240
|
+
"description": "Schema for SUT outputs",
|
|
241
|
+
"type": "object",
|
|
242
|
+
"additionalProperties": {},
|
|
243
|
+
"propertyNames": {
|
|
244
|
+
"type": "string"
|
|
245
|
+
}
|
|
246
|
+
}
|
|
247
|
+
},
|
|
248
|
+
"additionalProperties": false
|
|
249
|
+
},
|
|
250
|
+
"suts": {
|
|
251
|
+
"description": "Systems Under Test to evaluate",
|
|
252
|
+
"type": "array",
|
|
253
|
+
"items": {
|
|
254
|
+
"title": "SutConfig",
|
|
255
|
+
"description": "System Under Test configuration",
|
|
256
|
+
"type": "object",
|
|
257
|
+
"properties": {
|
|
258
|
+
"binaryArgs": {
|
|
259
|
+
"description": "Arguments to pass to binary command",
|
|
260
|
+
"type": "array",
|
|
261
|
+
"items": {
|
|
262
|
+
"type": "string"
|
|
263
|
+
}
|
|
264
|
+
},
|
|
265
|
+
"binaryCommand": {
|
|
266
|
+
"description": "Command to execute (when type=\"binary\")",
|
|
267
|
+
"type": "string"
|
|
268
|
+
},
|
|
269
|
+
"binaryInputFormat": {
|
|
270
|
+
"description": "How to serialize inputs to stdin",
|
|
271
|
+
"type": "string",
|
|
272
|
+
"enum": [
|
|
273
|
+
"json",
|
|
274
|
+
"raw",
|
|
275
|
+
"lines"
|
|
276
|
+
]
|
|
277
|
+
},
|
|
278
|
+
"binaryOutputFormat": {
|
|
279
|
+
"description": "How to deserialize stdout",
|
|
280
|
+
"type": "string",
|
|
281
|
+
"enum": [
|
|
282
|
+
"json",
|
|
283
|
+
"raw",
|
|
284
|
+
"lines"
|
|
285
|
+
]
|
|
286
|
+
},
|
|
287
|
+
"binaryTimeout": {
|
|
288
|
+
"description": "Binary SUT timeout per run in milliseconds",
|
|
289
|
+
"type": "integer",
|
|
290
|
+
"minimum": 0,
|
|
291
|
+
"maximum": 86400000
|
|
292
|
+
},
|
|
293
|
+
"config": {
|
|
294
|
+
"description": "Optional configuration to pass to factory",
|
|
295
|
+
"type": "object",
|
|
296
|
+
"additionalProperties": {},
|
|
297
|
+
"propertyNames": {
|
|
298
|
+
"type": "string"
|
|
299
|
+
}
|
|
300
|
+
},
|
|
301
|
+
"exportName": {
|
|
302
|
+
"description": "Name of the export to use as factory",
|
|
303
|
+
"type": "string",
|
|
304
|
+
"minLength": 1
|
|
305
|
+
},
|
|
306
|
+
"id": {
|
|
307
|
+
"description": "Unique SUT identifier",
|
|
308
|
+
"type": "string",
|
|
309
|
+
"minLength": 1
|
|
310
|
+
},
|
|
311
|
+
"module": {
|
|
312
|
+
"description": "Path to module file (relative to config file)",
|
|
313
|
+
"type": "string",
|
|
314
|
+
"minLength": 1
|
|
315
|
+
},
|
|
316
|
+
"outputSchema": {
|
|
317
|
+
"description": "Per-SUT output schema (overrides schemas.output)",
|
|
318
|
+
"type": "object",
|
|
319
|
+
"additionalProperties": {},
|
|
320
|
+
"propertyNames": {
|
|
321
|
+
"type": "string"
|
|
322
|
+
}
|
|
323
|
+
},
|
|
324
|
+
"registration": {
|
|
325
|
+
"title": "SutRegistration",
|
|
326
|
+
"description": "SUT registration metadata",
|
|
327
|
+
"type": "object",
|
|
328
|
+
"properties": {
|
|
329
|
+
"description": {
|
|
330
|
+
"description": "Optional SUT description",
|
|
331
|
+
"type": "string"
|
|
332
|
+
},
|
|
333
|
+
"name": {
|
|
334
|
+
"description": "Human-readable SUT name",
|
|
335
|
+
"type": "string",
|
|
336
|
+
"minLength": 1
|
|
337
|
+
},
|
|
338
|
+
"role": {
|
|
339
|
+
"description": "Role of the SUT in evaluation",
|
|
340
|
+
"type": "string",
|
|
341
|
+
"oneOf": [
|
|
342
|
+
{
|
|
343
|
+
"description": "The system being evaluated; the novel algorithm or implementation",
|
|
344
|
+
"const": "primary"
|
|
345
|
+
},
|
|
346
|
+
{
|
|
347
|
+
"description": "A reference implementation for comparison",
|
|
348
|
+
"const": "baseline"
|
|
349
|
+
},
|
|
350
|
+
{
|
|
351
|
+
"description": "Ground truth provider; defines correct answers",
|
|
352
|
+
"const": "oracle"
|
|
353
|
+
}
|
|
354
|
+
]
|
|
355
|
+
},
|
|
356
|
+
"tags": {
|
|
357
|
+
"description": "Searchable tags",
|
|
358
|
+
"type": "array",
|
|
359
|
+
"items": {
|
|
360
|
+
"type": "string"
|
|
361
|
+
}
|
|
362
|
+
},
|
|
363
|
+
"version": {
|
|
364
|
+
"description": "SUT version string",
|
|
365
|
+
"type": "string",
|
|
366
|
+
"minLength": 1
|
|
367
|
+
}
|
|
368
|
+
},
|
|
369
|
+
"required": [
|
|
370
|
+
"name",
|
|
371
|
+
"role",
|
|
372
|
+
"version"
|
|
373
|
+
],
|
|
374
|
+
"additionalProperties": false
|
|
375
|
+
},
|
|
376
|
+
"type": {
|
|
377
|
+
"description": "SUT type: \"module\" (default) or \"binary\"",
|
|
378
|
+
"type": "string",
|
|
379
|
+
"enum": [
|
|
380
|
+
"module",
|
|
381
|
+
"binary"
|
|
382
|
+
]
|
|
383
|
+
}
|
|
384
|
+
},
|
|
385
|
+
"required": [
|
|
386
|
+
"exportName",
|
|
387
|
+
"id",
|
|
388
|
+
"module",
|
|
389
|
+
"registration"
|
|
390
|
+
],
|
|
391
|
+
"additionalProperties": false
|
|
392
|
+
}
|
|
393
|
+
}
|
|
394
|
+
},
|
|
395
|
+
"required": [
|
|
396
|
+
"cases",
|
|
397
|
+
"executor",
|
|
398
|
+
"experiment",
|
|
399
|
+
"metricsExtractor",
|
|
400
|
+
"output",
|
|
401
|
+
"suts"
|
|
402
|
+
],
|
|
403
|
+
"additionalProperties": false,
|
|
404
|
+
"examples": [
|
|
405
|
+
{
|
|
406
|
+
"cases": [
|
|
407
|
+
{
|
|
408
|
+
"exportName": "createCase",
|
|
409
|
+
"id": "hello-world",
|
|
410
|
+
"module": "./case.mjs"
|
|
411
|
+
}
|
|
412
|
+
],
|
|
413
|
+
"executor": {
|
|
414
|
+
"repetitions": 3
|
|
415
|
+
},
|
|
416
|
+
"experiment": {
|
|
417
|
+
"description": "Compare string length implementations",
|
|
418
|
+
"name": "string-length"
|
|
419
|
+
},
|
|
420
|
+
"metricsExtractor": {
|
|
421
|
+
"exportName": "extract",
|
|
422
|
+
"module": "./metrics.mjs"
|
|
423
|
+
},
|
|
424
|
+
"output": {
|
|
425
|
+
"path": "./results"
|
|
426
|
+
},
|
|
427
|
+
"suts": [
|
|
428
|
+
{
|
|
429
|
+
"exportName": "createSut",
|
|
430
|
+
"id": "builtin-length",
|
|
431
|
+
"module": "./sut.mjs",
|
|
432
|
+
"registration": {
|
|
433
|
+
"name": "Built-in .length",
|
|
434
|
+
"role": "primary",
|
|
435
|
+
"version": "1.0.0"
|
|
436
|
+
}
|
|
437
|
+
}
|
|
438
|
+
]
|
|
439
|
+
}
|
|
440
|
+
],
|
|
441
|
+
"$defs": {
|
|
442
|
+
"ClaimsEvaluatorConfig": {
|
|
443
|
+
"title": "ClaimsEvaluatorConfig",
|
|
444
|
+
"description": "Configuration for the claims evaluator",
|
|
445
|
+
"type": "object",
|
|
446
|
+
"properties": {
|
|
447
|
+
"claims": {
|
|
448
|
+
"description": "Claims to evaluate",
|
|
449
|
+
"type": "array",
|
|
450
|
+
"items": {
|
|
451
|
+
"title": "EvaluationClaim",
|
|
452
|
+
"description": "An evaluation claim (hypothesis)",
|
|
453
|
+
"type": "object",
|
|
454
|
+
"properties": {
|
|
455
|
+
"baseline": {
|
|
456
|
+
"description": "Baseline SUT for comparison",
|
|
457
|
+
"type": "string",
|
|
458
|
+
"minLength": 1
|
|
459
|
+
},
|
|
460
|
+
"citation": {
|
|
461
|
+
"description": "Citation/reference for the claim",
|
|
462
|
+
"type": "string"
|
|
463
|
+
},
|
|
464
|
+
"claimId": {
|
|
465
|
+
"description": "Unique claim identifier",
|
|
466
|
+
"type": "string",
|
|
467
|
+
"minLength": 1
|
|
468
|
+
},
|
|
469
|
+
"description": {
|
|
470
|
+
"description": "Human-readable claim description",
|
|
471
|
+
"type": "string",
|
|
472
|
+
"minLength": 1
|
|
473
|
+
},
|
|
474
|
+
"direction": {
|
|
475
|
+
"description": "Expected direction of difference",
|
|
476
|
+
"type": "string",
|
|
477
|
+
"oneOf": [
|
|
478
|
+
{
|
|
479
|
+
"description": "Primary SUT metric should be greater than baseline",
|
|
480
|
+
"const": "greater"
|
|
481
|
+
},
|
|
482
|
+
{
|
|
483
|
+
"description": "Primary SUT metric should be less than baseline",
|
|
484
|
+
"const": "less"
|
|
485
|
+
},
|
|
486
|
+
{
|
|
487
|
+
"description": "Primary SUT metric should be equal to baseline",
|
|
488
|
+
"const": "equal"
|
|
489
|
+
}
|
|
490
|
+
]
|
|
491
|
+
},
|
|
492
|
+
"metric": {
|
|
493
|
+
"description": "Metric being compared",
|
|
494
|
+
"type": "string",
|
|
495
|
+
"minLength": 1
|
|
496
|
+
},
|
|
497
|
+
"minEffectSize": {
|
|
498
|
+
"description": "Minimum effect size (Cohen's d)",
|
|
499
|
+
"type": "number",
|
|
500
|
+
"minimum": 0
|
|
501
|
+
},
|
|
502
|
+
"scope": {
|
|
503
|
+
"description": "Scope of claim validity",
|
|
504
|
+
"type": "string",
|
|
505
|
+
"oneOf": [
|
|
506
|
+
{
|
|
507
|
+
"description": "Claim applies across all cases and conditions",
|
|
508
|
+
"const": "global"
|
|
509
|
+
},
|
|
510
|
+
{
|
|
511
|
+
"description": "Claim applies within a specific case class",
|
|
512
|
+
"const": "caseClass"
|
|
513
|
+
},
|
|
514
|
+
{
|
|
515
|
+
"description": "Claim applies within a parameter range",
|
|
516
|
+
"const": "parameterRange"
|
|
517
|
+
},
|
|
518
|
+
{
|
|
519
|
+
"description": "Claim applies to local structural properties",
|
|
520
|
+
"const": "localStructure"
|
|
521
|
+
}
|
|
522
|
+
]
|
|
523
|
+
},
|
|
524
|
+
"scopeConstraints": {
|
|
525
|
+
"description": "Scope constraints",
|
|
526
|
+
"type": "object",
|
|
527
|
+
"additionalProperties": {
|
|
528
|
+
"anyOf": [
|
|
529
|
+
{
|
|
530
|
+
"anyOf": [
|
|
531
|
+
{
|
|
532
|
+
"type": "string"
|
|
533
|
+
},
|
|
534
|
+
{
|
|
535
|
+
"type": "number"
|
|
536
|
+
},
|
|
537
|
+
{
|
|
538
|
+
"type": "boolean"
|
|
539
|
+
},
|
|
540
|
+
{
|
|
541
|
+
"type": "null"
|
|
542
|
+
}
|
|
543
|
+
]
|
|
544
|
+
},
|
|
545
|
+
{
|
|
546
|
+
"type": "array",
|
|
547
|
+
"items": {
|
|
548
|
+
"anyOf": [
|
|
549
|
+
{
|
|
550
|
+
"type": "string"
|
|
551
|
+
},
|
|
552
|
+
{
|
|
553
|
+
"type": "number"
|
|
554
|
+
},
|
|
555
|
+
{
|
|
556
|
+
"type": "boolean"
|
|
557
|
+
},
|
|
558
|
+
{
|
|
559
|
+
"type": "null"
|
|
560
|
+
}
|
|
561
|
+
]
|
|
562
|
+
}
|
|
563
|
+
}
|
|
564
|
+
]
|
|
565
|
+
},
|
|
566
|
+
"propertyNames": {
|
|
567
|
+
"type": "string"
|
|
568
|
+
}
|
|
569
|
+
},
|
|
570
|
+
"significanceLevel": {
|
|
571
|
+
"description": "Required significance level (default: 0.05)",
|
|
572
|
+
"type": "number",
|
|
573
|
+
"minimum": 0,
|
|
574
|
+
"maximum": 1
|
|
575
|
+
},
|
|
576
|
+
"sut": {
|
|
577
|
+
"description": "Primary SUT being evaluated",
|
|
578
|
+
"type": "string",
|
|
579
|
+
"minLength": 1
|
|
580
|
+
},
|
|
581
|
+
"tags": {
|
|
582
|
+
"description": "Tags for filtering",
|
|
583
|
+
"type": "array",
|
|
584
|
+
"items": {
|
|
585
|
+
"type": "string"
|
|
586
|
+
}
|
|
587
|
+
},
|
|
588
|
+
"threshold": {
|
|
589
|
+
"description": "Optional threshold for the difference",
|
|
590
|
+
"type": "number"
|
|
591
|
+
}
|
|
592
|
+
},
|
|
593
|
+
"required": [
|
|
594
|
+
"baseline",
|
|
595
|
+
"claimId",
|
|
596
|
+
"description",
|
|
597
|
+
"direction",
|
|
598
|
+
"metric",
|
|
599
|
+
"scope",
|
|
600
|
+
"sut"
|
|
601
|
+
],
|
|
602
|
+
"additionalProperties": false
|
|
603
|
+
},
|
|
604
|
+
"minItems": 1
|
|
605
|
+
},
|
|
606
|
+
"description": {
|
|
607
|
+
"description": "Evaluator description",
|
|
608
|
+
"type": "string"
|
|
609
|
+
},
|
|
610
|
+
"minEffectSize": {
|
|
611
|
+
"description": "Global minimum effect size override",
|
|
612
|
+
"type": "number",
|
|
613
|
+
"minimum": 0
|
|
614
|
+
},
|
|
615
|
+
"name": {
|
|
616
|
+
"description": "Human-readable evaluator name",
|
|
617
|
+
"type": "string"
|
|
618
|
+
},
|
|
619
|
+
"options": {
|
|
620
|
+
"description": "Additional evaluator-specific options",
|
|
621
|
+
"type": "object",
|
|
622
|
+
"additionalProperties": {},
|
|
623
|
+
"propertyNames": {
|
|
624
|
+
"type": "string"
|
|
625
|
+
}
|
|
626
|
+
},
|
|
627
|
+
"significanceLevel": {
|
|
628
|
+
"description": "Global significance level override",
|
|
629
|
+
"type": "number",
|
|
630
|
+
"minimum": 0,
|
|
631
|
+
"maximum": 1
|
|
632
|
+
}
|
|
633
|
+
},
|
|
634
|
+
"required": [
|
|
635
|
+
"claims"
|
|
636
|
+
],
|
|
637
|
+
"additionalProperties": false,
|
|
638
|
+
"examples": [
|
|
639
|
+
{
|
|
640
|
+
"claims": [
|
|
641
|
+
{
|
|
642
|
+
"description": "Built-in .length reports greater length than spread operator on emoji strings",
|
|
643
|
+
"baseline": "spread-length",
|
|
644
|
+
"claimId": "C001",
|
|
645
|
+
"direction": "greater",
|
|
646
|
+
"metric": "length",
|
|
647
|
+
"scope": "global",
|
|
648
|
+
"sut": "builtin-length"
|
|
649
|
+
}
|
|
650
|
+
],
|
|
651
|
+
"significanceLevel": 0.05
|
|
652
|
+
}
|
|
653
|
+
]
|
|
654
|
+
},
|
|
655
|
+
"CustomEvaluatorConfig": {
|
|
656
|
+
"title": "CustomEvaluatorConfig",
|
|
657
|
+
"description": "Configuration for a custom evaluator",
|
|
658
|
+
"type": "object",
|
|
659
|
+
"properties": {
|
|
660
|
+
"customType": {
|
|
661
|
+
"description": "Custom evaluator type name",
|
|
662
|
+
"type": "string",
|
|
663
|
+
"minLength": 1
|
|
664
|
+
},
|
|
665
|
+
"description": {
|
|
666
|
+
"description": "Evaluator description",
|
|
667
|
+
"type": "string"
|
|
668
|
+
},
|
|
669
|
+
"name": {
|
|
670
|
+
"description": "Human-readable evaluator name",
|
|
671
|
+
"type": "string"
|
|
672
|
+
},
|
|
673
|
+
"options": {
|
|
674
|
+
"description": "Additional evaluator-specific options",
|
|
675
|
+
"type": "object",
|
|
676
|
+
"additionalProperties": {},
|
|
677
|
+
"propertyNames": {
|
|
678
|
+
"type": "string"
|
|
679
|
+
}
|
|
680
|
+
}
|
|
681
|
+
},
|
|
682
|
+
"required": [
|
|
683
|
+
"customType"
|
|
684
|
+
],
|
|
685
|
+
"additionalProperties": {}
|
|
686
|
+
},
|
|
687
|
+
"ExploratoryEvaluatorConfig": {
|
|
688
|
+
"title": "ExploratoryEvaluatorConfig",
|
|
689
|
+
"description": "Configuration for the exploratory evaluator",
|
|
690
|
+
"type": "object",
|
|
691
|
+
"properties": {
|
|
692
|
+
"analyzeCaseClassEffects": {
|
|
693
|
+
"description": "Whether to analyze case-class effects",
|
|
694
|
+
"type": "boolean"
|
|
695
|
+
},
|
|
696
|
+
"computeCorrelations": {
|
|
697
|
+
"description": "Whether to compute metric correlations",
|
|
698
|
+
"type": "boolean"
|
|
699
|
+
},
|
|
700
|
+
"description": {
|
|
701
|
+
"description": "Evaluator description",
|
|
702
|
+
"type": "string"
|
|
703
|
+
},
|
|
704
|
+
"metricDirections": {
|
|
705
|
+
"description": "Metric directions for ranking interpretation",
|
|
706
|
+
"type": "object",
|
|
707
|
+
"additionalProperties": {
|
|
708
|
+
"description": "Metric direction for ranking",
|
|
709
|
+
"type": "string",
|
|
710
|
+
"oneOf": [
|
|
711
|
+
{
|
|
712
|
+
"description": "Higher values indicate better performance",
|
|
713
|
+
"const": "higher-better"
|
|
714
|
+
},
|
|
715
|
+
{
|
|
716
|
+
"description": "Lower values indicate better performance",
|
|
717
|
+
"const": "lower-better"
|
|
718
|
+
}
|
|
719
|
+
]
|
|
720
|
+
},
|
|
721
|
+
"propertyNames": {
|
|
722
|
+
"type": "string"
|
|
723
|
+
}
|
|
724
|
+
},
|
|
725
|
+
"metrics": {
|
|
726
|
+
"description": "Metrics to analyze (all if not specified)",
|
|
727
|
+
"type": "array",
|
|
728
|
+
"items": {
|
|
729
|
+
"type": "string",
|
|
730
|
+
"minLength": 1
|
|
731
|
+
}
|
|
732
|
+
},
|
|
733
|
+
"minEffectSize": {
|
|
734
|
+
"description": "Minimum effect size to consider meaningful",
|
|
735
|
+
"type": "number",
|
|
736
|
+
"minimum": 0
|
|
737
|
+
},
|
|
738
|
+
"name": {
|
|
739
|
+
"description": "Human-readable evaluator name",
|
|
740
|
+
"type": "string"
|
|
741
|
+
},
|
|
742
|
+
"options": {
|
|
743
|
+
"description": "Additional evaluator-specific options",
|
|
744
|
+
"type": "object",
|
|
745
|
+
"additionalProperties": {},
|
|
746
|
+
"propertyNames": {
|
|
747
|
+
"type": "string"
|
|
748
|
+
}
|
|
749
|
+
},
|
|
750
|
+
"significanceLevel": {
|
|
751
|
+
"description": "Significance level for statistical tests (default: 0.05)",
|
|
752
|
+
"type": "number",
|
|
753
|
+
"minimum": 0,
|
|
754
|
+
"maximum": 1
|
|
755
|
+
},
|
|
756
|
+
"suts": {
|
|
757
|
+
"description": "SUTs to include (all if not specified)",
|
|
758
|
+
"type": "array",
|
|
759
|
+
"items": {
|
|
760
|
+
"type": "string",
|
|
761
|
+
"minLength": 1
|
|
762
|
+
}
|
|
763
|
+
}
|
|
764
|
+
},
|
|
765
|
+
"additionalProperties": false,
|
|
766
|
+
"examples": [
|
|
767
|
+
{
|
|
768
|
+
"analyzeCaseClassEffects": true,
|
|
769
|
+
"computeCorrelations": false,
|
|
770
|
+
"metricDirections": {
|
|
771
|
+
"length": "higher-better"
|
|
772
|
+
},
|
|
773
|
+
"metrics": [
|
|
774
|
+
"length"
|
|
775
|
+
]
|
|
776
|
+
}
|
|
777
|
+
]
|
|
778
|
+
},
|
|
779
|
+
"MetricsEvaluatorConfig": {
|
|
780
|
+
"title": "MetricsEvaluatorConfig",
|
|
781
|
+
"description": "Configuration for the metrics evaluator",
|
|
782
|
+
"type": "object",
|
|
783
|
+
"properties": {
|
|
784
|
+
"criteria": {
|
|
785
|
+
"description": "Criteria to evaluate",
|
|
786
|
+
"type": "array",
|
|
787
|
+
"items": {
|
|
788
|
+
"title": "MetricsCriterion",
|
|
789
|
+
"description": "A metrics evaluation criterion",
|
|
790
|
+
"type": "object",
|
|
791
|
+
"allOf": [
|
|
792
|
+
{
|
|
793
|
+
"if": {
|
|
794
|
+
"properties": {
|
|
795
|
+
"type": {
|
|
796
|
+
"const": "threshold"
|
|
797
|
+
}
|
|
798
|
+
},
|
|
799
|
+
"required": [
|
|
800
|
+
"type"
|
|
801
|
+
]
|
|
802
|
+
},
|
|
803
|
+
"then": {
|
|
804
|
+
"required": [
|
|
805
|
+
"threshold"
|
|
806
|
+
]
|
|
807
|
+
}
|
|
808
|
+
},
|
|
809
|
+
{
|
|
810
|
+
"if": {
|
|
811
|
+
"properties": {
|
|
812
|
+
"type": {
|
|
813
|
+
"const": "baseline"
|
|
814
|
+
}
|
|
815
|
+
},
|
|
816
|
+
"required": [
|
|
817
|
+
"type"
|
|
818
|
+
]
|
|
819
|
+
},
|
|
820
|
+
"then": {
|
|
821
|
+
"required": [
|
|
822
|
+
"baseline"
|
|
823
|
+
]
|
|
824
|
+
}
|
|
825
|
+
},
|
|
826
|
+
{
|
|
827
|
+
"if": {
|
|
828
|
+
"properties": {
|
|
829
|
+
"type": {
|
|
830
|
+
"const": "target-range"
|
|
831
|
+
}
|
|
832
|
+
},
|
|
833
|
+
"required": [
|
|
834
|
+
"type"
|
|
835
|
+
]
|
|
836
|
+
},
|
|
837
|
+
"then": {
|
|
838
|
+
"required": [
|
|
839
|
+
"targetRange"
|
|
840
|
+
]
|
|
841
|
+
}
|
|
842
|
+
}
|
|
843
|
+
],
|
|
844
|
+
"properties": {
|
|
845
|
+
"baseline": {
|
|
846
|
+
"description": "Baseline comparison (required when type is baseline)",
|
|
847
|
+
"type": "object",
|
|
848
|
+
"properties": {
|
|
849
|
+
"operator": {
|
|
850
|
+
"description": "Comparison operator",
|
|
851
|
+
"type": "string",
|
|
852
|
+
"oneOf": [
|
|
853
|
+
{
|
|
854
|
+
"description": "Greater than",
|
|
855
|
+
"const": "gt"
|
|
856
|
+
},
|
|
857
|
+
{
|
|
858
|
+
"description": "Greater than or equal to",
|
|
859
|
+
"const": "gte"
|
|
860
|
+
},
|
|
861
|
+
{
|
|
862
|
+
"description": "Less than",
|
|
863
|
+
"const": "lt"
|
|
864
|
+
},
|
|
865
|
+
{
|
|
866
|
+
"description": "Less than or equal to",
|
|
867
|
+
"const": "lte"
|
|
868
|
+
},
|
|
869
|
+
{
|
|
870
|
+
"description": "Equal to",
|
|
871
|
+
"const": "eq"
|
|
872
|
+
}
|
|
873
|
+
]
|
|
874
|
+
},
|
|
875
|
+
"sut": {
|
|
876
|
+
"description": "Baseline SUT identifier",
|
|
877
|
+
"type": "string",
|
|
878
|
+
"minLength": 1
|
|
879
|
+
}
|
|
880
|
+
},
|
|
881
|
+
"required": [
|
|
882
|
+
"operator",
|
|
883
|
+
"sut"
|
|
884
|
+
],
|
|
885
|
+
"additionalProperties": false
|
|
886
|
+
},
|
|
887
|
+
"criterionId": {
|
|
888
|
+
"description": "Unique criterion identifier",
|
|
889
|
+
"type": "string",
|
|
890
|
+
"minLength": 1
|
|
891
|
+
},
|
|
892
|
+
"description": {
|
|
893
|
+
"description": "Human-readable description",
|
|
894
|
+
"type": "string",
|
|
895
|
+
"minLength": 1
|
|
896
|
+
},
|
|
897
|
+
"metric": {
|
|
898
|
+
"description": "Metric to evaluate",
|
|
899
|
+
"type": "string",
|
|
900
|
+
"minLength": 1
|
|
901
|
+
},
|
|
902
|
+
"scopeConstraints": {
|
|
903
|
+
"description": "Optional scope constraints",
|
|
904
|
+
"type": "object",
|
|
905
|
+
"properties": {
|
|
906
|
+
"caseClass": {
|
|
907
|
+
"description": "Case class filter",
|
|
908
|
+
"anyOf": [
|
|
909
|
+
{
|
|
910
|
+
"type": "string"
|
|
911
|
+
},
|
|
912
|
+
{
|
|
913
|
+
"type": "array",
|
|
914
|
+
"items": {
|
|
915
|
+
"type": "string"
|
|
916
|
+
}
|
|
917
|
+
}
|
|
918
|
+
]
|
|
919
|
+
}
|
|
920
|
+
},
|
|
921
|
+
"additionalProperties": false
|
|
922
|
+
},
|
|
923
|
+
"sut": {
|
|
924
|
+
"description": "SUT to evaluate (or \"*\" for all SUTs)",
|
|
925
|
+
"type": "string",
|
|
926
|
+
"minLength": 1
|
|
927
|
+
},
|
|
928
|
+
"tags": {
|
|
929
|
+
"description": "Tags for filtering",
|
|
930
|
+
"type": "array",
|
|
931
|
+
"items": {
|
|
932
|
+
"type": "string"
|
|
933
|
+
}
|
|
934
|
+
},
|
|
935
|
+
"targetRange": {
|
|
936
|
+
"description": "Target range (required when type is target-range)",
|
|
937
|
+
"type": "object",
|
|
938
|
+
"properties": {
|
|
939
|
+
"max": {
|
|
940
|
+
"description": "Maximum value",
|
|
941
|
+
"type": "number"
|
|
942
|
+
},
|
|
943
|
+
"maxInclusive": {
|
|
944
|
+
"description": "Whether max is inclusive",
|
|
945
|
+
"type": "boolean"
|
|
946
|
+
},
|
|
947
|
+
"min": {
|
|
948
|
+
"description": "Minimum value",
|
|
949
|
+
"type": "number"
|
|
950
|
+
},
|
|
951
|
+
"minInclusive": {
|
|
952
|
+
"description": "Whether min is inclusive",
|
|
953
|
+
"type": "boolean"
|
|
954
|
+
}
|
|
955
|
+
},
|
|
956
|
+
"additionalProperties": false
|
|
957
|
+
},
|
|
958
|
+
"threshold": {
|
|
959
|
+
"description": "Threshold operator and value (required when type is threshold)",
|
|
960
|
+
"type": "object",
|
|
961
|
+
"properties": {
|
|
962
|
+
"operator": {
|
|
963
|
+
"description": "Comparison operator",
|
|
964
|
+
"type": "string",
|
|
965
|
+
"oneOf": [
|
|
966
|
+
{
|
|
967
|
+
"description": "Greater than",
|
|
968
|
+
"const": "gt"
|
|
969
|
+
},
|
|
970
|
+
{
|
|
971
|
+
"description": "Greater than or equal to",
|
|
972
|
+
"const": "gte"
|
|
973
|
+
},
|
|
974
|
+
{
|
|
975
|
+
"description": "Less than",
|
|
976
|
+
"const": "lt"
|
|
977
|
+
},
|
|
978
|
+
{
|
|
979
|
+
"description": "Less than or equal to",
|
|
980
|
+
"const": "lte"
|
|
981
|
+
},
|
|
982
|
+
{
|
|
983
|
+
"description": "Equal to",
|
|
984
|
+
"const": "eq"
|
|
985
|
+
}
|
|
986
|
+
]
|
|
987
|
+
},
|
|
988
|
+
"value": {
|
|
989
|
+
"description": "Threshold value",
|
|
990
|
+
"type": "number"
|
|
991
|
+
}
|
|
992
|
+
},
|
|
993
|
+
"required": [
|
|
994
|
+
"operator",
|
|
995
|
+
"value"
|
|
996
|
+
],
|
|
997
|
+
"additionalProperties": false
|
|
998
|
+
},
|
|
999
|
+
"type": {
|
|
1000
|
+
"description": "Type of metrics criterion",
|
|
1001
|
+
"type": "string",
|
|
1002
|
+
"oneOf": [
|
|
1003
|
+
{
|
|
1004
|
+
"description": "Compare a metric against a fixed threshold value",
|
|
1005
|
+
"const": "threshold"
|
|
1006
|
+
},
|
|
1007
|
+
{
|
|
1008
|
+
"description": "Compare a metric against a baseline SUT",
|
|
1009
|
+
"const": "baseline"
|
|
1010
|
+
},
|
|
1011
|
+
{
|
|
1012
|
+
"description": "Check that a metric falls within a target range",
|
|
1013
|
+
"const": "target-range"
|
|
1014
|
+
}
|
|
1015
|
+
]
|
|
1016
|
+
}
|
|
1017
|
+
},
|
|
1018
|
+
"required": [
|
|
1019
|
+
"criterionId",
|
|
1020
|
+
"description",
|
|
1021
|
+
"metric",
|
|
1022
|
+
"sut",
|
|
1023
|
+
"type"
|
|
1024
|
+
],
|
|
1025
|
+
"additionalProperties": false
|
|
1026
|
+
},
|
|
1027
|
+
"minItems": 1
|
|
1028
|
+
},
|
|
1029
|
+
"description": {
|
|
1030
|
+
"description": "Evaluator description",
|
|
1031
|
+
"type": "string"
|
|
1032
|
+
},
|
|
1033
|
+
"name": {
|
|
1034
|
+
"description": "Human-readable evaluator name",
|
|
1035
|
+
"type": "string"
|
|
1036
|
+
},
|
|
1037
|
+
"options": {
|
|
1038
|
+
"description": "Additional evaluator-specific options",
|
|
1039
|
+
"type": "object",
|
|
1040
|
+
"additionalProperties": {},
|
|
1041
|
+
"propertyNames": {
|
|
1042
|
+
"type": "string"
|
|
1043
|
+
}
|
|
1044
|
+
}
|
|
1045
|
+
},
|
|
1046
|
+
"required": [
|
|
1047
|
+
"criteria"
|
|
1048
|
+
],
|
|
1049
|
+
"additionalProperties": false,
|
|
1050
|
+
"examples": [
|
|
1051
|
+
{
|
|
1052
|
+
"description": "Evaluate length metric against threshold, baseline, and target-range criteria",
|
|
1053
|
+
"criteria": [
|
|
1054
|
+
{
|
|
1055
|
+
"description": "Measured length should be greater than zero",
|
|
1056
|
+
"type": "threshold",
|
|
1057
|
+
"criterionId": "length-threshold",
|
|
1058
|
+
"metric": "length",
|
|
1059
|
+
"sut": "*",
|
|
1060
|
+
"threshold": {
|
|
1061
|
+
"operator": "gt",
|
|
1062
|
+
"value": 0
|
|
1063
|
+
}
|
|
1064
|
+
},
|
|
1065
|
+
{
|
|
1066
|
+
"description": "Built-in .length should be at least as large as spread operator",
|
|
1067
|
+
"type": "baseline",
|
|
1068
|
+
"baseline": {
|
|
1069
|
+
"operator": "gte",
|
|
1070
|
+
"sut": "spread-length"
|
|
1071
|
+
},
|
|
1072
|
+
"criterionId": "length-baseline",
|
|
1073
|
+
"metric": "length",
|
|
1074
|
+
"sut": "builtin-length"
|
|
1075
|
+
},
|
|
1076
|
+
{
|
|
1077
|
+
"description": "Length should be in reasonable range [1, 100]",
|
|
1078
|
+
"type": "target-range",
|
|
1079
|
+
"criterionId": "length-target-range",
|
|
1080
|
+
"metric": "length",
|
|
1081
|
+
"sut": "*",
|
|
1082
|
+
"targetRange": {
|
|
1083
|
+
"max": 100,
|
|
1084
|
+
"maxInclusive": true,
|
|
1085
|
+
"min": 1,
|
|
1086
|
+
"minInclusive": true
|
|
1087
|
+
}
|
|
1088
|
+
}
|
|
1089
|
+
],
|
|
1090
|
+
"name": "Metrics-Only Evaluation"
|
|
1091
|
+
}
|
|
1092
|
+
]
|
|
1093
|
+
},
|
|
1094
|
+
"RobustnessEvaluatorConfig": {
|
|
1095
|
+
"title": "RobustnessEvaluatorConfig",
|
|
1096
|
+
"description": "Configuration for the robustness evaluator",
|
|
1097
|
+
"type": "object",
|
|
1098
|
+
"properties": {
|
|
1099
|
+
"description": {
|
|
1100
|
+
"description": "Evaluator description",
|
|
1101
|
+
"type": "string"
|
|
1102
|
+
},
|
|
1103
|
+
"intensityLevels": {
|
|
1104
|
+
"description": "Intensity levels tested",
|
|
1105
|
+
"type": "array",
|
|
1106
|
+
"items": {
|
|
1107
|
+
"type": "number"
|
|
1108
|
+
}
|
|
1109
|
+
},
|
|
1110
|
+
"metrics": {
|
|
1111
|
+
"description": "Metrics to analyze",
|
|
1112
|
+
"type": "array",
|
|
1113
|
+
"items": {
|
|
1114
|
+
"type": "string",
|
|
1115
|
+
"minLength": 1
|
|
1116
|
+
},
|
|
1117
|
+
"minItems": 1
|
|
1118
|
+
},
|
|
1119
|
+
"name": {
|
|
1120
|
+
"description": "Human-readable evaluator name",
|
|
1121
|
+
"type": "string"
|
|
1122
|
+
},
|
|
1123
|
+
"options": {
|
|
1124
|
+
"description": "Additional evaluator-specific options",
|
|
1125
|
+
"type": "object",
|
|
1126
|
+
"additionalProperties": {},
|
|
1127
|
+
"propertyNames": {
|
|
1128
|
+
"type": "string"
|
|
1129
|
+
}
|
|
1130
|
+
},
|
|
1131
|
+
"perturbations": {
|
|
1132
|
+
"description": "Perturbations applied",
|
|
1133
|
+
"type": "array",
|
|
1134
|
+
"items": {
|
|
1135
|
+
"type": "string",
|
|
1136
|
+
"minLength": 1
|
|
1137
|
+
},
|
|
1138
|
+
"minItems": 1
|
|
1139
|
+
},
|
|
1140
|
+
"runsPerLevel": {
|
|
1141
|
+
"description": "Number of runs per perturbation level",
|
|
1142
|
+
"type": "integer",
|
|
1143
|
+
"minimum": 1,
|
|
1144
|
+
"maximum": 10000
|
|
1145
|
+
}
|
|
1146
|
+
},
|
|
1147
|
+
"required": [
|
|
1148
|
+
"metrics",
|
|
1149
|
+
"perturbations"
|
|
1150
|
+
],
|
|
1151
|
+
"additionalProperties": false,
|
|
1152
|
+
"examples": [
|
|
1153
|
+
{
|
|
1154
|
+
"description": "Analyze algorithm robustness under perturbations",
|
|
1155
|
+
"intensityLevels": [
|
|
1156
|
+
0.1,
|
|
1157
|
+
0.2,
|
|
1158
|
+
0.3,
|
|
1159
|
+
0.4,
|
|
1160
|
+
0.5
|
|
1161
|
+
],
|
|
1162
|
+
"metrics": [
|
|
1163
|
+
"executionTime",
|
|
1164
|
+
"accuracy",
|
|
1165
|
+
"f1Score"
|
|
1166
|
+
],
|
|
1167
|
+
"name": "Robustness Analysis",
|
|
1168
|
+
"perturbations": [
|
|
1169
|
+
"edge-removal",
|
|
1170
|
+
"noise",
|
|
1171
|
+
"seed-shift"
|
|
1172
|
+
],
|
|
1173
|
+
"runsPerLevel": 10
|
|
1174
|
+
}
|
|
1175
|
+
]
|
|
1176
|
+
}
|
|
1177
|
+
}
|
|
1178
|
+
}
|