ppef 1.0.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (295) hide show
  1. package/README.md +76 -125
  2. package/bin/ppef.mjs +20 -0
  3. package/dist/__tests__/cli/evaluate-command.integration.test.d.ts +8 -0
  4. package/dist/__tests__/cli/evaluate-command.integration.test.d.ts.map +1 -0
  5. package/dist/__tests__/cli/evaluate-command.integration.test.js +308 -0
  6. package/dist/__tests__/cli/evaluate-command.integration.test.js.map +1 -0
  7. package/dist/__tests__/evaluators/claims-evaluator.unit.test.d.ts +8 -0
  8. package/dist/__tests__/evaluators/claims-evaluator.unit.test.d.ts.map +1 -0
  9. package/dist/__tests__/evaluators/claims-evaluator.unit.test.js +405 -0
  10. package/dist/__tests__/evaluators/claims-evaluator.unit.test.js.map +1 -0
  11. package/dist/__tests__/evaluators/metrics-evaluator.unit.test.d.ts +8 -0
  12. package/dist/__tests__/evaluators/metrics-evaluator.unit.test.d.ts.map +1 -0
  13. package/dist/__tests__/evaluators/metrics-evaluator.unit.test.js +424 -0
  14. package/dist/__tests__/evaluators/metrics-evaluator.unit.test.js.map +1 -0
  15. package/dist/__tests__/evaluators/registry.unit.test.d.ts +7 -0
  16. package/dist/__tests__/evaluators/registry.unit.test.d.ts.map +1 -0
  17. package/dist/__tests__/evaluators/registry.unit.test.js +173 -0
  18. package/dist/__tests__/evaluators/registry.unit.test.js.map +1 -0
  19. package/dist/__tests__/evaluators/robustness-evaluator.unit.test.d.ts +8 -0
  20. package/dist/__tests__/evaluators/robustness-evaluator.unit.test.d.ts.map +1 -0
  21. package/dist/__tests__/evaluators/robustness-evaluator.unit.test.js +260 -0
  22. package/dist/__tests__/evaluators/robustness-evaluator.unit.test.js.map +1 -0
  23. package/dist/__tests__/framework-pipeline.integration.test.js +49 -20
  24. package/dist/__tests__/framework-pipeline.integration.test.js.map +1 -1
  25. package/dist/__tests__/index-exports.unit.test.d.ts +8 -0
  26. package/dist/__tests__/index-exports.unit.test.d.ts.map +1 -0
  27. package/dist/__tests__/index-exports.unit.test.js +124 -0
  28. package/dist/__tests__/index-exports.unit.test.js.map +1 -0
  29. package/dist/__tests__/registry-executor.integration.test.js +12 -9
  30. package/dist/__tests__/registry-executor.integration.test.js.map +1 -1
  31. package/dist/aggregation/__tests__/aggregators.unit.test.d.ts +7 -0
  32. package/dist/aggregation/__tests__/aggregators.unit.test.d.ts.map +1 -0
  33. package/dist/aggregation/__tests__/aggregators.unit.test.js +350 -0
  34. package/dist/aggregation/__tests__/aggregators.unit.test.js.map +1 -0
  35. package/dist/aggregation/__tests__/pipeline.unit.test.d.ts +7 -0
  36. package/dist/aggregation/__tests__/pipeline.unit.test.d.ts.map +1 -0
  37. package/dist/aggregation/__tests__/pipeline.unit.test.js +213 -0
  38. package/dist/aggregation/__tests__/pipeline.unit.test.js.map +1 -0
  39. package/dist/aggregation/aggregators.d.ts +9 -0
  40. package/dist/aggregation/aggregators.d.ts.map +1 -1
  41. package/dist/aggregation/aggregators.js +1 -1
  42. package/dist/aggregation/aggregators.js.map +1 -1
  43. package/dist/aggregation/index.d.ts +1 -1
  44. package/dist/aggregation/index.d.ts.map +1 -1
  45. package/dist/aggregation/index.js +1 -1
  46. package/dist/aggregation/index.js.map +1 -1
  47. package/dist/aggregation/pipeline.d.ts.map +1 -1
  48. package/dist/aggregation/pipeline.js +40 -3
  49. package/dist/aggregation/pipeline.js.map +1 -1
  50. package/dist/claims/index.d.ts +6 -3
  51. package/dist/claims/index.d.ts.map +1 -1
  52. package/dist/claims/index.js +6 -3
  53. package/dist/claims/index.js.map +1 -1
  54. package/dist/cli/__tests__/aggregate.command.unit.test.d.ts +7 -0
  55. package/dist/cli/__tests__/aggregate.command.unit.test.d.ts.map +1 -0
  56. package/dist/cli/__tests__/aggregate.command.unit.test.js +399 -0
  57. package/dist/cli/__tests__/aggregate.command.unit.test.js.map +1 -0
  58. package/dist/cli/__tests__/binary-sut.integration.test.d.ts +8 -0
  59. package/dist/cli/__tests__/binary-sut.integration.test.d.ts.map +1 -0
  60. package/dist/cli/__tests__/binary-sut.integration.test.js +165 -0
  61. package/dist/cli/__tests__/binary-sut.integration.test.js.map +1 -0
  62. package/dist/cli/__tests__/commands.unit.test.d.ts +10 -0
  63. package/dist/cli/__tests__/commands.unit.test.d.ts.map +1 -0
  64. package/dist/cli/__tests__/commands.unit.test.js +217 -0
  65. package/dist/cli/__tests__/commands.unit.test.js.map +1 -0
  66. package/dist/cli/__tests__/config-loader.unit.test.d.ts +7 -0
  67. package/dist/cli/__tests__/config-loader.unit.test.d.ts.map +1 -0
  68. package/dist/cli/__tests__/config-loader.unit.test.js +611 -0
  69. package/dist/cli/__tests__/config-loader.unit.test.js.map +1 -0
  70. package/dist/cli/__tests__/index.unit.test.d.ts +10 -0
  71. package/dist/cli/__tests__/index.unit.test.d.ts.map +1 -0
  72. package/dist/cli/__tests__/index.unit.test.js +65 -0
  73. package/dist/cli/__tests__/index.unit.test.js.map +1 -0
  74. package/dist/cli/__tests__/logger.unit.test.d.ts +11 -0
  75. package/dist/cli/__tests__/logger.unit.test.d.ts.map +1 -0
  76. package/dist/cli/__tests__/logger.unit.test.js +180 -0
  77. package/dist/cli/__tests__/logger.unit.test.js.map +1 -0
  78. package/dist/cli/__tests__/module-loader.unit.test.d.ts +11 -0
  79. package/dist/cli/__tests__/module-loader.unit.test.d.ts.map +1 -0
  80. package/dist/cli/__tests__/module-loader.unit.test.js +262 -0
  81. package/dist/cli/__tests__/module-loader.unit.test.js.map +1 -0
  82. package/dist/cli/__tests__/output-writer.unit.test.d.ts +10 -0
  83. package/dist/cli/__tests__/output-writer.unit.test.d.ts.map +1 -0
  84. package/dist/cli/__tests__/output-writer.unit.test.js +216 -0
  85. package/dist/cli/__tests__/output-writer.unit.test.js.map +1 -0
  86. package/dist/cli/__tests__/plan.command.unit.test.d.ts +7 -0
  87. package/dist/cli/__tests__/plan.command.unit.test.d.ts.map +1 -0
  88. package/dist/cli/__tests__/plan.command.unit.test.js +289 -0
  89. package/dist/cli/__tests__/plan.command.unit.test.js.map +1 -0
  90. package/dist/cli/__tests__/run.command.unit.test.d.ts +7 -0
  91. package/dist/cli/__tests__/run.command.unit.test.d.ts.map +1 -0
  92. package/dist/cli/__tests__/run.command.unit.test.js +422 -0
  93. package/dist/cli/__tests__/run.command.unit.test.js.map +1 -0
  94. package/dist/cli/__tests__/validate.command.unit.test.d.ts +7 -0
  95. package/dist/cli/__tests__/validate.command.unit.test.d.ts.map +1 -0
  96. package/dist/cli/__tests__/validate.command.unit.test.js +226 -0
  97. package/dist/cli/__tests__/validate.command.unit.test.js.map +1 -0
  98. package/dist/cli/command-deps.d.ts +137 -0
  99. package/dist/cli/command-deps.d.ts.map +1 -0
  100. package/dist/cli/command-deps.js +7 -0
  101. package/dist/cli/command-deps.js.map +1 -0
  102. package/dist/cli/commands/aggregate.d.ts +35 -0
  103. package/dist/cli/commands/aggregate.d.ts.map +1 -0
  104. package/dist/cli/commands/aggregate.js +124 -0
  105. package/dist/cli/commands/aggregate.js.map +1 -0
  106. package/dist/cli/commands/evaluate.d.ts +41 -0
  107. package/dist/cli/commands/evaluate.d.ts.map +1 -0
  108. package/dist/cli/commands/evaluate.js +287 -0
  109. package/dist/cli/commands/evaluate.js.map +1 -0
  110. package/dist/cli/commands/plan.d.ts +36 -0
  111. package/dist/cli/commands/plan.d.ts.map +1 -0
  112. package/dist/cli/commands/plan.js +109 -0
  113. package/dist/cli/commands/plan.js.map +1 -0
  114. package/dist/cli/commands/run.d.ts +33 -0
  115. package/dist/cli/commands/run.d.ts.map +1 -0
  116. package/dist/cli/commands/run.js +277 -0
  117. package/dist/cli/commands/run.js.map +1 -0
  118. package/dist/cli/commands/validate.d.ts +27 -0
  119. package/dist/cli/commands/validate.d.ts.map +1 -0
  120. package/dist/cli/commands/validate.js +88 -0
  121. package/dist/cli/commands/validate.js.map +1 -0
  122. package/dist/cli/config-loader.d.ts +30 -0
  123. package/dist/cli/config-loader.d.ts.map +1 -0
  124. package/dist/cli/config-loader.js +181 -0
  125. package/dist/cli/config-loader.js.map +1 -0
  126. package/dist/cli/index.d.ts +27 -0
  127. package/dist/cli/index.d.ts.map +1 -0
  128. package/dist/cli/index.js +60 -0
  129. package/dist/cli/index.js.map +1 -0
  130. package/dist/cli/logger.d.ts +75 -0
  131. package/dist/cli/logger.d.ts.map +1 -0
  132. package/dist/cli/logger.js +131 -0
  133. package/dist/cli/logger.js.map +1 -0
  134. package/dist/cli/module-loader.d.ts +68 -0
  135. package/dist/cli/module-loader.d.ts.map +1 -0
  136. package/dist/cli/module-loader.js +134 -0
  137. package/dist/cli/module-loader.js.map +1 -0
  138. package/dist/cli/output-writer.d.ts +51 -0
  139. package/dist/cli/output-writer.d.ts.map +1 -0
  140. package/dist/cli/output-writer.js +65 -0
  141. package/dist/cli/output-writer.js.map +1 -0
  142. package/dist/cli/types.d.ts +193 -0
  143. package/dist/cli/types.d.ts.map +1 -0
  144. package/dist/cli/types.js +7 -0
  145. package/dist/cli/types.js.map +1 -0
  146. package/dist/collector/__tests__/result-collector.unit.test.d.ts +7 -0
  147. package/dist/collector/__tests__/result-collector.unit.test.d.ts.map +1 -0
  148. package/dist/collector/__tests__/result-collector.unit.test.js +1021 -0
  149. package/dist/collector/__tests__/result-collector.unit.test.js.map +1 -0
  150. package/dist/collector/__tests__/schema.unit.test.d.ts +7 -0
  151. package/dist/collector/__tests__/schema.unit.test.d.ts.map +1 -0
  152. package/dist/collector/__tests__/schema.unit.test.js +360 -0
  153. package/dist/collector/__tests__/schema.unit.test.js.map +1 -0
  154. package/dist/evaluators/claims-evaluator.d.ts +87 -0
  155. package/dist/evaluators/claims-evaluator.d.ts.map +1 -0
  156. package/dist/evaluators/claims-evaluator.js +289 -0
  157. package/dist/evaluators/claims-evaluator.js.map +1 -0
  158. package/dist/evaluators/exploratory-evaluator.d.ts +136 -0
  159. package/dist/evaluators/exploratory-evaluator.d.ts.map +1 -0
  160. package/dist/evaluators/exploratory-evaluator.js +545 -0
  161. package/dist/evaluators/exploratory-evaluator.js.map +1 -0
  162. package/dist/evaluators/index.d.ts +13 -0
  163. package/dist/evaluators/index.d.ts.map +1 -0
  164. package/dist/evaluators/index.js +14 -0
  165. package/dist/evaluators/index.js.map +1 -0
  166. package/dist/evaluators/metrics-evaluator.d.ts +114 -0
  167. package/dist/evaluators/metrics-evaluator.d.ts.map +1 -0
  168. package/dist/evaluators/metrics-evaluator.js +433 -0
  169. package/dist/evaluators/metrics-evaluator.js.map +1 -0
  170. package/dist/evaluators/registry.d.ts +106 -0
  171. package/dist/evaluators/registry.d.ts.map +1 -0
  172. package/dist/evaluators/registry.js +148 -0
  173. package/dist/evaluators/registry.js.map +1 -0
  174. package/dist/evaluators/robustness-evaluator.d.ts +57 -0
  175. package/dist/evaluators/robustness-evaluator.d.ts.map +1 -0
  176. package/dist/evaluators/robustness-evaluator.js +186 -0
  177. package/dist/evaluators/robustness-evaluator.js.map +1 -0
  178. package/dist/executor/__tests__/binary-sut.unit.test.d.ts +8 -0
  179. package/dist/executor/__tests__/binary-sut.unit.test.d.ts.map +1 -0
  180. package/dist/executor/__tests__/binary-sut.unit.test.js +313 -0
  181. package/dist/executor/__tests__/binary-sut.unit.test.js.map +1 -0
  182. package/dist/executor/__tests__/checkpoint-manager.unit.test.js +83 -1
  183. package/dist/executor/__tests__/checkpoint-manager.unit.test.js.map +1 -1
  184. package/dist/executor/__tests__/checkpoint-merge-bug.diagnostic.test.d.ts +3 -6
  185. package/dist/executor/__tests__/checkpoint-merge-bug.diagnostic.test.d.ts.map +1 -1
  186. package/dist/executor/__tests__/checkpoint-merge-bug.diagnostic.test.js +428 -159
  187. package/dist/executor/__tests__/checkpoint-merge-bug.diagnostic.test.js.map +1 -1
  188. package/dist/executor/__tests__/checkpoint-storage.unit.test.js +148 -1
  189. package/dist/executor/__tests__/checkpoint-storage.unit.test.js.map +1 -1
  190. package/dist/executor/__tests__/executor.unit.test.js +123 -8
  191. package/dist/executor/__tests__/executor.unit.test.js.map +1 -1
  192. package/dist/executor/__tests__/memory-monitor.unit.test.d.ts +7 -0
  193. package/dist/executor/__tests__/memory-monitor.unit.test.d.ts.map +1 -0
  194. package/dist/executor/__tests__/memory-monitor.unit.test.js +285 -0
  195. package/dist/executor/__tests__/memory-monitor.unit.test.js.map +1 -0
  196. package/dist/executor/__tests__/parallel-executor.unit.test.d.ts +2 -1
  197. package/dist/executor/__tests__/parallel-executor.unit.test.d.ts.map +1 -1
  198. package/dist/executor/__tests__/parallel-executor.unit.test.js +426 -156
  199. package/dist/executor/__tests__/parallel-executor.unit.test.js.map +1 -1
  200. package/dist/executor/__tests__/resource-calculator.unit.test.d.ts +10 -0
  201. package/dist/executor/__tests__/resource-calculator.unit.test.d.ts.map +1 -0
  202. package/dist/executor/__tests__/resource-calculator.unit.test.js +104 -0
  203. package/dist/executor/__tests__/resource-calculator.unit.test.js.map +1 -0
  204. package/dist/executor/__tests__/run-id.unit.test.d.ts +8 -0
  205. package/dist/executor/__tests__/run-id.unit.test.d.ts.map +1 -0
  206. package/dist/executor/__tests__/run-id.unit.test.js +156 -0
  207. package/dist/executor/__tests__/run-id.unit.test.js.map +1 -0
  208. package/dist/executor/__tests__/worker-entry.integration.test.d.ts +24 -0
  209. package/dist/executor/__tests__/worker-entry.integration.test.d.ts.map +1 -0
  210. package/dist/executor/__tests__/worker-entry.integration.test.js +82 -0
  211. package/dist/executor/__tests__/worker-entry.integration.test.js.map +1 -0
  212. package/dist/executor/__tests__/worker-entry.unit.test.d.ts +7 -0
  213. package/dist/executor/__tests__/worker-entry.unit.test.d.ts.map +1 -0
  214. package/dist/executor/__tests__/worker-entry.unit.test.js +364 -0
  215. package/dist/executor/__tests__/worker-entry.unit.test.js.map +1 -0
  216. package/dist/executor/__tests__/worker-threads-executor.unit.test.d.ts +8 -0
  217. package/dist/executor/__tests__/worker-threads-executor.unit.test.d.ts.map +1 -0
  218. package/dist/executor/__tests__/worker-threads-executor.unit.test.js +276 -0
  219. package/dist/executor/__tests__/worker-threads-executor.unit.test.js.map +1 -0
  220. package/dist/executor/binary-sut.d.ts +105 -0
  221. package/dist/executor/binary-sut.d.ts.map +1 -0
  222. package/dist/executor/binary-sut.js +174 -0
  223. package/dist/executor/binary-sut.js.map +1 -0
  224. package/dist/executor/checkpoint-storage.d.ts.map +1 -1
  225. package/dist/executor/checkpoint-storage.js +6 -4
  226. package/dist/executor/checkpoint-storage.js.map +1 -1
  227. package/dist/executor/executor.d.ts +28 -0
  228. package/dist/executor/executor.d.ts.map +1 -1
  229. package/dist/executor/executor.js +85 -24
  230. package/dist/executor/executor.js.map +1 -1
  231. package/dist/executor/index.d.ts +4 -0
  232. package/dist/executor/index.d.ts.map +1 -1
  233. package/dist/executor/index.js +4 -0
  234. package/dist/executor/index.js.map +1 -1
  235. package/dist/executor/parallel-executor.d.ts +186 -0
  236. package/dist/executor/parallel-executor.d.ts.map +1 -1
  237. package/dist/executor/parallel-executor.js +218 -83
  238. package/dist/executor/parallel-executor.js.map +1 -1
  239. package/dist/executor/resource-calculator.d.ts +49 -0
  240. package/dist/executor/resource-calculator.d.ts.map +1 -0
  241. package/dist/executor/resource-calculator.js +129 -0
  242. package/dist/executor/resource-calculator.js.map +1 -0
  243. package/dist/executor/run-id.d.ts.map +1 -1
  244. package/dist/executor/run-id.js +8 -1
  245. package/dist/executor/run-id.js.map +1 -1
  246. package/dist/executor/worker-entry.d.ts +2 -0
  247. package/dist/executor/worker-entry.d.ts.map +1 -1
  248. package/dist/executor/worker-entry.js +46 -55
  249. package/dist/executor/worker-entry.js.map +1 -1
  250. package/dist/executor/worker-executor.d.ts +257 -0
  251. package/dist/executor/worker-executor.d.ts.map +1 -0
  252. package/dist/executor/worker-executor.js +308 -0
  253. package/dist/executor/worker-executor.js.map +1 -0
  254. package/dist/executor/worker-threads-executor.d.ts +245 -0
  255. package/dist/executor/worker-threads-executor.d.ts.map +1 -0
  256. package/dist/executor/worker-threads-executor.js +332 -0
  257. package/dist/executor/worker-threads-executor.js.map +1 -0
  258. package/dist/index.d.ts +1 -0
  259. package/dist/index.d.ts.map +1 -1
  260. package/dist/index.js +4 -2
  261. package/dist/index.js.map +1 -1
  262. package/dist/renderers/latex-renderer.d.ts +60 -0
  263. package/dist/renderers/latex-renderer.d.ts.map +1 -1
  264. package/dist/renderers/latex-renderer.js +299 -0
  265. package/dist/renderers/latex-renderer.js.map +1 -1
  266. package/dist/renderers/types.d.ts +9 -0
  267. package/dist/renderers/types.d.ts.map +1 -1
  268. package/dist/renderers/types.js.map +1 -1
  269. package/dist/robustness/__tests__/perturbations.unit.test.d.ts +11 -0
  270. package/dist/robustness/__tests__/perturbations.unit.test.d.ts.map +1 -0
  271. package/dist/robustness/__tests__/perturbations.unit.test.js +284 -0
  272. package/dist/robustness/__tests__/perturbations.unit.test.js.map +1 -0
  273. package/dist/robustness/index.d.ts +5 -2
  274. package/dist/robustness/index.d.ts.map +1 -1
  275. package/dist/robustness/index.js +4 -2
  276. package/dist/robustness/index.js.map +1 -1
  277. package/dist/statistical/__tests__/mann-whitney-u.unit.test.d.ts +7 -0
  278. package/dist/statistical/__tests__/mann-whitney-u.unit.test.d.ts.map +1 -0
  279. package/dist/statistical/__tests__/mann-whitney-u.unit.test.js +185 -0
  280. package/dist/statistical/__tests__/mann-whitney-u.unit.test.js.map +1 -0
  281. package/dist/types/evaluator.d.ts +449 -0
  282. package/dist/types/evaluator.d.ts.map +1 -0
  283. package/dist/types/evaluator.js +9 -0
  284. package/dist/types/evaluator.js.map +1 -0
  285. package/dist/types/result.d.ts +2 -0
  286. package/dist/types/result.d.ts.map +1 -1
  287. package/package.json +8 -1
  288. package/dist/claims/evaluator.d.ts +0 -33
  289. package/dist/claims/evaluator.d.ts.map +0 -1
  290. package/dist/claims/evaluator.js +0 -174
  291. package/dist/claims/evaluator.js.map +0 -1
  292. package/dist/robustness/analyzer.d.ts +0 -61
  293. package/dist/robustness/analyzer.d.ts.map +0 -1
  294. package/dist/robustness/analyzer.js +0 -191
  295. package/dist/robustness/analyzer.js.map +0 -1
@@ -0,0 +1,289 @@
1
+ /**
2
+ * Claims Evaluator
3
+ *
4
+ * Evaluates explicit hypotheses (claims) against aggregated results.
5
+ * Refactored from src/claims/evaluator.ts into a class-based design
6
+ * that implements the Evaluator interface.
7
+ */
8
+ /**
9
+ * Claims evaluator - evaluates hypotheses against aggregated results.
10
+ */
11
+ export class ClaimsEvaluator {
12
+ /** Type identifier */
13
+ type = "claims";
14
+ /** Schema version */
15
+ static VERSION = "1.0.0";
16
+ /**
17
+ * Validate claims evaluator configuration.
18
+ *
19
+ * @param config - Configuration to validate
20
+ * @returns Validation result
21
+ */
22
+ validateConfig(config) {
23
+ const errors = [];
24
+ const warnings = [];
25
+ // Check claims array
26
+ if (!Array.isArray(config.claims)) {
27
+ errors.push("claims must be an array");
28
+ return { valid: false, errors, warnings };
29
+ }
30
+ if (config.claims.length === 0) {
31
+ warnings.push("No claims provided - evaluation will produce empty results");
32
+ }
33
+ // Validate each claim
34
+ for (let i = 0; i < config.claims.length; i++) {
35
+ const claim = config.claims[i];
36
+ const claimErrors = this.validateClaim(claim, i);
37
+ errors.push(...claimErrors);
38
+ }
39
+ return {
40
+ valid: errors.length === 0,
41
+ errors: errors.length > 0 ? errors : undefined,
42
+ warnings: warnings.length > 0 ? warnings : undefined,
43
+ };
44
+ }
45
+ /**
46
+ * Validate a single claim.
47
+ *
48
+ * @param claim - Claim to validate
49
+ * @param index - Index in claims array (for error messages)
50
+ * @returns Array of error messages
51
+ */
52
+ validateClaim(claim, index) {
53
+ const errors = [];
54
+ const prefix = `Claim[${index}]`;
55
+ if (!claim.claimId || typeof claim.claimId !== "string") {
56
+ errors.push(`${prefix}: claimId is required`);
57
+ }
58
+ if (!claim.description || typeof claim.description !== "string") {
59
+ errors.push(`${prefix}: description is required`);
60
+ }
61
+ if (!claim.sut || typeof claim.sut !== "string") {
62
+ errors.push(`${prefix}: sut is required`);
63
+ }
64
+ if (!claim.baseline || typeof claim.baseline !== "string") {
65
+ errors.push(`${prefix}: baseline is required`);
66
+ }
67
+ if (!claim.metric || typeof claim.metric !== "string") {
68
+ errors.push(`${prefix}: metric is required`);
69
+ }
70
+ if (!["greater", "less", "equal"].includes(claim.direction)) {
71
+ errors.push(`${prefix}: direction must be 'greater', 'less', or 'equal'`);
72
+ }
73
+ if (claim.threshold !== undefined && typeof claim.threshold !== "number") {
74
+ errors.push(`${prefix}: threshold must be a number`);
75
+ }
76
+ if (!["global", "caseClass", "parameterRange", "localStructure"].includes(claim.scope)) {
77
+ errors.push(`${prefix}: scope must be a valid ValidityScope`);
78
+ }
79
+ return errors;
80
+ }
81
+ /**
82
+ * Evaluate claims against aggregated results.
83
+ *
84
+ * @param config - Claims evaluator configuration
85
+ * @param input - Evaluation context with aggregates
86
+ * @returns Evaluation output
87
+ */
88
+ evaluate(config, input) {
89
+ const { aggregates } = input;
90
+ // Evaluate all claims
91
+ const evaluations = config.claims.map((claim) => this.evaluateClaim(claim, aggregates));
92
+ // Create summary
93
+ const summary = this.createClaimSummary(evaluations);
94
+ return {
95
+ type: "claims",
96
+ version: ClaimsEvaluator.VERSION,
97
+ timestamp: new Date().toISOString(),
98
+ data: summary,
99
+ metadata: {
100
+ inputSource: input.metadata?.source,
101
+ config,
102
+ },
103
+ };
104
+ }
105
+ /**
106
+ * Evaluate a single claim against aggregated results.
107
+ *
108
+ * @param claim - The claim to evaluate
109
+ * @param aggregates - Aggregated results from the pipeline
110
+ * @returns Claim evaluation with status and evidence
111
+ */
112
+ evaluateClaim(claim, aggregates) {
113
+ // Filter aggregates by scope constraints
114
+ const filteredAggregates = this.filterByScope(aggregates, claim);
115
+ // Find primary and baseline aggregates
116
+ const primaryAgg = filteredAggregates.find((a) => a.sut === claim.sut);
117
+ const baselineAgg = filteredAggregates.find((a) => a.sut === claim.baseline);
118
+ // Handle missing data
119
+ if (!primaryAgg || !baselineAgg) {
120
+ return this.createInconclusiveResult(claim, primaryAgg ? undefined : "Primary SUT not found", baselineAgg ? undefined : "Baseline SUT not found");
121
+ }
122
+ // Get metric values
123
+ const primaryMetric = claim.metric;
124
+ const baselineMetric = claim.metric;
125
+ const primaryStats = primaryAgg.metrics[primaryMetric];
126
+ const baselineStats = baselineAgg.metrics[baselineMetric];
127
+ if (!(primaryMetric in primaryAgg.metrics) || !(baselineMetric in baselineAgg.metrics)) {
128
+ return this.createInconclusiveResult(claim, "Metric not found in primary results", "Metric not found in baseline results");
129
+ }
130
+ // Compute evidence
131
+ const primaryValue = primaryStats.mean;
132
+ const baselineValue = baselineStats.mean;
133
+ const delta = primaryValue - baselineValue;
134
+ const ratio = baselineValue === 0 ? Infinity : primaryValue / baselineValue;
135
+ // Get statistical significance if available
136
+ const comparison = primaryAgg.comparisons?.[claim.baseline];
137
+ const pValue = comparison?.pValue;
138
+ const effectSize = comparison?.effectSize;
139
+ const evidence = {
140
+ primaryValue,
141
+ baselineValue,
142
+ delta,
143
+ ratio,
144
+ pValue,
145
+ effectSize,
146
+ n: primaryStats.n + baselineStats.n,
147
+ };
148
+ // Determine claim status
149
+ const status = this.determineClaimStatus(claim, evidence);
150
+ return {
151
+ claim,
152
+ status,
153
+ evidence,
154
+ };
155
+ }
156
+ /**
157
+ * Filter aggregates by claim scope constraints.
158
+ *
159
+ * @param aggregates - All aggregates
160
+ * @param claim - Claim with scope constraints
161
+ * @returns Filtered aggregates
162
+ */
163
+ filterByScope(aggregates, claim) {
164
+ if (!claim.scopeConstraints) {
165
+ return aggregates;
166
+ }
167
+ return aggregates.filter((agg) => {
168
+ for (const [key, value] of Object.entries(claim.scopeConstraints ?? {})) {
169
+ if (key === "caseClass") {
170
+ const allowedClasses = Array.isArray(value) ? value : [value];
171
+ if (!allowedClasses.includes(agg.caseClass)) {
172
+ return false;
173
+ }
174
+ }
175
+ // Add more scope constraint checks as needed
176
+ }
177
+ return true;
178
+ });
179
+ }
180
+ /**
181
+ * Create an inconclusive result with reasons.
182
+ *
183
+ * @param claim - The claim being evaluated
184
+ * @param reasons - Reasons for inconclusive status
185
+ * @returns Inconclusive claim evaluation
186
+ */
187
+ createInconclusiveResult(claim, ...reasons) {
188
+ const validReasons = reasons.filter((r) => r !== undefined);
189
+ return {
190
+ claim,
191
+ status: "inconclusive",
192
+ evidence: {
193
+ primaryValue: Number.NaN,
194
+ baselineValue: Number.NaN,
195
+ delta: Number.NaN,
196
+ ratio: Number.NaN,
197
+ },
198
+ inconclusiveReason: validReasons.join("; "),
199
+ };
200
+ }
201
+ /**
202
+ * Determine claim status based on evidence.
203
+ *
204
+ * @param claim - The claim being evaluated
205
+ * @param evidence - Computed evidence
206
+ * @returns Claim status
207
+ */
208
+ determineClaimStatus(claim, evidence) {
209
+ // Check for missing data
210
+ if (Number.isNaN(evidence.primaryValue) || Number.isNaN(evidence.baselineValue)) {
211
+ return "inconclusive";
212
+ }
213
+ // Check statistical significance if required
214
+ const significanceLevel = claim.significanceLevel ?? 0.05;
215
+ if (evidence.pValue !== undefined && evidence.pValue > significanceLevel) {
216
+ return "inconclusive";
217
+ }
218
+ // Check minimum effect size if required
219
+ if (claim.minEffectSize !== undefined &&
220
+ evidence.effectSize !== undefined &&
221
+ Math.abs(evidence.effectSize) < claim.minEffectSize) {
222
+ return "inconclusive";
223
+ }
224
+ // Evaluate direction
225
+ switch (claim.direction) {
226
+ case "greater": {
227
+ if (claim.threshold !== undefined) {
228
+ return evidence.delta >= claim.threshold ? "satisfied" : "violated";
229
+ }
230
+ return evidence.delta > 0 ? "satisfied" : "violated";
231
+ }
232
+ case "less": {
233
+ if (claim.threshold !== undefined) {
234
+ return evidence.delta <= -claim.threshold ? "satisfied" : "violated";
235
+ }
236
+ return evidence.delta < 0 ? "satisfied" : "violated";
237
+ }
238
+ case "equal": {
239
+ const epsilon = claim.threshold ?? 0.001;
240
+ return Math.abs(evidence.delta) <= epsilon ? "satisfied" : "violated";
241
+ }
242
+ }
243
+ }
244
+ /**
245
+ * Create a claim evaluation summary.
246
+ *
247
+ * @param evaluations - Completed claim evaluations
248
+ * @returns Summary with counts and rates
249
+ */
250
+ createClaimSummary(evaluations) {
251
+ const satisfied = evaluations.filter((e) => e.status === "satisfied").length;
252
+ const violated = evaluations.filter((e) => e.status === "violated").length;
253
+ const inconclusive = evaluations.filter((e) => e.status === "inconclusive").length;
254
+ const definitive = satisfied + violated;
255
+ const satisfactionRate = definitive > 0 ? satisfied / definitive : 0;
256
+ return {
257
+ version: "1.0.0",
258
+ timestamp: new Date().toISOString(),
259
+ evaluations,
260
+ summary: {
261
+ total: evaluations.length,
262
+ satisfied,
263
+ violated,
264
+ inconclusive,
265
+ satisfactionRate,
266
+ },
267
+ };
268
+ }
269
+ /**
270
+ * Summarize evaluation output.
271
+ *
272
+ * @param output - Evaluation output to summarize
273
+ * @returns Summary statistics
274
+ */
275
+ summarize(output) {
276
+ const { summary } = output.data;
277
+ return {
278
+ total: summary.total,
279
+ passed: summary.satisfied,
280
+ failed: summary.violated,
281
+ inconclusive: summary.inconclusive,
282
+ passRate: summary.satisfactionRate,
283
+ additional: {
284
+ satisfactionRate: summary.satisfactionRate,
285
+ },
286
+ };
287
+ }
288
+ }
289
+ //# sourceMappingURL=claims-evaluator.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"claims-evaluator.js","sourceRoot":"","sources":["../../src/evaluators/claims-evaluator.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAsBH;;GAEG;AACH,MAAM,OAAO,eAAe;IAG3B,sBAAsB;IACb,IAAI,GAAG,QAAiB,CAAC;IAElC,qBAAqB;IACb,MAAM,CAAU,OAAO,GAAG,OAAO,CAAC;IAE1C;;;;;OAKG;IACH,cAAc,CAAC,MAA6B;QAC3C,MAAM,MAAM,GAAa,EAAE,CAAC;QAC5B,MAAM,QAAQ,GAAa,EAAE,CAAC;QAE9B,qBAAqB;QACrB,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC;YACnC,MAAM,CAAC,IAAI,CAAC,yBAAyB,CAAC,CAAC;YACvC,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,CAAC;QAC3C,CAAC;QAED,IAAI,MAAM,CAAC,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAChC,QAAQ,CAAC,IAAI,CAAC,4DAA4D,CAAC,CAAC;QAC7E,CAAC;QAED,sBAAsB;QACtB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC/C,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;YAC/B,MAAM,WAAW,GAAG,IAAI,CAAC,aAAa,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;YACjD,MAAM,CAAC,IAAI,CAAC,GAAG,WAAW,CAAC,CAAC;QAC7B,CAAC;QAED,OAAO;YACN,KAAK,EAAE,MAAM,CAAC,MAAM,KAAK,CAAC;YAC1B,MAAM,EAAE,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS;YAC9C,QAAQ,EAAE,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS;SACpD,CAAC;IACH,CAAC;IAED;;;;;;OAMG;IACK,aAAa,CAAC,KAAsB,EAAE,KAAa;QAC1D,MAAM,MAAM,GAAa,EAAE,CAAC;QAC5B,MAAM,MAAM,GAAG,SAAS,KAAK,GAAG,CAAC;QAEjC,IAAI,CAAC,KAAK,CAAC,OAAO,IAAI,OAAO,KAAK,CAAC,OAAO,KAAK,QAAQ,EAAE,CAAC;YACzD,MAAM,CAAC,IAAI,CAAC,GAAG,MAAM,uBAAuB,CAAC,CAAC;QAC/C,CAAC;QACD,IAAI,CAAC,KAAK,CAAC,WAAW,IAAI,OAAO,KAAK,CAAC,WAAW,KAAK,QAAQ,EAAE,CAAC;YACjE,MAAM,CAAC,IAAI,CAAC,GAAG,MAAM,2BAA2B,CAAC,CAAC;QACnD,CAAC;QACD,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,OAAO,KAAK,CAAC,GAAG,KAAK,QAAQ,EAAE,CAAC;YACjD,MAAM,CAAC,IAAI,CAAC,GAAG,MAAM,mBAAmB,CAAC,CAAC;QAC3C,CAAC;QACD,IAAI,CAAC,KAAK,CAAC,QAAQ,IAAI,OAAO,KAAK,CAAC,QAAQ,KAAK,QAAQ,EAAE,CAAC;YAC3D,MAAM,CAAC,IAAI,CAAC,GAAG,MAAM,wBAAwB,CAAC,CAAC;QAChD,CAAC;QACD,IAAI,CAAC,KAAK,CAAC,MAAM,IAAI,OAAO,KAAK,CAAC,MAAM,KAAK,QAAQ,EAAE,CAAC;YACvD,MAAM,CAAC,IAAI,CAAC,GAAG,MAAM,sBAAsB,CAAC,CAAC;QAC9
C,CAAC;QACD,IAAI,CAAC,CAAC,SAAS,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,SAAS,CAAC,EAAE,CAAC;YAC7D,MAAM,CAAC,IAAI,CAAC,GAAG,MAAM,mDAAmD,CAAC,CAAC;QAC3E,CAAC;QACD,IAAI,KAAK,CAAC,SAAS,KAAK,SAAS,IAAI,OAAO,KAAK,CAAC,SAAS,KAAK,QAAQ,EAAE,CAAC;YAC1E,MAAM,CAAC,IAAI,CAAC,GAAG,MAAM,8BAA8B,CAAC,CAAC;QACtD,CAAC;QACD,IAAI,CAAC,CAAC,QAAQ,EAAE,WAAW,EAAE,gBAAgB,EAAE,gBAAgB,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC;YACxF,MAAM,CAAC,IAAI,CAAC,GAAG,MAAM,uCAAuC,CAAC,CAAC;QAC/D,CAAC;QAED,OAAO,MAAM,CAAC;IACf,CAAC;IAED;;;;;;OAMG;IACH,QAAQ,CACP,MAA6B,EAC7B,KAAwB;QAExB,MAAM,EAAE,UAAU,EAAE,GAAG,KAAK,CAAC;QAE7B,sBAAsB;QACtB,MAAM,WAAW,GAAsB,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAClE,IAAI,CAAC,aAAa,CAAC,KAAK,EAAE,UAAU,CAAC,CACrC,CAAC;QAEF,iBAAiB;QACjB,MAAM,OAAO,GAAG,IAAI,CAAC,kBAAkB,CAAC,WAAW,CAAC,CAAC;QAErD,OAAO;YACN,IAAI,EAAE,QAAQ;YACd,OAAO,EAAE,eAAe,CAAC,OAAO;YAChC,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;YACnC,IAAI,EAAE,OAAO;YACb,QAAQ,EAAE;gBACT,WAAW,EAAE,KAAK,CAAC,QAAQ,EAAE,MAAM;gBACnC,MAAM;aACN;SACD,CAAC;IACH,CAAC;IAED;;;;;;OAMG;IACK,aAAa,CAAC,KAAsB,EAAE,UAA8B;QAC3E,yCAAyC;QACzC,MAAM,kBAAkB,GAAG,IAAI,CAAC,aAAa,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC;QAEjE,uCAAuC;QACvC,MAAM,UAAU,GAAG,kBAAkB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,KAAK,KAAK,CAAC,GAAG,CAAC,CAAC;QACvE,MAAM,WAAW,GAAG,kBAAkB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,KAAK,KAAK,CAAC,QAAQ,CAAC,CAAC;QAE7E,sBAAsB;QACtB,IAAI,CAAC,UAAU,IAAI,CAAC,WAAW,EAAE,CAAC;YACjC,OAAO,IAAI,CAAC,wBAAwB,CACnC,KAAK,EACL,UAAU,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,uBAAuB,EAChD,WAAW,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,wBAAwB,CAClD,CAAC;QACH,CAAC;QAED,oBAAoB;QACpB,MAAM,aAAa,GAAG,KAAK,CAAC,MAAM,CAAC;QACnC,MAAM,cAAc,GAAG,KAAK,CAAC,MAAM,CAAC;QACpC,MAAM,YAAY,GAAG,UAAU,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC;QACvD,MAAM,aAAa,GAAG,WAAW,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC;QAE1D,IAAI,CAAC,CAAC,aAAa,IAAI,UAAU,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,cAAc,IAAI,WAAW,CAAC,OAAO,CAAC,EAAE,CAAC;YACxF,OAAO,IAAI,CAAC,wBAAwB,CACnC,KAAK,EACL,qCAAqC,EACrC,sCAAs
C,CACtC,CAAC;QACH,CAAC;QAED,mBAAmB;QACnB,MAAM,YAAY,GAAG,YAAY,CAAC,IAAI,CAAC;QACvC,MAAM,aAAa,GAAG,aAAa,CAAC,IAAI,CAAC;QACzC,MAAM,KAAK,GAAG,YAAY,GAAG,aAAa,CAAC;QAC3C,MAAM,KAAK,GAAG,aAAa,KAAK,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,YAAY,GAAG,aAAa,CAAC;QAE5E,4CAA4C;QAC5C,MAAM,UAAU,GAAG,UAAU,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;QAC5D,MAAM,MAAM,GAAG,UAAU,EAAE,MAAM,CAAC;QAClC,MAAM,UAAU,GAAG,UAAU,EAAE,UAAU,CAAC;QAE1C,MAAM,QAAQ,GAAkB;YAC/B,YAAY;YACZ,aAAa;YACb,KAAK;YACL,KAAK;YACL,MAAM;YACN,UAAU;YACV,CAAC,EAAE,YAAY,CAAC,CAAC,GAAG,aAAa,CAAC,CAAC;SACnC,CAAC;QAEF,yBAAyB;QACzB,MAAM,MAAM,GAAG,IAAI,CAAC,oBAAoB,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;QAE1D,OAAO;YACN,KAAK;YACL,MAAM;YACN,QAAQ;SACR,CAAC;IACH,CAAC;IAED;;;;;;OAMG;IACK,aAAa,CACpB,UAA8B,EAC9B,KAAsB;QAEtB,IAAI,CAAC,KAAK,CAAC,gBAAgB,EAAE,CAAC;YAC7B,OAAO,UAAU,CAAC;QACnB,CAAC;QAED,OAAO,UAAU,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE;YAChC,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,gBAAgB,IAAI,EAAE,CAAC,EAAE,CAAC;gBACzE,IAAI,GAAG,KAAK,WAAW,EAAE,CAAC;oBACzB,MAAM,cAAc,GAAG,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;oBAC9D,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC,GAAG,CAAC,SAAsB,CAAC,EAAE,CAAC;wBAC1D,OAAO,KAAK,CAAC;oBACd,CAAC;gBACF,CAAC;gBACD,6CAA6C;YAC9C,CAAC;YACD,OAAO,IAAI,CAAC;QACb,CAAC,CAAC,CAAC;IACJ,CAAC;IAED;;;;;;OAMG;IACK,wBAAwB,CAC/B,KAAsB,EACtB,GAAG,OAA+B;QAElC,MAAM,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAe,EAAE,CAAC,CAAC,KAAK,SAAS,CAAC,CAAC;QAEzE,OAAO;YACN,KAAK;YACL,MAAM,EAAE,cAAc;YACtB,QAAQ,EAAE;gBACT,YAAY,EAAE,MAAM,CAAC,GAAG;gBACxB,aAAa,EAAE,MAAM,CAAC,GAAG;gBACzB,KAAK,EAAE,MAAM,CAAC,GAAG;gBACjB,KAAK,EAAE,MAAM,CAAC,GAAG;aACjB;YACD,kBAAkB,EAAE,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC;SAC3C,CAAC;IACH,CAAC;IAED;;;;;;OAMG;IACK,oBAAoB,CAAC,KAAsB,EAAE,QAAuB;QAC3E,yBAAyB;QACzB,IAAI,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,YAAY,CAAC,IAAI,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,aAAa,CAAC,EAAE,CAAC;YACjF,OAAO,cAAc,CAAC;QACvB,CAAC;QAED,6CAA6C;QAC7C,MAAM,iBAAiB,GAAG,KAAK,CAAC,iBAAiB,IAAI,IAAI,CAAC;QAC1D,IAAI,QAAQ,
CAAC,MAAM,KAAK,SAAS,IAAI,QAAQ,CAAC,MAAM,GAAG,iBAAiB,EAAE,CAAC;YAC1E,OAAO,cAAc,CAAC;QACvB,CAAC;QAED,wCAAwC;QACxC,IACC,KAAK,CAAC,aAAa,KAAK,SAAS;YACjC,QAAQ,CAAC,UAAU,KAAK,SAAS;YACjC,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,UAAU,CAAC,GAAG,KAAK,CAAC,aAAa,EAClD,CAAC;YACF,OAAO,cAAc,CAAC;QACvB,CAAC;QAED,qBAAqB;QACrB,QAAQ,KAAK,CAAC,SAAS,EAAE,CAAC;YACzB,KAAK,SAAS,CAAC,CAAC,CAAC;gBAChB,IAAI,KAAK,CAAC,SAAS,KAAK,SAAS,EAAE,CAAC;oBACnC,OAAO,QAAQ,CAAC,KAAK,IAAI,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,UAAU,CAAC;gBACrE,CAAC;gBACD,OAAO,QAAQ,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,UAAU,CAAC;YACtD,CAAC;YAED,KAAK,MAAM,CAAC,CAAC,CAAC;gBACb,IAAI,KAAK,CAAC,SAAS,KAAK,SAAS,EAAE,CAAC;oBACnC,OAAO,QAAQ,CAAC,KAAK,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,UAAU,CAAC;gBACtE,CAAC;gBACD,OAAO,QAAQ,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,UAAU,CAAC;YACtD,CAAC;YAED,KAAK,OAAO,CAAC,CAAC,CAAC;gBACd,MAAM,OAAO,GAAG,KAAK,CAAC,SAAS,IAAI,KAAK,CAAC;gBACzC,OAAO,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,OAAO,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,UAAU,CAAC;YACvE,CAAC;QACF,CAAC;IACF,CAAC;IAED;;;;;OAKG;IACK,kBAAkB,CAAC,WAA8B;QACxD,MAAM,SAAS,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,WAAW,CAAC,CAAC,MAAM,CAAC;QAC7E,MAAM,QAAQ,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,UAAU,CAAC,CAAC,MAAM,CAAC;QAC3E,MAAM,YAAY,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,cAAc,CAAC,CAAC,MAAM,CAAC;QAEnF,MAAM,UAAU,GAAG,SAAS,GAAG,QAAQ,CAAC;QACxC,MAAM,gBAAgB,GAAG,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC;QAErE,OAAO;YACN,OAAO,EAAE,OAAO;YAChB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;YACnC,WAAW;YACX,OAAO,EAAE;gBACR,KAAK,EAAE,WAAW,CAAC,MAAM;gBACzB,SAAS;gBACT,QAAQ;gBACR,YAAY;gBACZ,gBAAgB;aAChB;SACD,CAAC;IACH,CAAC;IAED;;;;;OAKG;IACH,SAAS,CAAC,MAA6C;QACtD,MAAM,EAAE,OAAO,EAAE,GAAG,MAAM,CAAC,IAAI,CAAC;QAEhC,OAAO;YACN,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,MAAM,EAAE,OAAO,CAAC,SAAS;YACzB,MAAM,EAAE,OAAO,CAAC,QAAQ;Y
ACxB,YAAY,EAAE,OAAO,CAAC,YAAY;YAClC,QAAQ,EAAE,OAAO,CAAC,gBAAgB;YAClC,UAAU,EAAE;gBACX,gBAAgB,EAAE,OAAO,CAAC,gBAAgB;aAC1C;SACD,CAAC;IACH,CAAC"}
@@ -0,0 +1,136 @@
1
+ /**
2
+ * Exploratory Evaluator
3
+ *
4
+ * Hypothesis-free analysis for discovering patterns in evaluation data.
5
+ * Unlike ClaimsEvaluator, which tests predefined hypotheses, this evaluator
6
+ * performs exploratory analysis including:
7
+ * - Ranking all SUTs by any metric (not just primary vs baseline)
8
+ * - Finding significant pairwise differences (N-way comparisons)
9
+ * - Discovering case-class effects
10
+ * - Computing metric correlations
11
+ */
12
+ import type { EvaluationContext, EvaluationOutput, EvaluationSummary, Evaluator, ExploratoryEvaluatorConfig, ExploratoryEvaluatorData, IEvaluator, ValidationResult } from "../types/evaluator.js";
13
+ /**
14
+ * Exploratory evaluator - hypothesis-free comparative analysis over the aggregates supplied in the evaluation context.
15
+ */
16
+ export declare class ExploratoryEvaluator implements Evaluator<ExploratoryEvaluatorConfig, EvaluationContext, ExploratoryEvaluatorData>, IEvaluator {
17
+ /** Type identifier; fixed to the literal "exploratory". */
18
+ readonly type: "exploratory";
19
+ /** Schema version constant (private static; its value is not visible in this declaration). */
20
+ private static readonly VERSION;
21
+ /** Default significance level (presumably used when the config does not supply one - confirm in the implementation). */
22
+ private static readonly DEFAULT_SIGNIFICANCE;
23
+ /**
24
+ * Validate exploratory evaluator configuration.
25
+ *
26
+ * @param config - Exploratory evaluator configuration to validate
27
+ * @returns Validation result for the supplied configuration
28
+ */
29
+ validateConfig(config: ExploratoryEvaluatorConfig): ValidationResult;
30
+ /**
31
+ * Perform exploratory evaluation.
32
+ *
33
+ * @param config - Exploratory evaluator configuration
34
+ * @param input - Evaluation context with aggregates
35
+ * @returns Evaluation output carrying ExploratoryEvaluatorData
36
+ */
37
+ evaluate(config: ExploratoryEvaluatorConfig, input: EvaluationContext): EvaluationOutput<ExploratoryEvaluatorData>;
38
+ /**
39
+ * Summarize evaluation output.
40
+ *
41
+ * @param output - Evaluation output (as returned by evaluate) to summarize
42
+ * @returns Summary statistics as an EvaluationSummary
43
+ */
44
+ summarize(output: EvaluationOutput<ExploratoryEvaluatorData>): EvaluationSummary;
45
+ /**
46
+ * Determine which SUTs to analyze. Private; the signature is elided in this declaration file.
47
+ * @param aggregates - Aggregated results to consider (presumably taken from the evaluation context - confirm)
48
+ * @param configSuts - SUT selection coming from the config (presumably optional - confirm)
49
+ */
50
+ private determineSuts;
51
+ /**
52
+ * Determine which metrics to analyze. Private; the signature is elided in this declaration file.
53
+ * @param aggregates - Aggregated results to consider (presumably taken from the evaluation context - confirm)
54
+ * @param configMetrics - Metric selection coming from the config (presumably optional - confirm)
55
+ */
56
+ private determineMetrics;
57
+ /**
58
+ * Compute rankings for a single metric. Private; the signature is elided in this declaration file.
59
+ * @param aggregates - Aggregated results to rank
60
+ * @param metric - Metric to rank by
61
+ * @param direction - Ranking direction (presumably whether higher or lower values rank first - confirm)
62
+ */
63
+ private computeRankings;
64
+ /**
65
+ * Compute all pairwise comparisons. Private; the signature is elided in this declaration file.
66
+ * @param aggregates - Aggregated results to compare
67
+ * @param suts - SUTs to compare against each other
68
+ * @param metrics - Metrics to compare on
69
+ * @param significanceLevel - Significance level for the comparisons
70
+ * @param minEffectSize - Minimum effect size (presumably a reporting threshold - confirm)
71
+ */
72
+ private computePairwiseComparisons;
73
+ /**
74
+ * Compare a single pair of SUTs for a metric. Private; the signature is elided in this declaration file.
75
+ * @param aggregates - Aggregated results to compare
76
+ * @param sutA - First SUT of the pair
77
+ * @param sutB - Second SUT of the pair
78
+ * @param metric - Metric the pair is compared on
79
+ * @param significanceLevel - Significance level for the comparison
80
+ * @param minEffectSize - Minimum effect size (presumably a reporting threshold - confirm)
81
+ */
82
+ private compareSutPair;
83
+ /**
84
+ * Analyze case-class effects on SUT performance. Private; the signature is elided in this declaration file.
85
+ * @param aggregates - Aggregated results to analyze
86
+ * @param metrics - Metrics to analyze
87
+ * @param significanceLevel - Significance level for the analysis
88
+ */
89
+ private analyzeCaseClassEffects;
90
+ /**
91
+ * Compute correlations between metrics. Private; the signature is elided in this declaration file.
92
+ * @param aggregates - Aggregated results supplying the metric values
93
+ * @param metrics - Metrics to correlate
94
+ */
95
+ private computeMetricCorrelations;
96
+ /**
97
+ * Compute Pearson and Spearman correlation between two metrics.
98
+ * @param aggregates - Aggregated results supplying the metric values
99
+ * @param metricA - First metric of the pair
100
+ * @param metricB - Second metric of the pair
101
+ */
102
+ private computeCorrelation;
103
+ /**
104
+ * Compute Pearson correlation coefficient.
105
+ * @param x - First series of values (presumably the same length as y - confirm)
106
+ * @param y - Second series of values
107
+ */
108
+ private pearsonCorrelation;
109
+ /**
110
+ * Compute Spearman rank correlation coefficient.
111
+ * @param x - First series of values (presumably the same length as y - confirm)
112
+ * @param y - Second series of values
113
+ */
114
+ private spearmanCorrelation;
115
+ /**
116
+ * Compute ranks for an array of values (handling ties).
117
+ * @param values - Array of values to rank
118
+ */
119
+ private computeRanks;
120
+ /**
121
+ * Interpret correlation coefficient.
122
+ * @param r - Correlation coefficient to interpret
123
+ */
124
+ private interpretCorrelation;
125
+ /**
126
+ * Compute variance of an array.
127
+ * @param values - Array of values whose variance is computed
128
+ */
129
+ private variance;
130
+ /**
131
+ * Standard normal CDF approximation.
132
+ * @param z - Point at which the CDF is evaluated (presumably a z-score - confirm)
133
+ */
134
+ private normalCdf;
135
+ }
136
+ //# sourceMappingURL=exploratory-evaluator.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"exploratory-evaluator.d.ts","sourceRoot":"","sources":["../../src/evaluators/exploratory-evaluator.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAGH,OAAO,KAAK,EAEX,iBAAiB,EACjB,gBAAgB,EAChB,iBAAiB,EACjB,SAAS,EACT,0BAA0B,EAC1B,wBAAwB,EAExB,UAAU,EAKV,gBAAgB,EAChB,MAAM,uBAAuB,CAAC;AAE/B;;GAEG;AACH,qBAAa,oBACZ,YACC,SAAS,CAAC,0BAA0B,EAAE,iBAAiB,EAAE,wBAAwB,CAAC,EAClF,UAAU;IAEX,sBAAsB;IACtB,QAAQ,CAAC,IAAI,EAAG,aAAa,CAAU;IAEvC,qBAAqB;IACrB,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAW;IAE1C,iCAAiC;IACjC,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,oBAAoB,CAAQ;IAEpD;;;;;OAKG;IACH,cAAc,CAAC,MAAM,EAAE,0BAA0B,GAAG,gBAAgB;IA2CpE;;;;;;OAMG;IACH,QAAQ,CACP,MAAM,EAAE,0BAA0B,EAClC,KAAK,EAAE,iBAAiB,GACtB,gBAAgB,CAAC,wBAAwB,CAAC;IAsF7C;;;;;OAKG;IACH,SAAS,CAAC,MAAM,EAAE,gBAAgB,CAAC,wBAAwB,CAAC,GAAG,iBAAiB;IAchF;;;;OAIG;IACH,OAAO,CAAC,aAAa;IAQrB;;;;OAIG;IACH,OAAO,CAAC,gBAAgB;IAcxB;;;;;OAKG;IACH,OAAO,CAAC,eAAe;IAuEvB;;;;;;;OAOG;IACH,OAAO,CAAC,0BAA0B;IAmClC;;;;;;;;OAQG;IACH,OAAO,CAAC,cAAc;IAuFtB;;;;;OAKG;IACH,OAAO,CAAC,uBAAuB;IA8D/B;;;;OAIG;IACH,OAAO,CAAC,yBAAyB;IAqBjC;;;;;OAKG;IACH,OAAO,CAAC,kBAAkB;IA2C1B;;;;OAIG;IACH,OAAO,CAAC,kBAAkB;IAqB1B;;;;OAIG;IACH,OAAO,CAAC,mBAAmB;IAM3B;;;OAGG;IACH,OAAO,CAAC,YAAY;IA0BpB;;;OAGG;IACH,OAAO,CAAC,oBAAoB;IAS5B;;;OAGG;IACH,OAAO,CAAC,QAAQ;IAMhB;;;OAGG;IACH,OAAO,CAAC,SAAS;CAiBjB"}