ppef 1.0.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (295) hide show
  1. package/README.md +76 -125
  2. package/bin/ppef.mjs +20 -0
  3. package/dist/__tests__/cli/evaluate-command.integration.test.d.ts +8 -0
  4. package/dist/__tests__/cli/evaluate-command.integration.test.d.ts.map +1 -0
  5. package/dist/__tests__/cli/evaluate-command.integration.test.js +308 -0
  6. package/dist/__tests__/cli/evaluate-command.integration.test.js.map +1 -0
  7. package/dist/__tests__/evaluators/claims-evaluator.unit.test.d.ts +8 -0
  8. package/dist/__tests__/evaluators/claims-evaluator.unit.test.d.ts.map +1 -0
  9. package/dist/__tests__/evaluators/claims-evaluator.unit.test.js +405 -0
  10. package/dist/__tests__/evaluators/claims-evaluator.unit.test.js.map +1 -0
  11. package/dist/__tests__/evaluators/metrics-evaluator.unit.test.d.ts +8 -0
  12. package/dist/__tests__/evaluators/metrics-evaluator.unit.test.d.ts.map +1 -0
  13. package/dist/__tests__/evaluators/metrics-evaluator.unit.test.js +424 -0
  14. package/dist/__tests__/evaluators/metrics-evaluator.unit.test.js.map +1 -0
  15. package/dist/__tests__/evaluators/registry.unit.test.d.ts +7 -0
  16. package/dist/__tests__/evaluators/registry.unit.test.d.ts.map +1 -0
  17. package/dist/__tests__/evaluators/registry.unit.test.js +173 -0
  18. package/dist/__tests__/evaluators/registry.unit.test.js.map +1 -0
  19. package/dist/__tests__/evaluators/robustness-evaluator.unit.test.d.ts +8 -0
  20. package/dist/__tests__/evaluators/robustness-evaluator.unit.test.d.ts.map +1 -0
  21. package/dist/__tests__/evaluators/robustness-evaluator.unit.test.js +260 -0
  22. package/dist/__tests__/evaluators/robustness-evaluator.unit.test.js.map +1 -0
  23. package/dist/__tests__/framework-pipeline.integration.test.js +49 -20
  24. package/dist/__tests__/framework-pipeline.integration.test.js.map +1 -1
  25. package/dist/__tests__/index-exports.unit.test.d.ts +8 -0
  26. package/dist/__tests__/index-exports.unit.test.d.ts.map +1 -0
  27. package/dist/__tests__/index-exports.unit.test.js +124 -0
  28. package/dist/__tests__/index-exports.unit.test.js.map +1 -0
  29. package/dist/__tests__/registry-executor.integration.test.js +12 -9
  30. package/dist/__tests__/registry-executor.integration.test.js.map +1 -1
  31. package/dist/aggregation/__tests__/aggregators.unit.test.d.ts +7 -0
  32. package/dist/aggregation/__tests__/aggregators.unit.test.d.ts.map +1 -0
  33. package/dist/aggregation/__tests__/aggregators.unit.test.js +350 -0
  34. package/dist/aggregation/__tests__/aggregators.unit.test.js.map +1 -0
  35. package/dist/aggregation/__tests__/pipeline.unit.test.d.ts +7 -0
  36. package/dist/aggregation/__tests__/pipeline.unit.test.d.ts.map +1 -0
  37. package/dist/aggregation/__tests__/pipeline.unit.test.js +213 -0
  38. package/dist/aggregation/__tests__/pipeline.unit.test.js.map +1 -0
  39. package/dist/aggregation/aggregators.d.ts +9 -0
  40. package/dist/aggregation/aggregators.d.ts.map +1 -1
  41. package/dist/aggregation/aggregators.js +1 -1
  42. package/dist/aggregation/aggregators.js.map +1 -1
  43. package/dist/aggregation/index.d.ts +1 -1
  44. package/dist/aggregation/index.d.ts.map +1 -1
  45. package/dist/aggregation/index.js +1 -1
  46. package/dist/aggregation/index.js.map +1 -1
  47. package/dist/aggregation/pipeline.d.ts.map +1 -1
  48. package/dist/aggregation/pipeline.js +40 -3
  49. package/dist/aggregation/pipeline.js.map +1 -1
  50. package/dist/claims/index.d.ts +6 -3
  51. package/dist/claims/index.d.ts.map +1 -1
  52. package/dist/claims/index.js +6 -3
  53. package/dist/claims/index.js.map +1 -1
  54. package/dist/cli/__tests__/aggregate.command.unit.test.d.ts +7 -0
  55. package/dist/cli/__tests__/aggregate.command.unit.test.d.ts.map +1 -0
  56. package/dist/cli/__tests__/aggregate.command.unit.test.js +399 -0
  57. package/dist/cli/__tests__/aggregate.command.unit.test.js.map +1 -0
  58. package/dist/cli/__tests__/binary-sut.integration.test.d.ts +8 -0
  59. package/dist/cli/__tests__/binary-sut.integration.test.d.ts.map +1 -0
  60. package/dist/cli/__tests__/binary-sut.integration.test.js +165 -0
  61. package/dist/cli/__tests__/binary-sut.integration.test.js.map +1 -0
  62. package/dist/cli/__tests__/commands.unit.test.d.ts +10 -0
  63. package/dist/cli/__tests__/commands.unit.test.d.ts.map +1 -0
  64. package/dist/cli/__tests__/commands.unit.test.js +217 -0
  65. package/dist/cli/__tests__/commands.unit.test.js.map +1 -0
  66. package/dist/cli/__tests__/config-loader.unit.test.d.ts +7 -0
  67. package/dist/cli/__tests__/config-loader.unit.test.d.ts.map +1 -0
  68. package/dist/cli/__tests__/config-loader.unit.test.js +611 -0
  69. package/dist/cli/__tests__/config-loader.unit.test.js.map +1 -0
  70. package/dist/cli/__tests__/index.unit.test.d.ts +10 -0
  71. package/dist/cli/__tests__/index.unit.test.d.ts.map +1 -0
  72. package/dist/cli/__tests__/index.unit.test.js +65 -0
  73. package/dist/cli/__tests__/index.unit.test.js.map +1 -0
  74. package/dist/cli/__tests__/logger.unit.test.d.ts +11 -0
  75. package/dist/cli/__tests__/logger.unit.test.d.ts.map +1 -0
  76. package/dist/cli/__tests__/logger.unit.test.js +180 -0
  77. package/dist/cli/__tests__/logger.unit.test.js.map +1 -0
  78. package/dist/cli/__tests__/module-loader.unit.test.d.ts +11 -0
  79. package/dist/cli/__tests__/module-loader.unit.test.d.ts.map +1 -0
  80. package/dist/cli/__tests__/module-loader.unit.test.js +262 -0
  81. package/dist/cli/__tests__/module-loader.unit.test.js.map +1 -0
  82. package/dist/cli/__tests__/output-writer.unit.test.d.ts +10 -0
  83. package/dist/cli/__tests__/output-writer.unit.test.d.ts.map +1 -0
  84. package/dist/cli/__tests__/output-writer.unit.test.js +216 -0
  85. package/dist/cli/__tests__/output-writer.unit.test.js.map +1 -0
  86. package/dist/cli/__tests__/plan.command.unit.test.d.ts +7 -0
  87. package/dist/cli/__tests__/plan.command.unit.test.d.ts.map +1 -0
  88. package/dist/cli/__tests__/plan.command.unit.test.js +289 -0
  89. package/dist/cli/__tests__/plan.command.unit.test.js.map +1 -0
  90. package/dist/cli/__tests__/run.command.unit.test.d.ts +7 -0
  91. package/dist/cli/__tests__/run.command.unit.test.d.ts.map +1 -0
  92. package/dist/cli/__tests__/run.command.unit.test.js +422 -0
  93. package/dist/cli/__tests__/run.command.unit.test.js.map +1 -0
  94. package/dist/cli/__tests__/validate.command.unit.test.d.ts +7 -0
  95. package/dist/cli/__tests__/validate.command.unit.test.d.ts.map +1 -0
  96. package/dist/cli/__tests__/validate.command.unit.test.js +226 -0
  97. package/dist/cli/__tests__/validate.command.unit.test.js.map +1 -0
  98. package/dist/cli/command-deps.d.ts +137 -0
  99. package/dist/cli/command-deps.d.ts.map +1 -0
  100. package/dist/cli/command-deps.js +7 -0
  101. package/dist/cli/command-deps.js.map +1 -0
  102. package/dist/cli/commands/aggregate.d.ts +35 -0
  103. package/dist/cli/commands/aggregate.d.ts.map +1 -0
  104. package/dist/cli/commands/aggregate.js +124 -0
  105. package/dist/cli/commands/aggregate.js.map +1 -0
  106. package/dist/cli/commands/evaluate.d.ts +41 -0
  107. package/dist/cli/commands/evaluate.d.ts.map +1 -0
  108. package/dist/cli/commands/evaluate.js +287 -0
  109. package/dist/cli/commands/evaluate.js.map +1 -0
  110. package/dist/cli/commands/plan.d.ts +36 -0
  111. package/dist/cli/commands/plan.d.ts.map +1 -0
  112. package/dist/cli/commands/plan.js +109 -0
  113. package/dist/cli/commands/plan.js.map +1 -0
  114. package/dist/cli/commands/run.d.ts +33 -0
  115. package/dist/cli/commands/run.d.ts.map +1 -0
  116. package/dist/cli/commands/run.js +277 -0
  117. package/dist/cli/commands/run.js.map +1 -0
  118. package/dist/cli/commands/validate.d.ts +27 -0
  119. package/dist/cli/commands/validate.d.ts.map +1 -0
  120. package/dist/cli/commands/validate.js +88 -0
  121. package/dist/cli/commands/validate.js.map +1 -0
  122. package/dist/cli/config-loader.d.ts +30 -0
  123. package/dist/cli/config-loader.d.ts.map +1 -0
  124. package/dist/cli/config-loader.js +181 -0
  125. package/dist/cli/config-loader.js.map +1 -0
  126. package/dist/cli/index.d.ts +27 -0
  127. package/dist/cli/index.d.ts.map +1 -0
  128. package/dist/cli/index.js +60 -0
  129. package/dist/cli/index.js.map +1 -0
  130. package/dist/cli/logger.d.ts +75 -0
  131. package/dist/cli/logger.d.ts.map +1 -0
  132. package/dist/cli/logger.js +131 -0
  133. package/dist/cli/logger.js.map +1 -0
  134. package/dist/cli/module-loader.d.ts +68 -0
  135. package/dist/cli/module-loader.d.ts.map +1 -0
  136. package/dist/cli/module-loader.js +134 -0
  137. package/dist/cli/module-loader.js.map +1 -0
  138. package/dist/cli/output-writer.d.ts +51 -0
  139. package/dist/cli/output-writer.d.ts.map +1 -0
  140. package/dist/cli/output-writer.js +65 -0
  141. package/dist/cli/output-writer.js.map +1 -0
  142. package/dist/cli/types.d.ts +193 -0
  143. package/dist/cli/types.d.ts.map +1 -0
  144. package/dist/cli/types.js +7 -0
  145. package/dist/cli/types.js.map +1 -0
  146. package/dist/collector/__tests__/result-collector.unit.test.d.ts +7 -0
  147. package/dist/collector/__tests__/result-collector.unit.test.d.ts.map +1 -0
  148. package/dist/collector/__tests__/result-collector.unit.test.js +1021 -0
  149. package/dist/collector/__tests__/result-collector.unit.test.js.map +1 -0
  150. package/dist/collector/__tests__/schema.unit.test.d.ts +7 -0
  151. package/dist/collector/__tests__/schema.unit.test.d.ts.map +1 -0
  152. package/dist/collector/__tests__/schema.unit.test.js +360 -0
  153. package/dist/collector/__tests__/schema.unit.test.js.map +1 -0
  154. package/dist/evaluators/claims-evaluator.d.ts +87 -0
  155. package/dist/evaluators/claims-evaluator.d.ts.map +1 -0
  156. package/dist/evaluators/claims-evaluator.js +289 -0
  157. package/dist/evaluators/claims-evaluator.js.map +1 -0
  158. package/dist/evaluators/exploratory-evaluator.d.ts +136 -0
  159. package/dist/evaluators/exploratory-evaluator.d.ts.map +1 -0
  160. package/dist/evaluators/exploratory-evaluator.js +545 -0
  161. package/dist/evaluators/exploratory-evaluator.js.map +1 -0
  162. package/dist/evaluators/index.d.ts +13 -0
  163. package/dist/evaluators/index.d.ts.map +1 -0
  164. package/dist/evaluators/index.js +14 -0
  165. package/dist/evaluators/index.js.map +1 -0
  166. package/dist/evaluators/metrics-evaluator.d.ts +114 -0
  167. package/dist/evaluators/metrics-evaluator.d.ts.map +1 -0
  168. package/dist/evaluators/metrics-evaluator.js +433 -0
  169. package/dist/evaluators/metrics-evaluator.js.map +1 -0
  170. package/dist/evaluators/registry.d.ts +106 -0
  171. package/dist/evaluators/registry.d.ts.map +1 -0
  172. package/dist/evaluators/registry.js +148 -0
  173. package/dist/evaluators/registry.js.map +1 -0
  174. package/dist/evaluators/robustness-evaluator.d.ts +57 -0
  175. package/dist/evaluators/robustness-evaluator.d.ts.map +1 -0
  176. package/dist/evaluators/robustness-evaluator.js +186 -0
  177. package/dist/evaluators/robustness-evaluator.js.map +1 -0
  178. package/dist/executor/__tests__/binary-sut.unit.test.d.ts +8 -0
  179. package/dist/executor/__tests__/binary-sut.unit.test.d.ts.map +1 -0
  180. package/dist/executor/__tests__/binary-sut.unit.test.js +313 -0
  181. package/dist/executor/__tests__/binary-sut.unit.test.js.map +1 -0
  182. package/dist/executor/__tests__/checkpoint-manager.unit.test.js +83 -1
  183. package/dist/executor/__tests__/checkpoint-manager.unit.test.js.map +1 -1
  184. package/dist/executor/__tests__/checkpoint-merge-bug.diagnostic.test.d.ts +3 -6
  185. package/dist/executor/__tests__/checkpoint-merge-bug.diagnostic.test.d.ts.map +1 -1
  186. package/dist/executor/__tests__/checkpoint-merge-bug.diagnostic.test.js +428 -159
  187. package/dist/executor/__tests__/checkpoint-merge-bug.diagnostic.test.js.map +1 -1
  188. package/dist/executor/__tests__/checkpoint-storage.unit.test.js +148 -1
  189. package/dist/executor/__tests__/checkpoint-storage.unit.test.js.map +1 -1
  190. package/dist/executor/__tests__/executor.unit.test.js +123 -8
  191. package/dist/executor/__tests__/executor.unit.test.js.map +1 -1
  192. package/dist/executor/__tests__/memory-monitor.unit.test.d.ts +7 -0
  193. package/dist/executor/__tests__/memory-monitor.unit.test.d.ts.map +1 -0
  194. package/dist/executor/__tests__/memory-monitor.unit.test.js +285 -0
  195. package/dist/executor/__tests__/memory-monitor.unit.test.js.map +1 -0
  196. package/dist/executor/__tests__/parallel-executor.unit.test.d.ts +2 -1
  197. package/dist/executor/__tests__/parallel-executor.unit.test.d.ts.map +1 -1
  198. package/dist/executor/__tests__/parallel-executor.unit.test.js +426 -156
  199. package/dist/executor/__tests__/parallel-executor.unit.test.js.map +1 -1
  200. package/dist/executor/__tests__/resource-calculator.unit.test.d.ts +10 -0
  201. package/dist/executor/__tests__/resource-calculator.unit.test.d.ts.map +1 -0
  202. package/dist/executor/__tests__/resource-calculator.unit.test.js +104 -0
  203. package/dist/executor/__tests__/resource-calculator.unit.test.js.map +1 -0
  204. package/dist/executor/__tests__/run-id.unit.test.d.ts +8 -0
  205. package/dist/executor/__tests__/run-id.unit.test.d.ts.map +1 -0
  206. package/dist/executor/__tests__/run-id.unit.test.js +156 -0
  207. package/dist/executor/__tests__/run-id.unit.test.js.map +1 -0
  208. package/dist/executor/__tests__/worker-entry.integration.test.d.ts +24 -0
  209. package/dist/executor/__tests__/worker-entry.integration.test.d.ts.map +1 -0
  210. package/dist/executor/__tests__/worker-entry.integration.test.js +82 -0
  211. package/dist/executor/__tests__/worker-entry.integration.test.js.map +1 -0
  212. package/dist/executor/__tests__/worker-entry.unit.test.d.ts +7 -0
  213. package/dist/executor/__tests__/worker-entry.unit.test.d.ts.map +1 -0
  214. package/dist/executor/__tests__/worker-entry.unit.test.js +364 -0
  215. package/dist/executor/__tests__/worker-entry.unit.test.js.map +1 -0
  216. package/dist/executor/__tests__/worker-threads-executor.unit.test.d.ts +8 -0
  217. package/dist/executor/__tests__/worker-threads-executor.unit.test.d.ts.map +1 -0
  218. package/dist/executor/__tests__/worker-threads-executor.unit.test.js +276 -0
  219. package/dist/executor/__tests__/worker-threads-executor.unit.test.js.map +1 -0
  220. package/dist/executor/binary-sut.d.ts +105 -0
  221. package/dist/executor/binary-sut.d.ts.map +1 -0
  222. package/dist/executor/binary-sut.js +174 -0
  223. package/dist/executor/binary-sut.js.map +1 -0
  224. package/dist/executor/checkpoint-storage.d.ts.map +1 -1
  225. package/dist/executor/checkpoint-storage.js +6 -4
  226. package/dist/executor/checkpoint-storage.js.map +1 -1
  227. package/dist/executor/executor.d.ts +28 -0
  228. package/dist/executor/executor.d.ts.map +1 -1
  229. package/dist/executor/executor.js +85 -24
  230. package/dist/executor/executor.js.map +1 -1
  231. package/dist/executor/index.d.ts +4 -0
  232. package/dist/executor/index.d.ts.map +1 -1
  233. package/dist/executor/index.js +4 -0
  234. package/dist/executor/index.js.map +1 -1
  235. package/dist/executor/parallel-executor.d.ts +186 -0
  236. package/dist/executor/parallel-executor.d.ts.map +1 -1
  237. package/dist/executor/parallel-executor.js +218 -83
  238. package/dist/executor/parallel-executor.js.map +1 -1
  239. package/dist/executor/resource-calculator.d.ts +49 -0
  240. package/dist/executor/resource-calculator.d.ts.map +1 -0
  241. package/dist/executor/resource-calculator.js +129 -0
  242. package/dist/executor/resource-calculator.js.map +1 -0
  243. package/dist/executor/run-id.d.ts.map +1 -1
  244. package/dist/executor/run-id.js +8 -1
  245. package/dist/executor/run-id.js.map +1 -1
  246. package/dist/executor/worker-entry.d.ts +2 -0
  247. package/dist/executor/worker-entry.d.ts.map +1 -1
  248. package/dist/executor/worker-entry.js +46 -55
  249. package/dist/executor/worker-entry.js.map +1 -1
  250. package/dist/executor/worker-executor.d.ts +257 -0
  251. package/dist/executor/worker-executor.d.ts.map +1 -0
  252. package/dist/executor/worker-executor.js +308 -0
  253. package/dist/executor/worker-executor.js.map +1 -0
  254. package/dist/executor/worker-threads-executor.d.ts +245 -0
  255. package/dist/executor/worker-threads-executor.d.ts.map +1 -0
  256. package/dist/executor/worker-threads-executor.js +332 -0
  257. package/dist/executor/worker-threads-executor.js.map +1 -0
  258. package/dist/index.d.ts +1 -0
  259. package/dist/index.d.ts.map +1 -1
  260. package/dist/index.js +4 -2
  261. package/dist/index.js.map +1 -1
  262. package/dist/renderers/latex-renderer.d.ts +60 -0
  263. package/dist/renderers/latex-renderer.d.ts.map +1 -1
  264. package/dist/renderers/latex-renderer.js +299 -0
  265. package/dist/renderers/latex-renderer.js.map +1 -1
  266. package/dist/renderers/types.d.ts +9 -0
  267. package/dist/renderers/types.d.ts.map +1 -1
  268. package/dist/renderers/types.js.map +1 -1
  269. package/dist/robustness/__tests__/perturbations.unit.test.d.ts +11 -0
  270. package/dist/robustness/__tests__/perturbations.unit.test.d.ts.map +1 -0
  271. package/dist/robustness/__tests__/perturbations.unit.test.js +284 -0
  272. package/dist/robustness/__tests__/perturbations.unit.test.js.map +1 -0
  273. package/dist/robustness/index.d.ts +5 -2
  274. package/dist/robustness/index.d.ts.map +1 -1
  275. package/dist/robustness/index.js +4 -2
  276. package/dist/robustness/index.js.map +1 -1
  277. package/dist/statistical/__tests__/mann-whitney-u.unit.test.d.ts +7 -0
  278. package/dist/statistical/__tests__/mann-whitney-u.unit.test.d.ts.map +1 -0
  279. package/dist/statistical/__tests__/mann-whitney-u.unit.test.js +185 -0
  280. package/dist/statistical/__tests__/mann-whitney-u.unit.test.js.map +1 -0
  281. package/dist/types/evaluator.d.ts +449 -0
  282. package/dist/types/evaluator.d.ts.map +1 -0
  283. package/dist/types/evaluator.js +9 -0
  284. package/dist/types/evaluator.js.map +1 -0
  285. package/dist/types/result.d.ts +2 -0
  286. package/dist/types/result.d.ts.map +1 -1
  287. package/package.json +8 -1
  288. package/dist/claims/evaluator.d.ts +0 -33
  289. package/dist/claims/evaluator.d.ts.map +0 -1
  290. package/dist/claims/evaluator.js +0 -174
  291. package/dist/claims/evaluator.js.map +0 -1
  292. package/dist/robustness/analyzer.d.ts +0 -61
  293. package/dist/robustness/analyzer.d.ts.map +0 -1
  294. package/dist/robustness/analyzer.js +0 -191
  295. package/dist/robustness/analyzer.js.map +0 -1
@@ -1,174 +0,0 @@
1
- /**
2
- * Claims Evaluator
3
- *
4
- * Evaluates explicit hypotheses (claims) against aggregated results.
5
- * This enables claim-driven evaluation where experiments test specific
6
- * hypotheses rather than collect arbitrary metrics.
7
- */
8
- /**
9
- * Evaluate a single claim against aggregated results.
10
- *
11
- * @param claim - The claim to evaluate
12
- * @param aggregates - Aggregated results from the pipeline
13
- * @returns Claim evaluation with status and evidence
14
- */
15
- export const evaluateClaim = (claim, aggregates) => {
16
- // Filter aggregates by scope constraints
17
- const filteredAggregates = filterByScope(aggregates, claim);
18
- // Find primary and baseline aggregates
19
- const primaryAgg = filteredAggregates.find((a) => a.sut === claim.sut);
20
- const baselineAgg = filteredAggregates.find((a) => a.sut === claim.baseline);
21
- // Handle missing data
22
- if (!primaryAgg || !baselineAgg) {
23
- return createInconclusiveResult(claim, primaryAgg ? undefined : "Primary SUT not found", baselineAgg ? undefined : "Baseline SUT not found");
24
- }
25
- // Get metric values
26
- const primaryMetric = claim.metric;
27
- const baselineMetric = claim.metric;
28
- const primaryStats = primaryAgg.metrics[primaryMetric];
29
- const baselineStats = baselineAgg.metrics[baselineMetric];
30
- if (!(primaryMetric in primaryAgg.metrics) || !(baselineMetric in baselineAgg.metrics)) {
31
- return createInconclusiveResult(claim, "Metric not found in primary results", "Metric not found in baseline results");
32
- }
33
- // Compute evidence
34
- const primaryValue = primaryStats.mean;
35
- const baselineValue = baselineStats.mean;
36
- const delta = primaryValue - baselineValue;
37
- const ratio = baselineValue === 0 ? Infinity : primaryValue / baselineValue;
38
- // Get statistical significance if available
39
- const comparison = primaryAgg.comparisons?.[claim.baseline];
40
- const pValue = comparison?.pValue;
41
- const effectSize = comparison?.effectSize;
42
- const evidence = {
43
- primaryValue,
44
- baselineValue,
45
- delta,
46
- ratio,
47
- pValue,
48
- effectSize,
49
- n: primaryStats.n + baselineStats.n,
50
- };
51
- // Determine claim status
52
- const status = determineClaimStatus(claim, evidence);
53
- return {
54
- claim,
55
- status,
56
- evidence,
57
- };
58
- };
59
- /**
60
- * Filter aggregates by claim scope constraints.
61
- * @param aggregates
62
- * @param claim
63
- */
64
- const filterByScope = (aggregates, claim) => {
65
- if (!claim.scopeConstraints) {
66
- return aggregates;
67
- }
68
- return aggregates.filter((agg) => {
69
- for (const [key, value] of Object.entries(claim.scopeConstraints ?? {})) {
70
- if (key === "caseClass") {
71
- const allowedClasses = Array.isArray(value) ? value : [value];
72
- if (!allowedClasses.includes(agg.caseClass)) {
73
- return false;
74
- }
75
- }
76
- // Add more scope constraint checks as needed
77
- }
78
- return true;
79
- });
80
- };
81
- /**
82
- * Create an inconclusive result with reasons.
83
- * @param claim
84
- * @param reasons
85
- */
86
- const createInconclusiveResult = (claim, ...reasons) => {
87
- const validReasons = reasons.filter((r) => r !== undefined);
88
- return {
89
- claim,
90
- status: "inconclusive",
91
- evidence: {
92
- primaryValue: Number.NaN,
93
- baselineValue: Number.NaN,
94
- delta: Number.NaN,
95
- ratio: Number.NaN,
96
- },
97
- inconclusiveReason: validReasons.join("; "),
98
- };
99
- };
100
- /**
101
- * Determine claim status based on evidence.
102
- * @param claim
103
- * @param evidence
104
- */
105
- const determineClaimStatus = (claim, evidence) => {
106
- // Check for missing data
107
- if (Number.isNaN(evidence.primaryValue) || Number.isNaN(evidence.baselineValue)) {
108
- return "inconclusive";
109
- }
110
- // Check statistical significance if required
111
- const significanceLevel = claim.significanceLevel ?? 0.05;
112
- if (evidence.pValue !== undefined && evidence.pValue > significanceLevel) {
113
- return "inconclusive";
114
- }
115
- // Check minimum effect size if required
116
- if (claim.minEffectSize !== undefined &&
117
- evidence.effectSize !== undefined &&
118
- Math.abs(evidence.effectSize) < claim.minEffectSize) {
119
- return "inconclusive";
120
- }
121
- // Evaluate direction
122
- switch (claim.direction) {
123
- case "greater": {
124
- if (claim.threshold !== undefined) {
125
- return evidence.delta >= claim.threshold ? "satisfied" : "violated";
126
- }
127
- return evidence.delta > 0 ? "satisfied" : "violated";
128
- }
129
- case "less": {
130
- if (claim.threshold !== undefined) {
131
- return evidence.delta <= -claim.threshold ? "satisfied" : "violated";
132
- }
133
- return evidence.delta < 0 ? "satisfied" : "violated";
134
- }
135
- case "equal": {
136
- const epsilon = claim.threshold ?? 0.001;
137
- return Math.abs(evidence.delta) <= epsilon ? "satisfied" : "violated";
138
- }
139
- }
140
- };
141
- /**
142
- * Evaluate multiple claims against aggregated results.
143
- *
144
- * @param claims - Claims to evaluate
145
- * @param aggregates - Aggregated results
146
- * @returns Array of claim evaluations
147
- */
148
- export const evaluateClaims = (claims, aggregates) => claims.map((claim) => evaluateClaim(claim, aggregates));
149
- /**
150
- * Create a claim evaluation summary.
151
- *
152
- * @param evaluations - Completed claim evaluations
153
- * @returns Summary with counts and rates
154
- */
155
- export const createClaimSummary = (evaluations) => {
156
- const satisfied = evaluations.filter((e) => e.status === "satisfied").length;
157
- const violated = evaluations.filter((e) => e.status === "violated").length;
158
- const inconclusive = evaluations.filter((e) => e.status === "inconclusive").length;
159
- const definitive = satisfied + violated;
160
- const satisfactionRate = definitive > 0 ? satisfied / definitive : 0;
161
- return {
162
- version: "1.0.0",
163
- timestamp: new Date().toISOString(),
164
- evaluations,
165
- summary: {
166
- total: evaluations.length,
167
- satisfied,
168
- violated,
169
- inconclusive,
170
- satisfactionRate,
171
- },
172
- };
173
- };
174
- //# sourceMappingURL=evaluator.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"evaluator.js","sourceRoot":"","sources":["../../src/claims/evaluator.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAYH;;;;;;GAMG;AACH,MAAM,CAAC,MAAM,aAAa,GAAG,CAC5B,KAAsB,EACtB,UAA8B,EACZ,EAAE;IACpB,yCAAyC;IACzC,MAAM,kBAAkB,GAAG,aAAa,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC;IAE5D,uCAAuC;IACvC,MAAM,UAAU,GAAG,kBAAkB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,KAAK,KAAK,CAAC,GAAG,CAAC,CAAC;IACvE,MAAM,WAAW,GAAG,kBAAkB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,KAAK,KAAK,CAAC,QAAQ,CAAC,CAAC;IAE7E,sBAAsB;IACtB,IAAI,CAAC,UAAU,IAAI,CAAC,WAAW,EAAE,CAAC;QACjC,OAAO,wBAAwB,CAC9B,KAAK,EACL,UAAU,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,uBAAuB,EAChD,WAAW,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,wBAAwB,CAClD,CAAC;IACH,CAAC;IAED,oBAAoB;IACpB,MAAM,aAAa,GAAG,KAAK,CAAC,MAAM,CAAC;IACnC,MAAM,cAAc,GAAG,KAAK,CAAC,MAAM,CAAC;IACpC,MAAM,YAAY,GAAG,UAAU,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC;IACvD,MAAM,aAAa,GAAG,WAAW,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC;IAE1D,IAAI,CAAC,CAAC,aAAa,IAAI,UAAU,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,cAAc,IAAI,WAAW,CAAC,OAAO,CAAC,EAAE,CAAC;QACxF,OAAO,wBAAwB,CAC9B,KAAK,EACL,qCAAqC,EACrC,sCAAsC,CACtC,CAAC;IACH,CAAC;IAED,mBAAmB;IACnB,MAAM,YAAY,GAAG,YAAY,CAAC,IAAI,CAAC;IACvC,MAAM,aAAa,GAAG,aAAa,CAAC,IAAI,CAAC;IACzC,MAAM,KAAK,GAAG,YAAY,GAAG,aAAa,CAAC;IAC3C,MAAM,KAAK,GAAG,aAAa,KAAK,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,YAAY,GAAG,aAAa,CAAC;IAE5E,4CAA4C;IAC5C,MAAM,UAAU,GAAG,UAAU,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;IAC5D,MAAM,MAAM,GAAG,UAAU,EAAE,MAAM,CAAC;IAClC,MAAM,UAAU,GAAG,UAAU,EAAE,UAAU,CAAC;IAE1C,MAAM,QAAQ,GAAkB;QAC/B,YAAY;QACZ,aAAa;QACb,KAAK;QACL,KAAK;QACL,MAAM;QACN,UAAU;QACV,CAAC,EAAE,YAAY,CAAC,CAAC,GAAG,aAAa,CAAC,CAAC;KACnC,CAAC;IAEF,yBAAyB;IACzB,MAAM,MAAM,GAAG,oBAAoB,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;IAErD,OAAO;QACN,KAAK;QACL,MAAM;QACN,QAAQ;KACR,CAAC;AACH,CAAC,CAAC;AAEF;;;;GAIG;AACH,MAAM,aAAa,GAAG,CACrB,UAA8B,EAC9B,KAAsB,EACD,EAAE;IACvB,IAAI,CAAC,KAAK,CAAC,gBAAgB,EAAE,CAAC;QAC7B,OAAO,UAAU,CAAC;IACnB,CAAC;IAED,OAAO,UAAU,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE;QAChC,KAAK,
MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,gBAAgB,IAAI,EAAE,CAAC,EAAE,CAAC;YACzE,IAAI,GAAG,KAAK,WAAW,EAAE,CAAC;gBACzB,MAAM,cAAc,GAAG,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;gBAC9D,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC,GAAG,CAAC,SAAsB,CAAC,EAAE,CAAC;oBAC1D,OAAO,KAAK,CAAC;gBACd,CAAC;YACF,CAAC;YACD,6CAA6C;QAC9C,CAAC;QACD,OAAO,IAAI,CAAC;IACb,CAAC,CAAC,CAAC;AACJ,CAAC,CAAC;AAEF;;;;GAIG;AACH,MAAM,wBAAwB,GAAG,CAChC,KAAsB,EACtB,GAAG,OAA+B,EAChB,EAAE;IACpB,MAAM,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAe,EAAE,CAAC,CAAC,KAAK,SAAS,CAAC,CAAC;IAEzE,OAAO;QACN,KAAK;QACL,MAAM,EAAE,cAAc;QACtB,QAAQ,EAAE;YACT,YAAY,EAAE,MAAM,CAAC,GAAG;YACxB,aAAa,EAAE,MAAM,CAAC,GAAG;YACzB,KAAK,EAAE,MAAM,CAAC,GAAG;YACjB,KAAK,EAAE,MAAM,CAAC,GAAG;SACjB;QACD,kBAAkB,EAAE,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC;KAC3C,CAAC;AACH,CAAC,CAAC;AAEF;;;;GAIG;AACH,MAAM,oBAAoB,GAAG,CAAC,KAAsB,EAAE,QAAuB,EAAe,EAAE;IAC7F,yBAAyB;IACzB,IAAI,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,YAAY,CAAC,IAAI,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,aAAa,CAAC,EAAE,CAAC;QACjF,OAAO,cAAc,CAAC;IACvB,CAAC;IAED,6CAA6C;IAC7C,MAAM,iBAAiB,GAAG,KAAK,CAAC,iBAAiB,IAAI,IAAI,CAAC;IAC1D,IAAI,QAAQ,CAAC,MAAM,KAAK,SAAS,IAAI,QAAQ,CAAC,MAAM,GAAG,iBAAiB,EAAE,CAAC;QAC1E,OAAO,cAAc,CAAC;IACvB,CAAC;IAED,wCAAwC;IACxC,IACC,KAAK,CAAC,aAAa,KAAK,SAAS;QACjC,QAAQ,CAAC,UAAU,KAAK,SAAS;QACjC,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,UAAU,CAAC,GAAG,KAAK,CAAC,aAAa,EAClD,CAAC;QACF,OAAO,cAAc,CAAC;IACvB,CAAC;IAED,qBAAqB;IACrB,QAAQ,KAAK,CAAC,SAAS,EAAE,CAAC;QACzB,KAAK,SAAS,CAAC,CAAC,CAAC;YAChB,IAAI,KAAK,CAAC,SAAS,KAAK,SAAS,EAAE,CAAC;gBACnC,OAAO,QAAQ,CAAC,KAAK,IAAI,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,UAAU,CAAC;YACrE,CAAC;YACD,OAAO,QAAQ,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,UAAU,CAAC;QACtD,CAAC;QAED,KAAK,MAAM,CAAC,CAAC,CAAC;YACb,IAAI,KAAK,CAAC,SAAS,KAAK,SAAS,EAAE,CAAC;gBACnC,OAAO,QAAQ,CAAC,KAAK,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,UAAU,CAAC;YACtE,CAAC;YACD,OAAO,QAAQ,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,CA
AC,CAAC,UAAU,CAAC;QACtD,CAAC;QAED,KAAK,OAAO,CAAC,CAAC,CAAC;YACd,MAAM,OAAO,GAAG,KAAK,CAAC,SAAS,IAAI,KAAK,CAAC;YACzC,OAAO,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,OAAO,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,UAAU,CAAC;QACvE,CAAC;IACF,CAAC;AACF,CAAC,CAAC;AAEF;;;;;;GAMG;AACH,MAAM,CAAC,MAAM,cAAc,GAAG,CAC7B,MAAyB,EACzB,UAA8B,EACV,EAAE,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,aAAa,CAAC,KAAK,EAAE,UAAU,CAAC,CAAC,CAAC;AAEhF;;;;;GAKG;AACH,MAAM,CAAC,MAAM,kBAAkB,GAAG,CAAC,WAA8B,EAA0B,EAAE;IAC5F,MAAM,SAAS,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,WAAW,CAAC,CAAC,MAAM,CAAC;IAC7E,MAAM,QAAQ,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,UAAU,CAAC,CAAC,MAAM,CAAC;IAC3E,MAAM,YAAY,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,cAAc,CAAC,CAAC,MAAM,CAAC;IAEnF,MAAM,UAAU,GAAG,SAAS,GAAG,QAAQ,CAAC;IACxC,MAAM,gBAAgB,GAAG,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC;IAErE,OAAO;QACN,OAAO,EAAE,OAAO;QAChB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACnC,WAAW;QACX,OAAO,EAAE;YACR,KAAK,EAAE,WAAW,CAAC,MAAM;YACzB,SAAS;YACT,QAAQ;YACR,YAAY;YACZ,gBAAgB;SAChB;KACD,CAAC;AACH,CAAC,CAAC"}
@@ -1,61 +0,0 @@
1
- /**
2
- * Robustness Analyzer
3
- *
4
- * Analyzes algorithm robustness under perturbations.
5
- * Computes variance, stability, and degradation metrics.
6
- */
7
- import type { RobustnessAnalysisOutput, RobustnessMetrics } from "../types/perturbation.js";
8
- import type { EvaluationResult } from "../types/result.js";
9
- /**
10
- * Options for robustness analysis.
11
- */
12
- export interface RobustnessAnalysisOptions {
13
- /** Metrics to analyze */
14
- metrics: string[];
15
- /** Perturbations applied */
16
- perturbations: string[];
17
- /** Intensity levels tested (if applicable) */
18
- intensityLevels?: number[];
19
- /** Number of runs per perturbation level */
20
- runsPerLevel?: number;
21
- }
22
- /**
23
- * Analyze robustness of a SUT under perturbation.
24
- *
25
- * @param baseResults - Results without perturbation
26
- * @param perturbedResults - Results with perturbation, keyed by perturbation name
27
- * @param metric - Metric to analyze
28
- * @returns Robustness metrics
29
- */
30
- export declare const analyzeRobustnessForMetric: (baseResults: EvaluationResult[], perturbedResults: EvaluationResult[], metric: string) => RobustnessMetrics;
31
- /**
32
- * Analyze robustness across multiple perturbation levels.
33
- *
34
- * @param results - All results including perturbed ones
35
- * @param metric - Metric to analyze
36
- * @param intensityLevels - Perturbation intensity levels
37
- * @returns Robustness metrics with degradation curve
38
- */
39
- export declare const analyzeRobustnessWithCurve: (results: EvaluationResult[], metric: string, intensityLevels: number[]) => RobustnessMetrics;
40
- /**
41
- * Compare robustness between two SUTs.
42
- *
43
- * @param sutAResults - Results for SUT A (including perturbed)
44
- * @param sutBResults - Results for SUT B (including perturbed)
45
- * @param metric - Metric to compare
46
- * @returns Object with comparison metrics
47
- */
48
- export declare const compareRobustness: (sutAResults: EvaluationResult[], sutBResults: EvaluationResult[], metric: string) => {
49
- sutAVariance: number;
50
- sutBVariance: number;
51
- relativeRobustness: number;
52
- };
53
- /**
54
- * Create a full robustness analysis output.
55
- *
56
- * @param results - All evaluation results (base and perturbed)
57
- * @param options - Analysis options
58
- * @returns Complete robustness analysis output
59
- */
60
- export declare const createRobustnessAnalysis: (results: EvaluationResult[], options: RobustnessAnalysisOptions) => RobustnessAnalysisOutput;
61
- //# sourceMappingURL=analyzer.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"analyzer.d.ts","sourceRoot":"","sources":["../../src/robustness/analyzer.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAGH,OAAO,KAAK,EACX,wBAAwB,EAExB,iBAAiB,EACjB,MAAM,0BAA0B,CAAC;AAClC,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AAE3D;;GAEG;AACH,MAAM,WAAW,yBAAyB;IACzC,yBAAyB;IACzB,OAAO,EAAE,MAAM,EAAE,CAAC;IAElB,4BAA4B;IAC5B,aAAa,EAAE,MAAM,EAAE,CAAC;IAExB,8CAA8C;IAC9C,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAE3B,4CAA4C;IAC5C,YAAY,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;;;;;;GAOG;AACH,eAAO,MAAM,0BAA0B,GACtC,aAAa,gBAAgB,EAAE,EAC/B,kBAAkB,gBAAgB,EAAE,EACpC,QAAQ,MAAM,KACZ,iBAwCF,CAAC;AAEF;;;;;;;GAOG;AACH,eAAO,MAAM,0BAA0B,GACtC,SAAS,gBAAgB,EAAE,EAC3B,QAAQ,MAAM,EACd,iBAAiB,MAAM,EAAE,KACvB,iBA2EF,CAAC;AAEF;;;;;;;GAOG;AACH,eAAO,MAAM,iBAAiB,GAC7B,aAAa,gBAAgB,EAAE,EAC/B,aAAa,gBAAgB,EAAE,EAC/B,QAAQ,MAAM,KACZ;IACF,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,kBAAkB,EAAE,MAAM,CAAC;CAyB3B,CAAC;AAEF;;;;;;GAMG;AACH,eAAO,MAAM,wBAAwB,GACpC,SAAS,gBAAgB,EAAE,EAC3B,SAAS,yBAAyB,KAChC,wBAqDF,CAAC"}
@@ -1,191 +0,0 @@
1
- /**
2
- * Robustness Analyzer
3
- *
4
- * Analyzes algorithm robustness under perturbations.
5
- * Computes variance, stability, and degradation metrics.
6
- */
7
- import { computeSummaryStats } from "../aggregation/aggregators.js";
8
/**
 * Analyze robustness of a SUT under perturbation.
 *
 * Summarises how much `metric` spreads across the perturbed runs. The base
 * runs only act as a guard: unless both the base and the perturbed set hold
 * at least one usable value, every field of the result is NaN.
 *
 * @param baseResults - Results without perturbation
 * @param perturbedResults - Results with perturbation (a flat array of results)
 * @param metric - Name of the numeric metric to analyze
 * @returns Robustness metrics: variance, standard deviation and coefficient
 *          of variation of the perturbed values
 */
export const analyzeRobustnessForMetric = (baseResults, perturbedResults, metric) => {
    // Keep real numbers only. A result that never recorded the metric yields
    // `undefined`, and Number.isNaN(undefined) is false, so a NaN-only check
    // would let it through and poison the statistics below.
    const usableValues = (items) => items
        .map((r) => r.metrics.numeric[metric])
        .filter((v) => typeof v === "number" && !Number.isNaN(v));
    const baseValues = usableValues(baseResults);
    const perturbedValues = usableValues(perturbedResults);
    if (baseValues.length === 0 || perturbedValues.length === 0) {
        return {
            varianceUnderPerturbation: Number.NaN,
            stdUnderPerturbation: Number.NaN,
            coefficientOfVariation: Number.NaN,
        };
    }
    // Only the perturbed values are summarised; the base values are not part
    // of any returned figure.
    const { mean, std } = computeSummaryStats(perturbedValues);
    // The summary may omit `std`; every derived figure is NaN in that case.
    const hasStd = std !== undefined;
    return {
        varianceUnderPerturbation: hasStd ? std ** 2 : Number.NaN,
        stdUnderPerturbation: hasStd ? std : Number.NaN,
        // Relative spread; undefined for a zero mean, reported as NaN.
        coefficientOfVariation: hasStd && mean !== 0 ? std / Math.abs(mean) : Number.NaN,
    };
};
48
/**
 * Analyze robustness across multiple perturbation levels.
 *
 * Results whose `run.config.perturbationIntensity` is undefined form the
 * level-0 baseline; every other result is treated as perturbed. One curve
 * point (mean and standard deviation of `metric`) is produced per level that
 * has at least one usable value.
 *
 * @param results - All results including perturbed ones
 * @param metric - Name of the numeric metric to analyze
 * @param intensityLevels - Perturbation intensity levels to place on the curve
 * @returns Robustness metrics over all perturbed results, plus the
 *          degradation curve (sorted by level) and the first level whose mean
 *          differs from the first curve point by more than 10%
 *          (`breakpoint` is undefined when no level does)
 */
export const analyzeRobustnessWithCurve = (results, metric, intensityLevels) => {
    // Relative change versus the first curve point that counts as a breakpoint.
    const DEGRADATION_THRESHOLD = 0.1;
    const intensityOf = (r) => r.run.config?.perturbationIntensity;
    // Keep real numbers only: a missing metric is `undefined`, which a plain
    // Number.isNaN() check does not reject.
    const usableValues = (items) => items
        .map((r) => r.metrics.numeric[metric])
        .filter((v) => typeof v === "number" && !Number.isNaN(v));
    // Group results by perturbation intensity, baseline first.
    const byIntensity = new Map();
    byIntensity.set(0, results.filter((r) => intensityOf(r) === undefined));
    for (const level of intensityLevels) {
        const levelResults = results.filter((r) => intensityOf(r) === level);
        // NOTE: if `intensityLevels` contains 0 and matching results exist,
        // they replace the unperturbed baseline stored under key 0.
        if (levelResults.length > 0) {
            byIntensity.set(level, levelResults);
        }
    }
    // Build the degradation curve, skipping levels without usable values.
    const degradationCurve = [];
    for (const [level, levelResults] of byIntensity) {
        const values = usableValues(levelResults);
        if (values.length === 0) {
            continue;
        }
        const stats = computeSummaryStats(values);
        degradationCurve.push({
            perturbationLevel: level,
            metricValue: stats.mean,
            stdDev: stats.std,
        });
    }
    degradationCurve.sort((a, b) => a.perturbationLevel - b.perturbationLevel);
    // Breakpoint: the first level whose mean moved more than the threshold
    // away from the lowest-level point (the baseline when one exists).
    let breakpoint;
    if (degradationCurve.length >= 2) {
        const [reference, ...higherLevels] = degradationCurve;
        const baseValue = reference.metricValue;
        const firstDegraded = higherLevels.find((point) => Math.abs((point.metricValue - baseValue) / baseValue) > DEGRADATION_THRESHOLD);
        breakpoint = firstDegraded?.perturbationLevel;
    }
    // Overall spread across every perturbed result, whether or not its level
    // was requested in `intensityLevels`.
    const allPerturbedValues = usableValues(results.filter((r) => intensityOf(r) !== undefined));
    if (allPerturbedValues.length === 0) {
        // Nothing to summarise: report NaN explicitly rather than relying on
        // how the summary helper treats an empty array.
        return {
            varianceUnderPerturbation: Number.NaN,
            stdUnderPerturbation: Number.NaN,
            coefficientOfVariation: Number.NaN,
            degradationCurve,
            breakpoint,
        };
    }
    const { mean, std } = computeSummaryStats(allPerturbedValues);
    const hasStd = std !== undefined;
    return {
        varianceUnderPerturbation: hasStd ? std ** 2 : Number.NaN,
        stdUnderPerturbation: hasStd ? std : Number.NaN,
        coefficientOfVariation: hasStd && mean !== 0 ? std / Math.abs(mean) : Number.NaN,
        degradationCurve,
        breakpoint,
    };
};
115
/**
 * Compare robustness between two SUTs.
 *
 * Each SUT's results are split into base runs
 * (`run.config.perturbationIntensity` undefined) and perturbed runs, and the
 * variance of `metric` under perturbation is computed for each SUT.
 *
 * @param sutAResults - Results for SUT A (including perturbed)
 * @param sutBResults - Results for SUT B (including perturbed)
 * @param metric - Name of the numeric metric to compare
 * @returns `sutAVariance`, `sutBVariance` and `relativeRobustness`, the ratio
 *          A / B of the two variances. The ratio is NaN when either variance
 *          is NaN, 1 when both variances are 0, and Infinity when only B's
 *          variance is 0.
 */
export const compareRobustness = (sutAResults, sutBResults, metric) => {
    const isPerturbed = (r) => r.run.config?.perturbationIntensity !== undefined;
    const varianceOf = (sutResults) => {
        const base = sutResults.filter((r) => !isPerturbed(r));
        const perturbed = sutResults.filter(isPerturbed);
        return analyzeRobustnessForMetric(base, perturbed, metric).varianceUnderPerturbation;
    };
    const sutAVariance = varianceOf(sutAResults);
    const sutBVariance = varianceOf(sutBResults);
    let relativeRobustness;
    if (Number.isNaN(sutAVariance) || Number.isNaN(sutBVariance)) {
        // No usable data on at least one side: the ratio is meaningless.
        relativeRobustness = Number.NaN;
    }
    else if (sutBVariance === 0) {
        // Avoid 0/0: two perfectly stable SUTs are equally robust, while any
        // spread in A against a perfectly stable B is unbounded.
        relativeRobustness = sutAVariance === 0 ? 1 : Infinity;
    }
    else {
        relativeRobustness = sutAVariance / sutBVariance;
    }
    return {
        sutAVariance,
        sutBVariance,
        relativeRobustness,
    };
};
139
/**
 * Create a full robustness analysis output.
 *
 * Results are grouped by `run.sut`. For every SUT, metric and perturbation
 * one entry is emitted holding the robustness metrics of the runs whose
 * `run.config.perturbation` equals that perturbation, measured against the
 * SUT's base runs (those without a `perturbation` in their config).
 *
 * @param results - All evaluation results (base and perturbed)
 * @param options - Analysis options: `metrics` and `perturbations` to
 *                  analyze, plus optional `intensityLevels` and
 *                  `runsPerLevel` that are echoed into the output config
 * @returns Complete robustness analysis output (version, ISO timestamp,
 *          per-combination results and the effective config)
 */
export const createRobustnessAnalysis = (results, options) => {
    // Group results by SUT, preserving first-seen order.
    const bySut = new Map();
    for (const result of results) {
        const group = bySut.get(result.run.sut);
        if (group === undefined) {
            bySut.set(result.run.sut, [result]);
        }
        else {
            group.push(result);
        }
    }
    const analysisResults = [];
    for (const [sut, sutResults] of bySut) {
        // Base runs do not depend on the metric or perturbation: compute once.
        const baseResults = sutResults.filter((r) => !r.run.config?.perturbation);
        for (const metric of options.metrics) {
            // Average only real numbers so that a single run without the
            // metric (undefined) or with NaN does not wipe out the baseline.
            const baseValues = baseResults
                .map((r) => r.metrics.numeric[metric])
                .filter((v) => typeof v === "number" && !Number.isNaN(v));
            const baselineValue = baseValues.length > 0
                ? baseValues.reduce((sum, v) => sum + v, 0) / baseValues.length
                : Number.NaN;
            for (const perturbation of options.perturbations) {
                const perturbedResults = sutResults.filter((r) => r.run.config?.perturbation === perturbation);
                analysisResults.push({
                    sut,
                    perturbation,
                    metric,
                    robustness: analyzeRobustnessForMetric(baseResults, perturbedResults, metric),
                    baselineValue,
                    // Counts every perturbed run, including ones lacking the metric.
                    runCount: perturbedResults.length,
                });
            }
        }
    }
    return {
        version: "1.0.0",
        timestamp: new Date().toISOString(),
        results: analysisResults,
        config: {
            perturbations: options.perturbations,
            metrics: options.metrics,
            intensityLevels: options.intensityLevels,
            runsPerLevel: options.runsPerLevel ?? 1,
        },
    };
};
191
- //# sourceMappingURL=analyzer.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"analyzer.js","sourceRoot":"","sources":["../../src/robustness/analyzer.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,mBAAmB,EAAE,MAAM,+BAA+B,CAAC;AAyBpE;;;;;;;GAOG;AACH,MAAM,CAAC,MAAM,0BAA0B,GAAG,CACzC,WAA+B,EAC/B,gBAAoC,EACpC,MAAc,EACM,EAAE;IACtB,wBAAwB;IACxB,MAAM,UAAU,GAAG,WAAW;SAC5B,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;SACrC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IAElC,MAAM,eAAe,GAAG,gBAAgB;SACtC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;SACrC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IAElC,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,IAAI,eAAe,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC7D,OAAO;YACN,yBAAyB,EAAE,MAAM,CAAC,GAAG;YACrC,oBAAoB,EAAE,MAAM,CAAC,GAAG;YAChC,sBAAsB,EAAE,MAAM,CAAC,GAAG;SAClC,CAAC;IACH,CAAC;IAED,2CAA2C;IAC3C,kEAAkE;IAClE,MAAM,cAAc,GAAG,mBAAmB,CAAC,eAAe,CAAC,CAAC;IAE5D,8BAA8B;IAC9B,MAAM,yBAAyB,GAC9B,cAAc,CAAC,GAAG,KAAK,SAAS,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,cAAc,CAAC,GAAG,IAAI,CAAC,CAAC;IAEzE,qBAAqB;IACrB,MAAM,oBAAoB,GAAG,cAAc,CAAC,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC;IAE9D,+CAA+C;IAC/C,MAAM,sBAAsB,GAC3B,cAAc,CAAC,IAAI,KAAK,CAAC,IAAI,cAAc,CAAC,GAAG,KAAK,SAAS;QAC5D,CAAC,CAAC,cAAc,CAAC,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,CAAC;QACpD,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC;IAEf,OAAO;QACN,yBAAyB;QACzB,oBAAoB;QACpB,sBAAsB;KACtB,CAAC;AACH,CAAC,CAAC;AAEF;;;;;;;GAOG;AACH,MAAM,CAAC,MAAM,0BAA0B,GAAG,CACzC,OAA2B,EAC3B,MAAc,EACd,eAAyB,EACL,EAAE;IACtB,0CAA0C;IAC1C,MAAM,WAAW,GAAG,IAAI,GAAG,EAA8B,CAAC;IAE1D,iCAAiC;IACjC,MAAM,WAAW,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,EAAE,qBAAqB,KAAK,SAAS,CAAC,CAAC;IAC7F,WAAW,CAAC,GAAG,CAAC,CAAC,EAAE,WAAW,CAAC,CAAC;IAEhC,oBAAoB;IACpB,KAAK,MAAM,KAAK,IAAI,eAAe,EAAE,CAAC;QACrC,MAAM,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,EAAE,qBAAqB,KAAK,KAAK,CAAC,CAAC;QAC1F,IAAI,YAAY,CAAC,MAAM,GAAG,C
AAC,EAAE,CAAC;YAC7B,WAAW,CAAC,GAAG,CAAC,KAAK,EAAE,YAAY,CAAC,CAAC;QACtC,CAAC;IACF,CAAC;IAED,0BAA0B;IAC1B,MAAM,gBAAgB,GAIhB,EAAE,CAAC;IAET,KAAK,MAAM,CAAC,KAAK,EAAE,YAAY,CAAC,IAAI,WAAW,EAAE,CAAC;QACjD,MAAM,MAAM,GAAG,YAAY;aACzB,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;aACrC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;QAElC,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,MAAM,KAAK,GAAG,mBAAmB,CAAC,MAAM,CAAC,CAAC;YAC1C,gBAAgB,CAAC,IAAI,CAAC;gBACrB,iBAAiB,EAAE,KAAK;gBACxB,WAAW,EAAE,KAAK,CAAC,IAAI;gBACvB,MAAM,EAAE,KAAK,CAAC,GAAG;aACjB,CAAC,CAAC;QACJ,CAAC;IACF,CAAC;IAED,gBAAgB;IAChB,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,iBAAiB,GAAG,CAAC,CAAC,iBAAiB,CAAC,CAAC;IAE3E,4CAA4C;IAC5C,IAAI,UAA8B,CAAC;IACnC,IAAI,gBAAgB,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;QAClC,MAAM,SAAS,GAAG,gBAAgB,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC;QAClD,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,GAAG,gBAAgB,CAAC,MAAM,EAAE,KAAK,EAAE,EAAE,CAAC;YAC9D,MAAM,cAAc,GAAG,IAAI,CAAC,GAAG,CAC9B,CAAC,gBAAgB,CAAC,KAAK,CAAC,CAAC,WAAW,GAAG,SAAS,CAAC,GAAG,SAAS,CAC7D,CAAC;YACF,IAAI,cAAc,GAAG,GAAG,EAAE,CAAC;gBAC1B,4BAA4B;gBAC5B,UAAU,GAAG,gBAAgB,CAAC,KAAK,CAAC,CAAC,iBAAiB,CAAC;gBACvD,MAAM;YACP,CAAC;QACF,CAAC;IACF,CAAC;IAED,sDAAsD;IACtD,MAAM,kBAAkB,GAAG,OAAO;SAChC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,EAAE,qBAAqB,KAAK,SAAS,CAAC;SAChE,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;SACrC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IAElC,MAAM,YAAY,GAAG,mBAAmB,CAAC,kBAAkB,CAAC,CAAC;IAE7D,OAAO;QACN,yBAAyB,EAAE,YAAY,CAAC,GAAG,KAAK,SAAS,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,YAAY,CAAC,GAAG,IAAI,CAAC;QAC9F,oBAAoB,EAAE,YAAY,CAAC,GAAG,IAAI,MAAM,CAAC,GAAG;QACpD,sBAAsB,EACrB,YAAY,CAAC,IAAI,KAAK,CAAC,IAAI,YAAY,CAAC,GAAG,KAAK,SAAS;YACxD,CAAC,CAAC,YAAY,CAAC,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,YAAY,CAAC,IAAI,CAAC;YAChD,CAAC,CAAC,MAAM,CAAC,GAAG;QACd,gBAAgB;QAChB,UAAU;KACV,CAAC;AACH
,CAAC,CAAC;AAEF;;;;;;;GAOG;AACH,MAAM,CAAC,MAAM,iBAAiB,GAAG,CAChC,WAA+B,EAC/B,WAA+B,EAC/B,MAAc,EAKb,EAAE;IACH,MAAM,aAAa,GAAG,WAAW,CAAC,MAAM,CACvC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,EAAE,qBAAqB,KAAK,SAAS,CACxD,CAAC;IACF,MAAM,aAAa,GAAG,WAAW,CAAC,MAAM,CACvC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,EAAE,qBAAqB,KAAK,SAAS,CACxD,CAAC;IAEF,MAAM,QAAQ,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,EAAE,qBAAqB,KAAK,SAAS,CAAC,CAAC;IAC9F,MAAM,QAAQ,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,EAAE,qBAAqB,KAAK,SAAS,CAAC,CAAC;IAE9F,MAAM,cAAc,GAAG,0BAA0B,CAAC,QAAQ,EAAE,aAAa,EAAE,MAAM,CAAC,CAAC;IACnF,MAAM,cAAc,GAAG,0BAA0B,CAAC,QAAQ,EAAE,aAAa,EAAE,MAAM,CAAC,CAAC;IAEnF,MAAM,kBAAkB,GACvB,cAAc,CAAC,yBAAyB,KAAK,CAAC;QAC7C,CAAC,CAAC,QAAQ;QACV,CAAC,CAAC,cAAc,CAAC,yBAAyB,GAAG,cAAc,CAAC,yBAAyB,CAAC;IAExF,OAAO;QACN,YAAY,EAAE,cAAc,CAAC,yBAAyB;QACtD,YAAY,EAAE,cAAc,CAAC,yBAAyB;QACtD,kBAAkB;KAClB,CAAC;AACH,CAAC,CAAC;AAEF;;;;;;GAMG;AACH,MAAM,CAAC,MAAM,wBAAwB,GAAG,CACvC,OAA2B,EAC3B,OAAkC,EACP,EAAE;IAC7B,MAAM,eAAe,GAA+B,EAAE,CAAC;IAEvD,uBAAuB;IACvB,MAAM,KAAK,GAAG,IAAI,GAAG,EAA8B,CAAC;IACpD,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC9B,MAAM,QAAQ,GAAG,KAAK,CAAC,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC;QACjD,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACtB,KAAK,CAAC,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;IACrC,CAAC;IAED,oDAAoD;IACpD,KAAK,MAAM,CAAC,GAAG,EAAE,UAAU,CAAC,IAAI,KAAK,EAAE,CAAC;QACvC,KAAK,MAAM,MAAM,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;YACtC,KAAK,MAAM,YAAY,IAAI,OAAO,CAAC,aAAa,EAAE,CAAC;gBAClD,8BAA8B;gBAC9B,MAAM,WAAW,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,EAAE,YAAY,CAAC,CAAC;gBAC1E,MAAM,gBAAgB,GAAG,UAAU,CAAC,MAAM,CACzC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,EAAE,YAAY,KAAK,YAAY,CAClD,CAAC;gBAEF,MAAM,UAAU,GAAG,0BAA0B,CAAC,WAAW,EAAE,gBAAgB,EAAE,MAAM,CAAC,CAAC;gBAErF,qBAAqB;gBACrB,MAAM,UAAU,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,
OAAO,CAAC,MAAM,CAAC,CAAC,CAAC;gBACrE,MAAM,aAAa,GAClB,UAAU,CAAC,MAAM,GAAG,CAAC;oBACpB,CAAC,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,UAAU,CAAC,MAAM;oBAC3D,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC;gBAEf,eAAe,CAAC,IAAI,CAAC;oBACpB,GAAG;oBACH,YAAY;oBACZ,MAAM;oBACN,UAAU;oBACV,aAAa;oBACb,QAAQ,EAAE,gBAAgB,CAAC,MAAM;iBACjC,CAAC,CAAC;YACJ,CAAC;QACF,CAAC;IACF,CAAC;IAED,OAAO;QACN,OAAO,EAAE,OAAO;QAChB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACnC,OAAO,EAAE,eAAe;QACxB,MAAM,EAAE;YACP,aAAa,EAAE,OAAO,CAAC,aAAa;YACpC,OAAO,EAAE,OAAO,CAAC,OAAO;YACxB,eAAe,EAAE,OAAO,CAAC,eAAe;YACxC,YAAY,EAAE,OAAO,CAAC,YAAY,IAAI,CAAC;SACvC;KACD,CAAC;AACH,CAAC,CAAC"}