ppef 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199) hide show
  1. package/README.md +172 -0
  2. package/dist/__tests__/framework-pipeline.integration.test.d.ts +7 -0
  3. package/dist/__tests__/framework-pipeline.integration.test.d.ts.map +1 -0
  4. package/dist/__tests__/framework-pipeline.integration.test.js +413 -0
  5. package/dist/__tests__/framework-pipeline.integration.test.js.map +1 -0
  6. package/dist/__tests__/registry-executor.integration.test.d.ts +5 -0
  7. package/dist/__tests__/registry-executor.integration.test.d.ts.map +1 -0
  8. package/dist/__tests__/registry-executor.integration.test.js +349 -0
  9. package/dist/__tests__/registry-executor.integration.test.js.map +1 -0
  10. package/dist/__tests__/test-helpers.d.ts +94 -0
  11. package/dist/__tests__/test-helpers.d.ts.map +1 -0
  12. package/dist/__tests__/test-helpers.js +271 -0
  13. package/dist/__tests__/test-helpers.js.map +1 -0
  14. package/dist/aggregation/aggregators.d.ts +54 -0
  15. package/dist/aggregation/aggregators.d.ts.map +1 -0
  16. package/dist/aggregation/aggregators.js +228 -0
  17. package/dist/aggregation/aggregators.js.map +1 -0
  18. package/dist/aggregation/index.d.ts +8 -0
  19. package/dist/aggregation/index.d.ts.map +1 -0
  20. package/dist/aggregation/index.js +8 -0
  21. package/dist/aggregation/index.js.map +1 -0
  22. package/dist/aggregation/pipeline.d.ts +38 -0
  23. package/dist/aggregation/pipeline.d.ts.map +1 -0
  24. package/dist/aggregation/pipeline.js +198 -0
  25. package/dist/aggregation/pipeline.js.map +1 -0
  26. package/dist/claims/evaluator.d.ts +33 -0
  27. package/dist/claims/evaluator.d.ts.map +1 -0
  28. package/dist/claims/evaluator.js +174 -0
  29. package/dist/claims/evaluator.js.map +1 -0
  30. package/dist/claims/index.d.ts +7 -0
  31. package/dist/claims/index.d.ts.map +1 -0
  32. package/dist/claims/index.js +7 -0
  33. package/dist/claims/index.js.map +1 -0
  34. package/dist/collector/index.d.ts +8 -0
  35. package/dist/collector/index.d.ts.map +1 -0
  36. package/dist/collector/index.js +8 -0
  37. package/dist/collector/index.js.map +1 -0
  38. package/dist/collector/result-collector.d.ts +159 -0
  39. package/dist/collector/result-collector.d.ts.map +1 -0
  40. package/dist/collector/result-collector.js +213 -0
  41. package/dist/collector/result-collector.js.map +1 -0
  42. package/dist/collector/schema.d.ts +34 -0
  43. package/dist/collector/schema.d.ts.map +1 -0
  44. package/dist/collector/schema.js +145 -0
  45. package/dist/collector/schema.js.map +1 -0
  46. package/dist/executor/__tests__/checkpoint-hash-bug.diagnostic.test.d.ts +10 -0
  47. package/dist/executor/__tests__/checkpoint-hash-bug.diagnostic.test.d.ts.map +1 -0
  48. package/dist/executor/__tests__/checkpoint-hash-bug.diagnostic.test.js +122 -0
  49. package/dist/executor/__tests__/checkpoint-hash-bug.diagnostic.test.js.map +1 -0
  50. package/dist/executor/__tests__/checkpoint-manager.integration.test.d.ts +7 -0
  51. package/dist/executor/__tests__/checkpoint-manager.integration.test.d.ts.map +1 -0
  52. package/dist/executor/__tests__/checkpoint-manager.integration.test.js +330 -0
  53. package/dist/executor/__tests__/checkpoint-manager.integration.test.js.map +1 -0
  54. package/dist/executor/__tests__/checkpoint-manager.unit.test.d.ts +7 -0
  55. package/dist/executor/__tests__/checkpoint-manager.unit.test.d.ts.map +1 -0
  56. package/dist/executor/__tests__/checkpoint-manager.unit.test.js +449 -0
  57. package/dist/executor/__tests__/checkpoint-manager.unit.test.js.map +1 -0
  58. package/dist/executor/__tests__/checkpoint-merge-bug.diagnostic.test.d.ts +11 -0
  59. package/dist/executor/__tests__/checkpoint-merge-bug.diagnostic.test.d.ts.map +1 -0
  60. package/dist/executor/__tests__/checkpoint-merge-bug.diagnostic.test.js +224 -0
  61. package/dist/executor/__tests__/checkpoint-merge-bug.diagnostic.test.js.map +1 -0
  62. package/dist/executor/__tests__/checkpoint-merge-bug.unit.test.d.ts +8 -0
  63. package/dist/executor/__tests__/checkpoint-merge-bug.unit.test.d.ts.map +1 -0
  64. package/dist/executor/__tests__/checkpoint-merge-bug.unit.test.js +164 -0
  65. package/dist/executor/__tests__/checkpoint-merge-bug.unit.test.js.map +1 -0
  66. package/dist/executor/__tests__/checkpoint-storage.unit.test.d.ts +7 -0
  67. package/dist/executor/__tests__/checkpoint-storage.unit.test.d.ts.map +1 -0
  68. package/dist/executor/__tests__/checkpoint-storage.unit.test.js +386 -0
  69. package/dist/executor/__tests__/checkpoint-storage.unit.test.js.map +1 -0
  70. package/dist/executor/__tests__/executor.unit.test.d.ts +7 -0
  71. package/dist/executor/__tests__/executor.unit.test.d.ts.map +1 -0
  72. package/dist/executor/__tests__/executor.unit.test.js +134 -0
  73. package/dist/executor/__tests__/executor.unit.test.js.map +1 -0
  74. package/dist/executor/__tests__/parallel-checkpoint-merge.integration.test.d.ts +12 -0
  75. package/dist/executor/__tests__/parallel-checkpoint-merge.integration.test.d.ts.map +1 -0
  76. package/dist/executor/__tests__/parallel-checkpoint-merge.integration.test.js +196 -0
  77. package/dist/executor/__tests__/parallel-checkpoint-merge.integration.test.js.map +1 -0
  78. package/dist/executor/__tests__/parallel-executor.integration.test.d.ts +7 -0
  79. package/dist/executor/__tests__/parallel-executor.integration.test.d.ts.map +1 -0
  80. package/dist/executor/__tests__/parallel-executor.integration.test.js +249 -0
  81. package/dist/executor/__tests__/parallel-executor.integration.test.js.map +1 -0
  82. package/dist/executor/__tests__/parallel-executor.unit.test.d.ts +7 -0
  83. package/dist/executor/__tests__/parallel-executor.unit.test.d.ts.map +1 -0
  84. package/dist/executor/__tests__/parallel-executor.unit.test.js +203 -0
  85. package/dist/executor/__tests__/parallel-executor.unit.test.js.map +1 -0
  86. package/dist/executor/checkpoint-manager.d.ts +231 -0
  87. package/dist/executor/checkpoint-manager.d.ts.map +1 -0
  88. package/dist/executor/checkpoint-manager.js +395 -0
  89. package/dist/executor/checkpoint-manager.js.map +1 -0
  90. package/dist/executor/checkpoint-storage.d.ts +230 -0
  91. package/dist/executor/checkpoint-storage.d.ts.map +1 -0
  92. package/dist/executor/checkpoint-storage.js +370 -0
  93. package/dist/executor/checkpoint-storage.js.map +1 -0
  94. package/dist/executor/checkpoint-types.d.ts +48 -0
  95. package/dist/executor/checkpoint-types.d.ts.map +1 -0
  96. package/dist/executor/checkpoint-types.js +8 -0
  97. package/dist/executor/checkpoint-types.js.map +1 -0
  98. package/dist/executor/executor.d.ts +164 -0
  99. package/dist/executor/executor.d.ts.map +1 -0
  100. package/dist/executor/executor.js +408 -0
  101. package/dist/executor/executor.js.map +1 -0
  102. package/dist/executor/index.d.ts +11 -0
  103. package/dist/executor/index.d.ts.map +1 -0
  104. package/dist/executor/index.js +11 -0
  105. package/dist/executor/index.js.map +1 -0
  106. package/dist/executor/memory-monitor.d.ts +115 -0
  107. package/dist/executor/memory-monitor.d.ts.map +1 -0
  108. package/dist/executor/memory-monitor.js +168 -0
  109. package/dist/executor/memory-monitor.js.map +1 -0
  110. package/dist/executor/parallel-executor.d.ts +53 -0
  111. package/dist/executor/parallel-executor.d.ts.map +1 -0
  112. package/dist/executor/parallel-executor.js +194 -0
  113. package/dist/executor/parallel-executor.js.map +1 -0
  114. package/dist/executor/run-id.d.ts +71 -0
  115. package/dist/executor/run-id.d.ts.map +1 -0
  116. package/dist/executor/run-id.js +67 -0
  117. package/dist/executor/run-id.js.map +1 -0
  118. package/dist/executor/worker-entry.d.ts +8 -0
  119. package/dist/executor/worker-entry.d.ts.map +1 -0
  120. package/dist/executor/worker-entry.js +67 -0
  121. package/dist/executor/worker-entry.js.map +1 -0
  122. package/dist/index.cjs +11 -0
  123. package/dist/index.d.ts +15 -0
  124. package/dist/index.d.ts.map +1 -0
  125. package/dist/index.js +24 -0
  126. package/dist/index.js.map +1 -0
  127. package/dist/registry/case-registry.d.ts +113 -0
  128. package/dist/registry/case-registry.d.ts.map +1 -0
  129. package/dist/registry/case-registry.js +160 -0
  130. package/dist/registry/case-registry.js.map +1 -0
  131. package/dist/registry/index.d.ts +8 -0
  132. package/dist/registry/index.d.ts.map +1 -0
  133. package/dist/registry/index.js +8 -0
  134. package/dist/registry/index.js.map +1 -0
  135. package/dist/registry/sut-registry.d.ts +96 -0
  136. package/dist/registry/sut-registry.d.ts.map +1 -0
  137. package/dist/registry/sut-registry.js +126 -0
  138. package/dist/registry/sut-registry.js.map +1 -0
  139. package/dist/renderers/index.d.ts +10 -0
  140. package/dist/renderers/index.d.ts.map +1 -0
  141. package/dist/renderers/index.js +9 -0
  142. package/dist/renderers/index.js.map +1 -0
  143. package/dist/renderers/latex-renderer.d.ts +84 -0
  144. package/dist/renderers/latex-renderer.d.ts.map +1 -0
  145. package/dist/renderers/latex-renderer.js +208 -0
  146. package/dist/renderers/latex-renderer.js.map +1 -0
  147. package/dist/renderers/types.d.ts +106 -0
  148. package/dist/renderers/types.d.ts.map +1 -0
  149. package/dist/renderers/types.js +23 -0
  150. package/dist/renderers/types.js.map +1 -0
  151. package/dist/robustness/analyzer.d.ts +61 -0
  152. package/dist/robustness/analyzer.d.ts.map +1 -0
  153. package/dist/robustness/analyzer.js +191 -0
  154. package/dist/robustness/analyzer.js.map +1 -0
  155. package/dist/robustness/index.d.ts +8 -0
  156. package/dist/robustness/index.d.ts.map +1 -0
  157. package/dist/robustness/index.js +8 -0
  158. package/dist/robustness/index.js.map +1 -0
  159. package/dist/robustness/perturbations.d.ts +46 -0
  160. package/dist/robustness/perturbations.d.ts.map +1 -0
  161. package/dist/robustness/perturbations.js +184 -0
  162. package/dist/robustness/perturbations.js.map +1 -0
  163. package/dist/statistical/index.d.ts +8 -0
  164. package/dist/statistical/index.d.ts.map +1 -0
  165. package/dist/statistical/index.js +8 -0
  166. package/dist/statistical/index.js.map +1 -0
  167. package/dist/statistical/mann-whitney-u.d.ts +62 -0
  168. package/dist/statistical/mann-whitney-u.d.ts.map +1 -0
  169. package/dist/statistical/mann-whitney-u.js +127 -0
  170. package/dist/statistical/mann-whitney-u.js.map +1 -0
  171. package/dist/types/aggregate.d.ts +124 -0
  172. package/dist/types/aggregate.d.ts.map +1 -0
  173. package/dist/types/aggregate.js +9 -0
  174. package/dist/types/aggregate.js.map +1 -0
  175. package/dist/types/case.d.ts +105 -0
  176. package/dist/types/case.d.ts.map +1 -0
  177. package/dist/types/case.js +10 -0
  178. package/dist/types/case.js.map +1 -0
  179. package/dist/types/claims.d.ts +122 -0
  180. package/dist/types/claims.d.ts.map +1 -0
  181. package/dist/types/claims.js +14 -0
  182. package/dist/types/claims.js.map +1 -0
  183. package/dist/types/index.d.ts +12 -0
  184. package/dist/types/index.d.ts.map +1 -0
  185. package/dist/types/index.js +7 -0
  186. package/dist/types/index.js.map +1 -0
  187. package/dist/types/perturbation.d.ts +105 -0
  188. package/dist/types/perturbation.d.ts.map +1 -0
  189. package/dist/types/perturbation.js +9 -0
  190. package/dist/types/perturbation.js.map +1 -0
  191. package/dist/types/result.d.ts +150 -0
  192. package/dist/types/result.d.ts.map +1 -0
  193. package/dist/types/result.js +12 -0
  194. package/dist/types/result.js.map +1 -0
  195. package/dist/types/sut.d.ts +128 -0
  196. package/dist/types/sut.d.ts.map +1 -0
  197. package/dist/types/sut.js +12 -0
  198. package/dist/types/sut.js.map +1 -0
  199. package/package.json +283 -7
@@ -0,0 +1,127 @@
1
+ /**
2
+ * Mann-Whitney U Test
3
+ *
4
+ * Statistical test for comparing two independent samples.
5
+ * Tests whether two populations have the same distribution.
6
+ *
7
+ * H0: Both populations have the same distribution
8
+ * H1: Populations have different distributions
9
+ *
10
+ * Returns p-value (smaller = more significant difference)
11
+ */
12
/**
 * Standard normal cumulative distribution function.
 *
 * Evaluates Phi(z) = 0.5 * (1 + erf(z / sqrt(2))), where erf is computed with
 * the Abramowitz and Stegun polynomial approximation (the constants below are
 * those of formula 7.1.26, whose absolute error is about 1.5e-7).
 *
 * Edge cases: +Infinity yields 1, -Infinity yields 0 and NaN propagates as NaN.
 *
 * @param {number} z - Z-score
 * @returns {number} Cumulative probability from -infinity to z
 */
export const normalCDF = (z) => {
    // erf is an odd function: evaluate it on |z| and restore the sign afterwards.
    const sign = z < 0 ? -1 : 1;
    // Scaled magnitude kept in its own local so the parameter is never reassigned.
    const x = Math.abs(z) / Math.sqrt(2);

    // Polynomial coefficients of the erf approximation.
    const a1 = 0.254_829_592;
    const a2 = -0.284_496_736;
    const a3 = 1.421_413_741;
    const a4 = -1.453_152_027;
    const a5 = 1.061_405_429;
    const p = 0.327_591_1;

    const t = 1 / (1 + p * x);
    // Horner evaluation of the degree-5 polynomial in t.
    const polynomial = ((((a5 * t + a4) * t + a3) * t + a2) * t + a1) * t;
    const erfOfX = 1 - polynomial * Math.exp(-x * x);

    return 0.5 * (1 + sign * erfOfX);
};
32
/**
 * Mann-Whitney U test for comparing two independent samples.
 *
 * Non-parametric test that does not assume normal distribution.
 * Tests whether two populations have the same distribution.
 *
 * Tied values receive the average of the ranks they occupy, and the variance
 * of U used for the normal approximation is reduced by the standard tie
 * correction. Without that correction the p-value is too large whenever the
 * pooled data contains ties. No continuity correction is applied.
 *
 * If either sample is empty (or every value is tied) the variance is zero and
 * the result is a p-value of approximately 1 with `significant: false`.
 *
 * @param {number[]} sampleA - First sample array
 * @param {number[]} sampleB - Second sample array
 * @returns {{u: number, pValue: number, significant: boolean}} The smaller of
 *   the two U statistics, the two-tailed p-value from the normal
 *   approximation, and whether that p-value is below 0.05
 */
export const mannWhitneyUTest = (sampleA, sampleB) => {
    const n1 = sampleA.length;
    const n2 = sampleB.length;
    const total = n1 + n2;

    // Rank all values combined (copy first: sort mutates).
    const sorted = [...sampleA, ...sampleB].sort((a, b) => a - b);

    // Walk each run of equal values once: the run occupies ranks
    // start+1 .. end, so every member gets the average (start + 1 + end) / 2.
    const avgRanks = new Map();
    let tieTerm = 0; // sum of (t^3 - t) over tie groups of size t
    let start = 0;
    while (start < total) {
        let end = start + 1;
        while (end < total && sorted[end] === sorted[start]) {
            end += 1;
        }
        const groupSize = end - start;
        avgRanks.set(sorted[start], (start + 1 + end) / 2);
        tieTerm += groupSize ** 3 - groupSize;
        start = end;
    }

    // Sum ranks for each sample
    const sumRanks = (sample) =>
        sample.reduce((sum, value) => sum + (avgRanks.get(value) ?? 0), 0);
    const rankSumA = sumRanks(sampleA);
    const rankSumB = sumRanks(sampleB);

    // Calculate U statistics
    const u1 = rankSumA - (n1 * (n1 + 1)) / 2;
    const u2 = rankSumB - (n2 * (n2 + 1)) / 2;
    const u = Math.min(u1, u2);

    // Normal approximation with tie-corrected variance:
    // var(U) = n1*n2/12 * ((N + 1) - sum(t^3 - t) / (N * (N - 1)))
    const meanU = (n1 * n2) / 2;
    const tieAdjustment = total > 1 ? tieTerm / (total * (total - 1)) : 0;
    const varianceU = ((n1 * n2) / 12) * (total + 1 - tieAdjustment);
    // Guard against a tiny negative value caused by rounding.
    const stdU = Math.sqrt(Math.max(varianceU, 0));
    const z = stdU > 0 ? (u - meanU) / stdU : 0;

    // Two-tailed p-value from z-score (approximation)
    const pValue = 2 * (1 - normalCDF(Math.abs(z)));

    return {
        u,
        pValue,
        significant: pValue < 0.05, // 95% confidence level
    };
};
83
/**
 * Calculate Cohen's d effect size.
 *
 * Measures the standardized difference between two means, using the pooled
 * standard deviation sqrt((SS1 + SS2) / (n1 + n2 - 2)), where SSi is the sum
 * of squared deviations of sample i from its own mean.
 *
 * Interpretation:
 * - 0.2: Small effect
 * - 0.5: Medium effect
 * - 0.8: Large effect
 *
 * Returns 0 when the effect size is undefined: either sample is empty, there
 * are fewer than three observations in total, or the pooled deviation is zero.
 * A single-element sample is valid as long as the other sample has at least
 * two values; it contributes its mean and no within-sample spread.
 *
 * @param {number[]} sampleA - First sample array
 * @param {number[]} sampleB - Second sample array
 * @returns {number} Cohen's d effect size (always non-negative)
 */
export const cohensD = (sampleA, sampleB) => {
    const n1 = sampleA.length;
    const n2 = sampleB.length;
    const degreesOfFreedom = n1 + n2 - 2;

    if (n1 === 0 || n2 === 0 || degreesOfFreedom <= 0) {
        return 0;
    }

    const meanOf = (sample) => sample.reduce((a, b) => a + b, 0) / sample.length;
    const squaredDeviations = (sample, mean) =>
        sample.reduce((sum, value) => sum + (value - mean) ** 2, 0);

    const mean1 = meanOf(sampleA);
    const mean2 = meanOf(sampleB);

    // Pool the sums of squares directly. Dividing each by (n - 1) first and
    // multiplying back would produce 0/0 = NaN for a single-element sample.
    const pooledStd = Math.sqrt(
        (squaredDeviations(sampleA, mean1) + squaredDeviations(sampleB, mean2)) / degreesOfFreedom,
    );

    return pooledStd > 0 ? Math.abs(mean1 - mean2) / pooledStd : 0;
};
107
/**
 * Calculate confidence interval for a mean.
 *
 * The interval is mean +/- criticalValue * (sample standard deviation / sqrt(n)).
 * The default critical value 1.96 is the large-sample (normal) value for a
 * 95% interval; for small samples pass the matching t-distribution critical
 * value instead (e.g. 2.776 for n = 5).
 *
 * With fewer than two observations the sample standard deviation is
 * undefined, so the interval collapses to the mean itself (NaN bounds for an
 * empty array).
 *
 * @param {number[]} values - Sample values
 * @param {number} [criticalValue=1.96] - Multiplier applied to the standard error
 * @returns {{lower: number, upper: number}} Object with lower and upper bounds
 */
export const confidenceInterval = (values, criticalValue = 1.96) => {
    const n = values.length;
    const mean = values.reduce((a, b) => a + b, 0) / n;

    // n - 1 would be zero (or negative) below: avoid producing NaN from 0/0.
    if (n < 2) {
        return { lower: mean, upper: mean };
    }

    const sumOfSquares = values.reduce((sum, value) => sum + (value - mean) ** 2, 0);
    const std = Math.sqrt(sumOfSquares / (n - 1));
    const standardError = std / Math.sqrt(n);
    const margin = criticalValue * standardError;

    return {
        lower: mean - margin,
        upper: mean + margin,
    };
};
127
+ //# sourceMappingURL=mann-whitney-u.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"mann-whitney-u.js","sourceRoot":"","sources":["../../src/statistical/mann-whitney-u.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH;;;;;;GAMG;AACH,MAAM,CAAC,MAAM,SAAS,GAAG,CAAC,CAAS,EAAU,EAAE;IAC9C,MAAM,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC5B,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC/B,MAAM,EAAE,GAAG,aAAa,CAAC;IACzB,MAAM,EAAE,GAAG,CAAC,aAAa,CAAC;IAC1B,MAAM,EAAE,GAAG,aAAa,CAAC;IACzB,MAAM,EAAE,GAAG,CAAC,aAAa,CAAC;IAC1B,MAAM,EAAE,GAAG,aAAa,CAAC;IACzB,MAAM,CAAC,GAAG,WAAW,CAAC;IAEtB,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;IAC1B,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IACpF,OAAO,GAAG,GAAG,CAAC,CAAC,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC;AAC7B,CAAC,CAAC;AAEF;;;;;;;;;GASG;AACH,MAAM,CAAC,MAAM,gBAAgB,GAAG,CAC/B,OAAiB,EACjB,OAAiB,EAKhB,EAAE;IACH,2BAA2B;IAC3B,MAAM,QAAQ,GAAG,CAAC,GAAG,OAAO,EAAE,GAAG,OAAO,CAAC,CAAC;IAC1C,MAAM,MAAM,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAEnD,6BAA6B;IAC7B,MAAM,KAAK,GAAG,IAAI,GAAG,EAAoB,CAAC;IAC1C,KAAK,MAAM,CAAC,KAAK,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,EAAE,EAAE,CAAC;QAC/C,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;YACvB,KAAK,CAAC,GAAG,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;QACtB,CAAC;QACD,MAAM,SAAS,GAAG,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;QACnC,IAAI,SAAS,EAAE,CAAC;YACf,SAAS,CAAC,IAAI,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;QAC3B,CAAC;IACF,CAAC;IAED,+BAA+B;IAC/B,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAkB,CAAC;IAC3C,KAAK,MAAM,CAAC,KAAK,EAAE,SAAS,CAAC,IAAI,KAAK,EAAE,CAAC;QACxC,QAAQ,CAAC,GAAG,CAAC,KAAK,EAAE,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC;IAC9E,CAAC;IAED,4BAA4B;IAC5B,MAAM,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,KAAK,EAAE,EAAE,CAAC,GAAG
,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IACrF,MAAM,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,KAAK,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IAErF,yBAAyB;IACzB,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC;IAC1B,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC;IAC1B,MAAM,EAAE,GAAG,QAAQ,GAAG,CAAC,EAAE,GAAG,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;IAC1C,MAAM,EAAE,GAAG,QAAQ,GAAG,CAAC,EAAE,GAAG,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;IAC1C,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC;IAE3B,sCAAsC;IACtC,MAAM,KAAK,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC;IAC5B,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,EAAE,GAAG,EAAE,GAAG,CAAC,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC;IACvD,MAAM,CAAC,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,KAAK,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;IAE5C,kDAAkD;IAClD,MAAM,MAAM,GAAG,CAAC,GAAG,CAAC,CAAC,GAAG,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAEhD,OAAO;QACN,CAAC;QACD,MAAM;QACN,WAAW,EAAE,MAAM,GAAG,IAAI,EAAE,uBAAuB;KACnD,CAAC;AACH,CAAC,CAAC;AAEF;;;;;;;;;;;;;GAaG;AACH,MAAM,CAAC,MAAM,OAAO,GAAG,CAAC,OAAiB,EAAE,OAAiB,EAAU,EAAE;IACvE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC;IAC1B,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC;IAE1B,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,EAAE,CAAC;IACtD,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,EAAE,CAAC;IAEtD,MAAM,SAAS,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,KAAK,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC;IAC3F,MAAM,SAAS,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,KAAK,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC;IAE3F,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,GAAG,SAAS,GAAG,CAAC,EAAE,GAAG,CAAC,CAAC,GAAG,SAAS,CAAC,GAAG,CAAC,EAAE,GAAG,EAAE,
GAAG,CAAC,CAAC,CAAC,CAAC;IAE3F,OAAO,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,GAAG,KAAK,CAAC,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC;AAChE,CAAC,CAAC;AAEF;;;;;;;GAOG;AACH,MAAM,CAAC,MAAM,kBAAkB,GAAG,CAAC,MAAgB,EAAoC,EAAE;IACxF,MAAM,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC;IACxB,MAAM,IAAI,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC;IACnD,MAAM,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,KAAK,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,KAAK,GAAG,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;IAC7F,MAAM,EAAE,GAAG,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC9B,MAAM,CAAC,GAAG,IAAI,CAAC,CAAC,2CAA2C;IAE3D,MAAM,MAAM,GAAG,CAAC,GAAG,EAAE,CAAC;IACtB,OAAO;QACN,KAAK,EAAE,IAAI,GAAG,MAAM;QACpB,KAAK,EAAE,IAAI,GAAG,MAAM;KACpB,CAAC;AACH,CAAC,CAAC"}
@@ -0,0 +1,124 @@
1
+ /**
2
+ * Aggregated Result Type Definitions
3
+ *
4
+ * Aggregated results summarise multiple evaluation runs with statistical
5
+ * measures. This is the intermediate format between raw results and
6
+ * final rendered output (LaTeX tables, etc.).
7
+ */
8
+ import type { Primitive } from "./case.js";
9
+ import type { SutRole } from "./sut.js";
10
+ /**
11
+ * Summary statistics for one numeric metric. The count, mean, median, min and max are required; every other field is optional.
12
+ */
13
+ export interface SummaryStats {
14
+ /** Number of observations summarised */
15
+ n: number;
16
+ /** Arithmetic mean of the observations */
17
+ mean: number;
18
+ /** Median (50th percentile) */
19
+ median: number;
20
+ /** Minimum observed value */
21
+ min: number;
22
+ /** Maximum observed value */
23
+ max: number;
24
+ /** Sample standard deviation (optional) */
25
+ std?: number;
26
+ /** 95% confidence interval as a [lower, upper] tuple (optional) */
27
+ confidence95?: [number, number];
28
+ /** Sum of all values (optional) */
29
+ sum?: number;
30
+ /** 25th percentile (optional) */
31
+ p25?: number;
32
+ /** 75th percentile (optional) */
33
+ p75?: number;
34
+ }
35
+ /**
36
+ * Comparison metrics between a primary SUT and a baseline SUT. Only the deltas and ratios are required; the statistical fields are optional.
37
+ */
38
+ export interface ComparisonMetrics {
39
+ /** Absolute deltas (primary - baseline), keyed by string (presumably the metric name; confirm against the producer) */
40
+ deltas: Record<string, number>;
41
+ /** Ratios (primary / baseline), keyed the same way as the deltas (presumably by metric name; confirm) */
42
+ ratios: Record<string, number>;
43
+ /** Win rate (% of cases where primary beats baseline). NOTE(review): the type does not say whether this is stored as 0-1 or 0-100; confirm */
44
+ betterRate?: number;
45
+ /** Mann-Whitney U statistic */
46
+ uStatistic?: number;
47
+ /** Statistical significance (p-value); presumably from the same test as uStatistic, confirm against the producer */
48
+ pValue?: number;
49
+ /** Effect size (Cohen's d) */
50
+ effectSize?: number;
51
+ }
52
+ /**
53
+ * Coverage information for the aggregation: how many cases and metrics were actually available.
54
+ */
55
+ export interface CoverageMetrics {
56
+ /** Fraction of cases covered */
57
+ caseCoverage: number;
58
+ /** Metric availability (metric name -> coverage fraction) */
59
+ metricCoverage: Record<string, number>;
60
+ /** IDs of the cases that are missing (optional) */
61
+ missingCases?: string[];
62
+ }
63
+ /**
64
+ * Aggregated result for a single SUT, optionally restricted to one case class.
65
+ */
66
+ export interface AggregatedResult {
67
+ /** SUT identifier */
68
+ sut: string;
69
+ /** Role of the SUT (see SutRole) */
70
+ sutRole: SutRole;
71
+ /** Case class this aggregate is restricted to (present only if grouped) */
72
+ caseClass?: string;
73
+ /** Grouping information: size of the aggregate and an optional configuration hash */
74
+ group: {
75
+ /** Number of runs in this aggregate */
76
+ runCount: number;
77
+ /** Number of unique cases */
78
+ caseCount: number;
79
+ /** Hash of configuration (for homogeneity check), optional */
80
+ configHash?: string;
81
+ };
82
+ /** Correctness summary, expressed as fractions of runs */
83
+ correctness: {
84
+ /** Fraction of runs that produced valid output */
85
+ validRate: number;
86
+ /** Fraction of runs that produced any output */
87
+ producedOutputRate: number;
88
+ /** Fraction of runs matching expected output (only if an oracle is available) */
89
+ matchesExpectedRate?: number;
90
+ /** Breakdown of failure types, keyed by string (presumably failure type -> count; confirm) */
91
+ failureBreakdown?: Record<string, number>;
92
+ };
93
+ /** Aggregated metrics (metric name -> summary stats) */
94
+ metrics: Record<string, SummaryStats>;
95
+ /** Comparisons with baselines (baseline SUT id -> comparison), optional */
96
+ comparisons?: Record<string, ComparisonMetrics>;
97
+ /** Coverage information (optional) */
98
+ coverage?: CoverageMetrics;
99
+ /** Additional metadata as primitive values (optional) */
100
+ metadata?: Record<string, Primitive>;
101
+ }
102
+ /**
103
+ * Complete aggregation output: a versioned, timestamped list of aggregated results with optional global totals.
104
+ */
105
+ export interface AggregationOutput {
106
+ /** Schema version */
107
+ version: string;
108
+ /** Generation timestamp, stored as a string (format not specified here; presumably ISO 8601, confirm) */
109
+ timestamp: string;
110
+ /** Aggregated results */
111
+ aggregates: AggregatedResult[];
112
+ /** Global metadata (optional) */
113
+ metadata?: {
114
+ /** Total runs processed */
115
+ totalRuns: number;
116
+ /** Total unique cases */
117
+ totalCases: number;
118
+ /** SUTs included, as a list of strings (presumably SUT identifiers; confirm) */
119
+ sutsIncluded: string[];
120
+ /** Case classes included (optional) */
121
+ caseClassesIncluded?: string[];
122
+ };
123
+ }
124
+ //# sourceMappingURL=aggregate.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"aggregate.d.ts","sourceRoot":"","sources":["../../src/types/aggregate.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAC3C,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,UAAU,CAAC;AAExC;;GAEG;AACH,MAAM,WAAW,YAAY;IAC5B,6BAA6B;IAC7B,CAAC,EAAE,MAAM,CAAC;IAEV,sBAAsB;IACtB,IAAI,EAAE,MAAM,CAAC;IAEb,+BAA+B;IAC/B,MAAM,EAAE,MAAM,CAAC;IAEf,oBAAoB;IACpB,GAAG,EAAE,MAAM,CAAC;IAEZ,oBAAoB;IACpB,GAAG,EAAE,MAAM,CAAC;IAEZ,kCAAkC;IAClC,GAAG,CAAC,EAAE,MAAM,CAAC;IAEb,6CAA6C;IAC7C,YAAY,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAEhC,wBAAwB;IACxB,GAAG,CAAC,EAAE,MAAM,CAAC;IAEb,sBAAsB;IACtB,GAAG,CAAC,EAAE,MAAM,CAAC;IAEb,sBAAsB;IACtB,GAAG,CAAC,EAAE,MAAM,CAAC;CACb;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IACjC,2CAA2C;IAC3C,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAE/B,kCAAkC;IAClC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAE/B,yDAAyD;IACzD,UAAU,CAAC,EAAE,MAAM,CAAC;IAEpB,+BAA+B;IAC/B,UAAU,CAAC,EAAE,MAAM,CAAC;IAEpB,yCAAyC;IACzC,MAAM,CAAC,EAAE,MAAM,CAAC;IAEhB,8BAA8B;IAC9B,UAAU,CAAC,EAAE,MAAM,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC/B,gCAAgC;IAChC,YAAY,EAAE,MAAM,CAAC;IAErB,6DAA6D;IAC7D,cAAc,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAEvC,uBAAuB;IACvB,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;CACxB;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAChC,qBAAqB;IACrB,GAAG,EAAE,MAAM,CAAC;IAEZ,eAAe;IACf,OAAO,EAAE,OAAO,CAAC;IAEjB,8BAA8B;IAC9B,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB,2BAA2B;IAC3B,KAAK,EAAE;QACN,uCAAuC;QACvC,QAAQ,EAAE,MAAM,CAAC;QAEjB,6BAA6B;QAC7B,SAAS,EAAE,MAAM,CAAC;QAElB,oDAAoD;QACpD,UAAU,CAAC,EAAE,MAAM,CAAC;KACpB,CAAC;IAEF,0BAA0B;IAC1B,WAAW,EAAE;QACZ,kDAAkD;QAClD,SAAS,EAAE,MAAM,CAAC;QAElB,gDAAgD;QAChD,kBAAkB,EAAE,MAAM,CAAC;QAE3B,+DAA+D;QAC/D,mBAAmB,CAAC,EAAE,MAAM,CAAC;QAE7B,iCAAiC;QACjC,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KAC1C,CAAC;IAEF,wDAAwD;IACxD,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,YAAY,CAAC,CAAC;IAEtC,iEAAiE;IACjE,WAAW,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,iBAAiB,CAAC,CAAC;IAEhD,2BAA2B;IAC3B,QAAQ,CAAC,EAAE,eAAe,CAAC;IAE3B,0BAA0B;IAC1B,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAA
E,SAAS,CAAC,CAAC;CACrC;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IACjC,qBAAqB;IACrB,OAAO,EAAE,MAAM,CAAC;IAEhB,2BAA2B;IAC3B,SAAS,EAAE,MAAM,CAAC;IAElB,yBAAyB;IACzB,UAAU,EAAE,gBAAgB,EAAE,CAAC;IAE/B,sBAAsB;IACtB,QAAQ,CAAC,EAAE;QACV,2BAA2B;QAC3B,SAAS,EAAE,MAAM,CAAC;QAElB,yBAAyB;QACzB,UAAU,EAAE,MAAM,CAAC;QAEnB,oBAAoB;QACpB,YAAY,EAAE,MAAM,EAAE,CAAC;QAEvB,4BAA4B;QAC5B,mBAAmB,CAAC,EAAE,MAAM,EAAE,CAAC;KAC/B,CAAC;CACF"}
@@ -0,0 +1,9 @@
1
+ /**
2
+ * Aggregated Result Type Definitions
3
+ *
4
+ * Aggregated results summarise multiple evaluation runs with statistical
5
+ * measures. This is the intermediate format between raw results and
6
+ * final rendered output (LaTeX tables, etc.).
7
+ */
8
+ export {};
9
+ //# sourceMappingURL=aggregate.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"aggregate.js","sourceRoot":"","sources":["../../src/types/aggregate.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG"}
@@ -0,0 +1,105 @@
1
+ /**
2
+ * Evaluation Case Type Definitions
3
+ *
4
+ * A case represents a single evaluation scenario with:
5
+ * - Deterministic inputs (graph, seeds, etc.)
6
+ * - Expected behavior or ground truth (if applicable)
7
+ * - Grouping metadata for aggregation
8
+ */
9
+ /**
10
+ * Primitive value types allowed in case summaries, expected labels and metadata records.
11
+ */
12
+ export type Primitive = string | number | boolean | null;
13
+ /**
14
+ * Reference to an external artefact (graph file, path set, etc.). Only the type and the URI are required.
15
+ */
16
+ export interface ArtefactReference {
17
+ /** Type of artefact, one of the fixed string literals below */
18
+ type: "graph" | "path-set" | "subgraph" | "embedding" | "other";
19
+ /** URI or path to artefact */
20
+ uri: string;
21
+ /** Content hash for integrity verification (optional; hash algorithm not specified here) */
22
+ hash?: string;
23
+ /** Optional metadata as primitive values */
24
+ metadata?: Record<string, Primitive>;
25
+ }
26
+ /**
27
+ * Input specification for an evaluation case. Both parts are optional.
28
+ */
29
+ export interface CaseInputs {
30
+ /** Summary values, each a primitive or an array of primitives (e.g., { nodes: 100, seeds: ["a", "b"] }) */
31
+ summary?: Record<string, Primitive | Primitive[]>;
32
+ /** References to external artefacts */
33
+ artefacts?: ArtefactReference[];
34
+ }
35
+ /**
36
+ * A single evaluation case. Only the caseId and the inputs are required.
37
+ *
38
+ * The caseId should be a deterministic hash of the canonical inputs
39
+ * to ensure reproducibility across runs (the type itself only requires a string).
40
+ */
41
+ export interface EvaluationCase {
42
+ /** Deterministic ID (SHA-256 of canonical inputs) */
43
+ caseId: string;
44
+ /** Human-readable name (optional) */
45
+ name?: string;
46
+ /** Grouping label for aggregation (e.g., "scale-free", "bidirectional") */
47
+ caseClass?: string;
48
+ /** Input specification (summary values and artefact references) */
49
+ inputs: CaseInputs;
50
+ /** Optional expected output for oracle-based evaluation */
51
+ expectedOutput?: {
52
+ /** Expected summary values, each a primitive or an array of primitives */
53
+ summary?: Record<string, Primitive | Primitive[]>;
54
+ /** Expected labels as primitive values */
55
+ labels?: Record<string, Primitive>;
56
+ /** Expected ranking (for ranking tasks): a list of item IDs with their scores */
57
+ ranking?: {
58
+ itemId: string;
59
+ score: number;
60
+ }[];
61
+ };
62
+ /** Version of this case definition (optional) */
63
+ version?: string;
64
+ /** Tags for filtering (read-only list) */
65
+ tags?: readonly string[];
66
+ }
67
+ /**
68
+ * Complete case definition: the case specification plus two factories that supply its inputs.
69
+ *
70
+ * The framework doesn't need to know what "expander" or "seeds" mean.
71
+ * It only needs:
72
+ * 1. getInput() - Asynchronously load whatever resource the algorithm needs (graph, dataset, API client, etc.)
73
+ * 2. getInputs() - Synchronously get algorithm-specific inputs from the case
74
+ *
75
+ * @template TInput - The resource type (e.g., Graph, Dataset, API client); defaults to unknown
76
+ * @template TInputs - The algorithm inputs type; defaults to unknown
77
+ */
78
+ export interface CaseDefinition<TInput = unknown, TInputs = unknown> {
79
+ /** The case specification */
80
+ case: EvaluationCase;
81
+ /**
82
+ * Load the primary resource needed by the algorithm.
83
+ * Intended to be called once per case and cached. NOTE(review): this interface cannot enforce the caching; confirm where it happens.
84
+ *
85
+ * Examples:
86
+ * - Expansion: Load a benchmark graph
87
+ * - Ranking: Load a graph with source/target metadata
88
+ * - ML: Load training dataset
89
+ *
90
+ * @returns Promise resolving to the resource
91
+ */
92
+ getInput(): Promise<TInput>;
93
+ /**
94
+ * Get algorithm-specific inputs for this case. Unlike getInput(), this returns its value directly rather than a Promise.
95
+ *
96
+ * Examples:
97
+ * - Expansion: { seeds: ["node1", "node2"] }
98
+ * - Ranking: { source: "node1", target: "node2", maxPaths: 10 }
99
+ * - Classification: { labels: ["cat", "dog"], threshold: 0.5 }
100
+ *
101
+ * @returns Algorithm inputs
102
+ */
103
+ getInputs(): TInputs;
104
+ }
105
+ //# sourceMappingURL=case.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"case.d.ts","sourceRoot":"","sources":["../../src/types/case.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH;;GAEG;AACH,MAAM,MAAM,SAAS,GAAG,MAAM,GAAG,MAAM,GAAG,OAAO,GAAG,IAAI,CAAC;AAEzD;;GAEG;AACH,MAAM,WAAW,iBAAiB;IACjC,uBAAuB;IACvB,IAAI,EAAE,OAAO,GAAG,UAAU,GAAG,UAAU,GAAG,WAAW,GAAG,OAAO,CAAC;IAEhE,8BAA8B;IAC9B,GAAG,EAAE,MAAM,CAAC;IAEZ,8CAA8C;IAC9C,IAAI,CAAC,EAAE,MAAM,CAAC;IAEd,wBAAwB;IACxB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;CACrC;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IAC1B,sEAAsE;IACtE,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,GAAG,SAAS,EAAE,CAAC,CAAC;IAElD,uCAAuC;IACvC,SAAS,CAAC,EAAE,iBAAiB,EAAE,CAAC;CAChC;AAED;;;;;GAKG;AACH,MAAM,WAAW,cAAc;IAC9B,qDAAqD;IACrD,MAAM,EAAE,MAAM,CAAC;IAEf,0BAA0B;IAC1B,IAAI,CAAC,EAAE,MAAM,CAAC;IAEd,2EAA2E;IAC3E,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB,0BAA0B;IAC1B,MAAM,EAAE,UAAU,CAAC;IAEnB,2DAA2D;IAC3D,cAAc,CAAC,EAAE;QAChB,8BAA8B;QAC9B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,GAAG,SAAS,EAAE,CAAC,CAAC;QAElD,sBAAsB;QACtB,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;QAEnC,2CAA2C;QAC3C,OAAO,CAAC,EAAE;YAAE,MAAM,EAAE,MAAM,CAAC;YAAC,KAAK,EAAE,MAAM,CAAA;SAAE,EAAE,CAAC;KAC9C,CAAC;IAEF,sCAAsC;IACtC,OAAO,CAAC,EAAE,MAAM,CAAC;IAEjB,yBAAyB;IACzB,IAAI,CAAC,EAAE,SAAS,MAAM,EAAE,CAAC;CACzB;AAED;;;;;;;;;;GAUG;AACH,MAAM,WAAW,cAAc,CAAC,MAAM,GAAG,OAAO,EAAE,OAAO,GAAG,OAAO;IAClE,6BAA6B;IAC7B,IAAI,EAAE,cAAc,CAAC;IAErB;;;;;;;;;;OAUG;IACH,QAAQ,IAAI,OAAO,CAAC,MAAM,CAAC,CAAC;IAE5B;;;;;;;;;OASG;IACH,SAAS,IAAI,OAAO,CAAC;CACrB"}
@@ -0,0 +1,10 @@
1
+ /**
2
+ * Evaluation Case Type Definitions
3
+ *
4
+ * A case represents a single evaluation scenario with:
5
+ * - Deterministic inputs (graph, seeds, etc.)
6
+ * - Expected behavior or ground truth (if applicable)
7
+ * - Grouping metadata for aggregation
8
+ */
9
+ export {};
10
+ //# sourceMappingURL=case.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"case.js","sourceRoot":"","sources":["../../src/types/case.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG"}
@@ -0,0 +1,122 @@
1
+ /**
2
+ * Evaluation Claims Type Definitions
3
+ *
4
+ * Claims represent explicit hypotheses to be tested. Each claim specifies:
5
+ * - Which SUTs are being compared
6
+ * - Which metric is being evaluated
7
+ * - The expected relationship (greater, less, equal)
8
+ * - The scope of validity
9
+ *
10
+ * This enables claim-driven evaluation where experiments are designed
11
+ * to test specific hypotheses rather than collect arbitrary metrics.
12
+ */
13
+ import type { Primitive } from "./case.js";
14
+ /**
15
+ * Scope of claim validity, i.e. the set of cases over which a claim is expected to hold.
16
+ *
17
+ * - `global`: Claim should hold across all cases
18
+ * - `caseClass`: Claim holds within specific case classes
19
+ * - `parameterRange`: Claim holds for specific parameter ranges
20
+ * - `localStructure`: Claim depends on local graph structure
21
+ */
22
+ export type ValidityScope = "global" | "caseClass" | "parameterRange" | "localStructure";
23
+ /**
24
+ * Direction of comparison: the expected relationship between the compared metric values ("greater", "less", or "equal").
25
+ */
26
+ export type ComparisonDirection = "greater" | "less" | "equal";
27
+ /**
28
+ * An evaluation claim (hypothesis) comparing a primary SUT against a baseline SUT on a single metric.
29
+ */
30
+ export interface EvaluationClaim {
31
+ /** Unique identifier for this claim */
32
+ claimId: string;
33
+ /** Human-readable description */
34
+ description: string;
35
+ /** Primary SUT being evaluated */
36
+ sut: string;
37
+ /** Baseline SUT for comparison */
38
+ baseline: string;
39
+ /** Metric being compared */
40
+ metric: string;
41
+ /** Expected direction of difference (one of the ComparisonDirection values) */
42
+ direction: ComparisonDirection;
43
+ /** Optional threshold for the difference */
44
+ threshold?: number;
45
+ /** Scope of validity (one of the ValidityScope values) */
46
+ scope: ValidityScope;
47
+ /** Optional scope constraints keyed by name (e.g., { caseClass: "scale-free" }); each value is a Primitive or an array of Primitives */
48
+ scopeConstraints?: Record<string, Primitive | Primitive[]>;
49
+ /** Required significance level (default: 0.05) */
50
+ significanceLevel?: number;
51
+ /** Minimum effect size (Cohen's d) */
52
+ minEffectSize?: number;
53
+ /** Tags for filtering (read-only array) */
54
+ tags?: readonly string[];
55
+ /** Citation/reference for the claim */
56
+ citation?: string;
57
+ }
58
+ /**
59
+ * Status of a claim evaluation: the claim was satisfied, violated, or could not be decided (inconclusive).
60
+ */
61
+ export type ClaimStatus = "satisfied" | "violated" | "inconclusive";
62
+ /**
63
+ * Evidence supporting a claim evaluation: the two compared values, their difference and ratio, plus optional statistics.
64
+ */
65
+ export interface ClaimEvidence {
66
+ /** Primary SUT metric value */
67
+ primaryValue: number;
68
+ /** Baseline SUT metric value */
69
+ baselineValue: number;
70
+ /** Absolute delta (primary - baseline) */
71
+ delta: number;
72
+ /** Ratio (primary / baseline) */
73
+ ratio: number;
74
+ /** P-value from statistical test (optional) */
75
+ pValue?: number;
76
+ /** Effect size (Cohen's d) (optional) */
77
+ effectSize?: number;
78
+ /** Number of observations (optional) */
79
+ n?: number;
80
+ /** 95% confidence interval for delta, as a two-number tuple (presumably [lower, upper] - TODO confirm order) */
81
+ deltaCI95?: [number, number];
82
+ }
83
+ /**
84
+ * Result of evaluating a single claim: the claim itself, its status, and the supporting evidence.
85
+ */
86
+ export interface ClaimEvaluation {
87
+ /** The claim being evaluated */
88
+ claim: EvaluationClaim;
89
+ /** Evaluation status */
90
+ status: ClaimStatus;
91
+ /** Supporting evidence */
92
+ evidence: ClaimEvidence;
93
+ /** Reason for inconclusive status (if applicable) */
94
+ inconclusiveReason?: string;
95
+ /** Additional free-form notes (optional) */
96
+ notes?: string[];
97
+ }
98
+ /**
99
+ * Summary of all claim evaluations: the individual results plus aggregate counts.
100
+ */
101
+ export interface ClaimEvaluationSummary {
102
+ /** Schema version */
103
+ version: string;
104
+ /** Generation timestamp (stored as a string) */
105
+ timestamp: string;
106
+ /** Individual claim evaluations */
107
+ evaluations: ClaimEvaluation[];
108
+ /** Summary statistics */
109
+ summary: {
110
+ /** Total claims evaluated */
111
+ total: number;
112
+ /** Claims satisfied */
113
+ satisfied: number;
114
+ /** Claims violated */
115
+ violated: number;
116
+ /** Claims inconclusive */
117
+ inconclusive: number;
118
+ /** Satisfaction rate (satisfied / (satisfied + violated)); inconclusive claims are not part of the denominator */
119
+ satisfactionRate: number;
120
+ };
121
+ }
122
+ //# sourceMappingURL=claims.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"claims.d.ts","sourceRoot":"","sources":["../../src/types/claims.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAE3C;;;;;;;GAOG;AACH,MAAM,MAAM,aAAa,GAAG,QAAQ,GAAG,WAAW,GAAG,gBAAgB,GAAG,gBAAgB,CAAC;AAEzF;;GAEG;AACH,MAAM,MAAM,mBAAmB,GAAG,SAAS,GAAG,MAAM,GAAG,OAAO,CAAC;AAE/D;;GAEG;AACH,MAAM,WAAW,eAAe;IAC/B,uCAAuC;IACvC,OAAO,EAAE,MAAM,CAAC;IAEhB,iCAAiC;IACjC,WAAW,EAAE,MAAM,CAAC;IAEpB,kCAAkC;IAClC,GAAG,EAAE,MAAM,CAAC;IAEZ,kCAAkC;IAClC,QAAQ,EAAE,MAAM,CAAC;IAEjB,4BAA4B;IAC5B,MAAM,EAAE,MAAM,CAAC;IAEf,uCAAuC;IACvC,SAAS,EAAE,mBAAmB,CAAC;IAE/B,4CAA4C;IAC5C,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB,wBAAwB;IACxB,KAAK,EAAE,aAAa,CAAC;IAErB,4DAA4D;IAC5D,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,GAAG,SAAS,EAAE,CAAC,CAAC;IAE3D,kDAAkD;IAClD,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAE3B,sCAAsC;IACtC,aAAa,CAAC,EAAE,MAAM,CAAC;IAEvB,yBAAyB;IACzB,IAAI,CAAC,EAAE,SAAS,MAAM,EAAE,CAAC;IAEzB,uCAAuC;IACvC,QAAQ,CAAC,EAAE,MAAM,CAAC;CAClB;AAED;;GAEG;AACH,MAAM,MAAM,WAAW,GAAG,WAAW,GAAG,UAAU,GAAG,cAAc,CAAC;AAEpE;;GAEG;AACH,MAAM,WAAW,aAAa;IAC7B,+BAA+B;IAC/B,YAAY,EAAE,MAAM,CAAC;IAErB,gCAAgC;IAChC,aAAa,EAAE,MAAM,CAAC;IAEtB,0CAA0C;IAC1C,KAAK,EAAE,MAAM,CAAC;IAEd,iCAAiC;IACjC,KAAK,EAAE,MAAM,CAAC;IAEd,oCAAoC;IACpC,MAAM,CAAC,EAAE,MAAM,CAAC;IAEhB,8BAA8B;IAC9B,UAAU,CAAC,EAAE,MAAM,CAAC;IAEpB,6BAA6B;IAC7B,CAAC,CAAC,EAAE,MAAM,CAAC;IAEX,wCAAwC;IACxC,SAAS,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAC7B;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC/B,gCAAgC;IAChC,KAAK,EAAE,eAAe,CAAC;IAEvB,wBAAwB;IACxB,MAAM,EAAE,WAAW,CAAC;IAEpB,0BAA0B;IAC1B,QAAQ,EAAE,aAAa,CAAC;IAExB,qDAAqD;IACrD,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAE5B,uBAAuB;IACvB,KAAK,CAAC,EAAE,MAAM,EAAE,CAAC;CACjB;AAED;;GAEG;AACH,MAAM,WAAW,sBAAsB;IACtC,qBAAqB;IACrB,OAAO,EAAE,MAAM,CAAC;IAEhB,2BAA2B;IAC3B,SAAS,EAAE,MAAM,CAAC;IAElB,mCAAmC;IACnC,WAAW,EAAE,eAAe,EAAE,CAAC;IAE/B,yBAAyB;IACzB,OAAO,EAAE;QACR,6BAA6B;QAC7B,KAAK,EAAE,MAAM,CAAC;QAEd,uBAAuB;QACvB,SAAS,EAAE,MAAM,CAAC;QAElB,sBAAsB;QACtB,QAAQ,EAAE,MAAM,CAAC;QAEjB,0BAA0B;QAC1B,YAAY,EAAE,MAAM,C
AAC;QAErB,6DAA6D;QAC7D,gBAAgB,EAAE,MAAM,CAAC;KACzB,CAAC;CACF"}
@@ -0,0 +1,14 @@
1
+ /**
2
+ * Evaluation Claims Type Definitions
3
+ *
4
+ * Claims represent explicit hypotheses to be tested. Each claim specifies:
5
+ * - Which SUTs are being compared
6
+ * - Which metric is being evaluated
7
+ * - The expected relationship (greater, less, equal)
8
+ * - The scope of validity
9
+ *
10
+ * This enables claim-driven evaluation where experiments are designed
11
+ * to test specific hypotheses rather than collect arbitrary metrics.
12
+ */
13
+ export {};
14
+ //# sourceMappingURL=claims.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"claims.js","sourceRoot":"","sources":["../../src/types/claims.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG"}
@@ -0,0 +1,12 @@
1
+ /**
2
+ * Framework Type Definitions
3
+ *
4
+ * Re-exports all canonical types for the evaluation framework.
5
+ */
6
+ export type { SutDefinition, SutFactory, SutRegistration, SutRole } from "./sut.js";
7
+ export type { ArtefactReference, CaseDefinition, CaseInputs, EvaluationCase, Primitive, } from "./case.js";
8
+ export type { CorrectnessResult, EvaluationResult, FailureType, Provenance, RankedItem, ResultBatch, ResultMetrics, ResultOutputs, RunContext, } from "./result.js";
9
+ export type { AggregatedResult, AggregationOutput, ComparisonMetrics, CoverageMetrics, SummaryStats, } from "./aggregate.js";
10
+ export type { ClaimEvaluation, ClaimEvaluationSummary, ClaimEvidence, ClaimStatus, ComparisonDirection, EvaluationClaim, ValidityScope, } from "./claims.js";
11
+ export type { Perturbation, PerturbationConfig, PerturbationType, RobustnessAnalysisOutput, RobustnessAnalysisResult, RobustnessMetrics, } from "./perturbation.js";
12
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/types/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAGH,YAAY,EAAE,aAAa,EAAE,UAAU,EAAE,eAAe,EAAE,OAAO,EAAE,MAAM,UAAU,CAAC;AAGpF,YAAY,EACX,iBAAiB,EACjB,cAAc,EACd,UAAU,EACV,cAAc,EACd,SAAS,GACT,MAAM,WAAW,CAAC;AAGnB,YAAY,EACX,iBAAiB,EACjB,gBAAgB,EAChB,WAAW,EACX,UAAU,EACV,UAAU,EACV,WAAW,EACX,aAAa,EACb,aAAa,EACb,UAAU,GACV,MAAM,aAAa,CAAC;AAGrB,YAAY,EACX,gBAAgB,EAChB,iBAAiB,EACjB,iBAAiB,EACjB,eAAe,EACf,YAAY,GACZ,MAAM,gBAAgB,CAAC;AAGxB,YAAY,EACX,eAAe,EACf,sBAAsB,EACtB,aAAa,EACb,WAAW,EACX,mBAAmB,EACnB,eAAe,EACf,aAAa,GACb,MAAM,aAAa,CAAC;AAGrB,YAAY,EACX,YAAY,EACZ,kBAAkB,EAClB,gBAAgB,EAChB,wBAAwB,EACxB,wBAAwB,EACxB,iBAAiB,GACjB,MAAM,mBAAmB,CAAC"}
@@ -0,0 +1,7 @@
1
+ /**
2
+ * Framework Type Definitions
3
+ *
4
+ * Re-exports all canonical types for the evaluation framework.
5
+ */
6
+ export {};
7
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/types/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG"}