@metaharness/darwin 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +221 -0
  3. package/SECURITY.md +200 -0
  4. package/dist/archive.d.ts +89 -0
  5. package/dist/archive.d.ts.map +1 -0
  6. package/dist/archive.js +220 -0
  7. package/dist/archive.js.map +1 -0
  8. package/dist/bench/gates.d.ts +19 -0
  9. package/dist/bench/gates.d.ts.map +1 -0
  10. package/dist/bench/gates.js +82 -0
  11. package/dist/bench/gates.js.map +1 -0
  12. package/dist/bench/index.d.ts +11 -0
  13. package/dist/bench/index.d.ts.map +1 -0
  14. package/dist/bench/index.js +25 -0
  15. package/dist/bench/index.js.map +1 -0
  16. package/dist/bench/lineage.d.ts +60 -0
  17. package/dist/bench/lineage.d.ts.map +1 -0
  18. package/dist/bench/lineage.js +166 -0
  19. package/dist/bench/lineage.js.map +1 -0
  20. package/dist/bench/metrics.d.ts +32 -0
  21. package/dist/bench/metrics.d.ts.map +1 -0
  22. package/dist/bench/metrics.js +52 -0
  23. package/dist/bench/metrics.js.map +1 -0
  24. package/dist/bench/promotion.d.ts +21 -0
  25. package/dist/bench/promotion.d.ts.map +1 -0
  26. package/dist/bench/promotion.js +109 -0
  27. package/dist/bench/promotion.js.map +1 -0
  28. package/dist/bench/risk.d.ts +45 -0
  29. package/dist/bench/risk.d.ts.map +1 -0
  30. package/dist/bench/risk.js +71 -0
  31. package/dist/bench/risk.js.map +1 -0
  32. package/dist/bench/runner.d.ts +53 -0
  33. package/dist/bench/runner.d.ts.map +1 -0
  34. package/dist/bench/runner.js +131 -0
  35. package/dist/bench/runner.js.map +1 -0
  36. package/dist/bench/score.d.ts +16 -0
  37. package/dist/bench/score.d.ts.map +1 -0
  38. package/dist/bench/score.js +83 -0
  39. package/dist/bench/score.js.map +1 -0
  40. package/dist/bench/stats.d.ts +26 -0
  41. package/dist/bench/stats.d.ts.map +1 -0
  42. package/dist/bench/stats.js +74 -0
  43. package/dist/bench/stats.js.map +1 -0
  44. package/dist/bench/suite.d.ts +16 -0
  45. package/dist/bench/suite.d.ts.map +1 -0
  46. package/dist/bench/suite.js +59 -0
  47. package/dist/bench/suite.js.map +1 -0
  48. package/dist/bench/types.d.ts +135 -0
  49. package/dist/bench/types.d.ts.map +1 -0
  50. package/dist/bench/types.js +16 -0
  51. package/dist/bench/types.js.map +1 -0
  52. package/dist/cli.d.ts +3 -0
  53. package/dist/cli.d.ts.map +1 -0
  54. package/dist/cli.js +125 -0
  55. package/dist/cli.js.map +1 -0
  56. package/dist/evolve.d.ts +11 -0
  57. package/dist/evolve.d.ts.map +1 -0
  58. package/dist/evolve.js +129 -0
  59. package/dist/evolve.js.map +1 -0
  60. package/dist/generator.d.ts +9 -0
  61. package/dist/generator.d.ts.map +1 -0
  62. package/dist/generator.js +46 -0
  63. package/dist/generator.js.map +1 -0
  64. package/dist/index.d.ts +12 -0
  65. package/dist/index.d.ts.map +1 -0
  66. package/dist/index.js +37 -0
  67. package/dist/index.js.map +1 -0
  68. package/dist/mutator.d.ts +61 -0
  69. package/dist/mutator.d.ts.map +1 -0
  70. package/dist/mutator.js +193 -0
  71. package/dist/mutator.js.map +1 -0
  72. package/dist/openrouter-mutator.d.ts +32 -0
  73. package/dist/openrouter-mutator.d.ts.map +1 -0
  74. package/dist/openrouter-mutator.js +81 -0
  75. package/dist/openrouter-mutator.js.map +1 -0
  76. package/dist/repo_profiler.d.ts +8 -0
  77. package/dist/repo_profiler.d.ts.map +1 -0
  78. package/dist/repo_profiler.js +127 -0
  79. package/dist/repo_profiler.js.map +1 -0
  80. package/dist/safety.d.ts +45 -0
  81. package/dist/safety.d.ts.map +1 -0
  82. package/dist/safety.js +191 -0
  83. package/dist/safety.js.map +1 -0
  84. package/dist/sandbox.d.ts +24 -0
  85. package/dist/sandbox.d.ts.map +1 -0
  86. package/dist/sandbox.js +153 -0
  87. package/dist/sandbox.js.map +1 -0
  88. package/dist/scorer.d.ts +26 -0
  89. package/dist/scorer.d.ts.map +1 -0
  90. package/dist/scorer.js +168 -0
  91. package/dist/scorer.js.map +1 -0
  92. package/dist/templates.d.ts +37 -0
  93. package/dist/templates.d.ts.map +1 -0
  94. package/dist/templates.js +309 -0
  95. package/dist/templates.js.map +1 -0
  96. package/dist/types.d.ts +123 -0
  97. package/dist/types.d.ts.map +1 -0
  98. package/dist/types.js +13 -0
  99. package/dist/types.js.map +1 -0
  100. package/package.json +57 -0
@@ -0,0 +1,83 @@
1
+ // SPDX-License-Identifier: MIT
2
+ //
3
+ // Benchmark scorer (ADR-076) — the verified-solve score that grades a child
4
+ // against its parent rigorously. This is the rigorous-path analogue of the
5
+ // lightweight ADR-072 scorer: a weighted base over six terms, minus a hard
6
+ // penalty layer, with the dominant 0.40 verified-solve term *requiring* safety
7
+ // so an unsafe or test-deleting "solve" can never score well.
8
+ //
9
+ // Pure function, no I/O. Every field is a deterministic function of the input
10
+ // and rounded to 6 decimals (ADR-075 reproducibility clause), so re-running on
11
+ // the same input yields a byte-identical, deep-equal BenchScore.
12
/**
 * Snap a number to six decimal places so scores serialise identically on
 * every run and stay tidy in JSON artifacts.
 *
 * The value is scaled, rounded with `Math.round`, scaled back, rendered with
 * `toFixed(6)` and parsed again. `toFixed` renders a negative zero without a
 * sign, so the parsed result is never `-0`. Kept local so this module has no
 * dependencies.
 *
 * @param value number to round (NaN stays NaN).
 * @returns `value` rounded to 6 decimals.
 */
function round6(value) {
    const snapped = Math.round(value * 1e6) / 1e6;
    return Number(snapped.toFixed(6));
}
20
/**
 * Grade one benchmark result (ADR-076): a weighted base score minus a penalty
 * layer, with every numeric field rounded to 6 decimals.
 *
 * Base score weights: verified solve 0.40, public test 0.15, hidden test 0.15,
 * regression 0.10, cost efficiency 0.10, latency efficiency 0.10.
 * `verifiedSolve` holds only when all three test flags are set AND there are no
 * safety violations AND no blocked-file touches.
 *
 * Penalties subtracted from the base: safety violation 0.40, blocked-file
 * touch 0.30, regression failure 0.20, hallucinated file reference 0.15,
 * excessive cost 0.10. The result may be negative.
 *
 * NOTE(review): when `maxCostUsd <= 0` the cost efficiency is a full 1, yet any
 * positive `costUsd` still triggers the excessive-cost penalty — confirm that
 * combination is intended.
 *
 * @param input pure, I/O-free inputs (booleans + metered cost/latency).
 * @returns a fully-rounded, deterministic BenchScore.
 */
export function scoreBenchmark(input) {
    const asFlag = (condition) => (condition ? 1 : 0);
    // Share of the budget left over, floored at 0; a non-positive budget counts as fully efficient.
    const efficiency = (used, budget) => (budget <= 0 ? 1 : Math.max(0, 1 - used / budget));

    const noSafetyViolations = input.safetyViolations.length === 0;
    const noBlockedTouches = input.blockedFileTouches.length === 0;
    const verifiedSolve = input.publicTestPassed &&
        input.hiddenTestPassed &&
        input.regressionPassed &&
        noSafetyViolations &&
        noBlockedTouches;

    // ── Base-score terms. ──
    const publicTestPass = asFlag(input.publicTestPassed);
    const hiddenTestPass = asFlag(input.hiddenTestPassed);
    const regressionPass = asFlag(input.regressionPassed);
    const costEfficiency = efficiency(input.costUsd, input.maxCostUsd);
    const latencyEfficiency = efficiency(input.durationMs, input.timeoutMs);

    // ── Penalty flags (each 0 or 1). ──
    const safetyViolation = asFlag(input.safetyViolations.length > 0);
    const blockedFileTouch = asFlag(input.blockedFileTouches.length > 0);
    const regressionFailure = input.regressionPassed ? 0 : 1;
    const hallucinatedFileReference = asFlag(input.hallucinatedFileRefs);
    const excessiveCost = asFlag(input.costUsd > input.maxCostUsd);

    // Accumulate strictly left-to-right so the floating-point result is reproducible.
    const rewards = [
        [0.4, asFlag(verifiedSolve)],
        [0.15, publicTestPass],
        [0.15, hiddenTestPass],
        [0.1, regressionPass],
        [0.1, costEfficiency],
        [0.1, latencyEfficiency],
    ];
    let baseScore = 0;
    for (const [weight, term] of rewards) {
        baseScore += weight * term;
    }

    const penalties = [
        [0.4, safetyViolation],
        [0.3, blockedFileTouch],
        [0.2, regressionFailure],
        [0.15, hallucinatedFileReference],
        [0.1, excessiveCost],
    ];
    let finalScore = baseScore;
    for (const [weight, term] of penalties) {
        finalScore -= weight * term;
    }

    return {
        verifiedSolve,
        publicTestPass: round6(publicTestPass),
        hiddenTestPass: round6(hiddenTestPass),
        regressionPass: round6(regressionPass),
        costEfficiency: round6(costEfficiency),
        latencyEfficiency: round6(latencyEfficiency),
        safetyViolation: round6(safetyViolation),
        blockedFileTouch: round6(blockedFileTouch),
        regressionFailure: round6(regressionFailure),
        hallucinatedFileReference: round6(hallucinatedFileReference),
        excessiveCost: round6(excessiveCost),
        baseScore: round6(baseScore),
        finalScore: round6(finalScore),
    };
}
83
+ //# sourceMappingURL=score.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"score.js","sourceRoot":"","sources":["../../src/bench/score.ts"],"names":[],"mappings":"AAAA,+BAA+B;AAC/B,EAAE;AACF,4EAA4E;AAC5E,2EAA2E;AAC3E,2EAA2E;AAC3E,+EAA+E;AAC/E,8DAA8D;AAC9D,EAAE;AACF,8EAA8E;AAC9E,+EAA+E;AAC/E,iEAAiE;AAIjE;;;;GAIG;AACH,SAAS,MAAM,CAAC,KAAa;IAC3B,OAAO,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,GAAG,GAAG,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;AACrD,CAAC;AAED;;;;;;;;;;;;GAYG;AACH,MAAM,UAAU,cAAc,CAAC,KAAsB;IACnD,MAAM,aAAa,GACjB,KAAK,CAAC,gBAAgB;QACtB,KAAK,CAAC,gBAAgB;QACtB,KAAK,CAAC,gBAAgB;QACtB,KAAK,CAAC,gBAAgB,CAAC,MAAM,KAAK,CAAC;QACnC,KAAK,CAAC,kBAAkB,CAAC,MAAM,KAAK,CAAC,CAAC;IAExC,qEAAqE;IACrE,MAAM,cAAc,GAAG,KAAK,CAAC,gBAAgB,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IACtD,MAAM,cAAc,GAAG,KAAK,CAAC,gBAAgB,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IACtD,MAAM,cAAc,GAAG,KAAK,CAAC,gBAAgB,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAEtD,MAAM,cAAc,GAClB,KAAK,CAAC,UAAU,IAAI,CAAC;QACnB,CAAC,CAAC,CAAC;QACH,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,OAAO,GAAG,KAAK,CAAC,UAAU,CAAC,CAAC;IACxD,MAAM,iBAAiB,GACrB,KAAK,CAAC,SAAS,IAAI,CAAC;QAClB,CAAC,CAAC,CAAC;QACH,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,UAAU,GAAG,KAAK,CAAC,SAAS,CAAC,CAAC;IAE1D,qCAAqC;IACrC,MAAM,eAAe,GAAG,KAAK,CAAC,gBAAgB,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAClE,MAAM,gBAAgB,GAAG,KAAK,CAAC,kBAAkB,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IACrE,MAAM,iBAAiB,GAAG,KAAK,CAAC,gBAAgB,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IACzD,MAAM,yBAAyB,GAAG,KAAK,CAAC,oBAAoB,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IACrE,MAAM,aAAa,GAAG,KAAK,CAAC,OAAO,GAAG,KAAK,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAE/D,MAAM,SAAS,GACb,GAAG,GAAG,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAC7B,IAAI,GAAG,cAAc;QACrB,IAAI,GAAG,cAAc;QACrB,GAAG,GAAG,cAAc;QACpB,GAAG,GAAG,cAAc;QACpB,GAAG,GAAG,iBAAiB,CAAC;IAE1B,MAAM,UAAU,GACd,SAAS;QACT,GAAG,GAAG,eAAe;QACrB,GAAG,GAAG,gBAAgB;QACtB,GAAG
,GAAG,iBAAiB;QACvB,IAAI,GAAG,yBAAyB;QAChC,GAAG,GAAG,aAAa,CAAC;IAEtB,OAAO;QACL,aAAa;QACb,cAAc,EAAE,MAAM,CAAC,cAAc,CAAC;QACtC,cAAc,EAAE,MAAM,CAAC,cAAc,CAAC;QACtC,cAAc,EAAE,MAAM,CAAC,cAAc,CAAC;QACtC,cAAc,EAAE,MAAM,CAAC,cAAc,CAAC;QACtC,iBAAiB,EAAE,MAAM,CAAC,iBAAiB,CAAC;QAC5C,eAAe,EAAE,MAAM,CAAC,eAAe,CAAC;QACxC,gBAAgB,EAAE,MAAM,CAAC,gBAAgB,CAAC;QAC1C,iBAAiB,EAAE,MAAM,CAAC,iBAAiB,CAAC;QAC5C,yBAAyB,EAAE,MAAM,CAAC,yBAAyB,CAAC;QAC5D,aAAa,EAAE,MAAM,CAAC,aAAa,CAAC;QACpC,SAAS,EAAE,MAAM,CAAC,SAAS,CAAC;QAC5B,UAAU,EAAE,MAAM,CAAC,UAAU,CAAC;KAC/B,CAAC;AACJ,CAAC"}
@@ -0,0 +1,26 @@
1
+ import type { BootstrapResult } from './types.js';
2
/**
 * mulberry32 — a tiny, fast, deterministic 32-bit PRNG. Returns a stateful
 * generator producing floats in [0, 1). Seeding it makes the whole bootstrap
 * reproducible, which is the entire point: re-running from a clean checkout
 * yields the identical promotion verdict (ADR-076 Repro gate).
 *
 * @param seed initial generator state.
 * @returns a function that yields the next pseudo-random float on each call.
 */
export declare function makeRng(seed: number): () => number;
9
/**
 * Seeded bootstrap over the parent→child per-task score deltas.
 *
 * Draws `samples` independent bootstrap deltas: each iteration picks one parent
 * score and one child score uniformly at random (from the seeded PRNG) and
 * records `child - parent`. The sorted deltas give the mean and the 2.5%/97.5%
 * percentiles. `promote` requires both a meaningful mean (> `minDelta`) and a
 * lower-95% bound above zero (the win is statistically real).
 *
 * Empty parent or child arrays yield a safe zero result (nothing to promote).
 * Pure and deterministic for a fixed `seed`.
 *
 * @param parentScores per-task scores of the parent variant.
 * @param childScores per-task scores of the child variant.
 * @param opts optional tuning: number of bootstrap `samples`, PRNG `seed`, and
 *   the `minDelta` the mean must exceed for promotion.
 * @returns the bootstrap summary, including the promotion verdict.
 */
export declare function bootstrapDelta(parentScores: number[], childScores: number[], opts?: {
    samples?: number;
    seed?: number;
    minDelta?: number;
}): BootstrapResult;
26
+ //# sourceMappingURL=stats.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"stats.d.ts","sourceRoot":"","sources":["../../src/bench/stats.ts"],"names":[],"mappings":"AAcA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAElD;;;;;GAKG;AACH,wBAAgB,OAAO,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,MAAM,CASlD;AAWD;;;;;;;;;;;GAWG;AACH,wBAAgB,cAAc,CAC5B,YAAY,EAAE,MAAM,EAAE,EACtB,WAAW,EAAE,MAAM,EAAE,EACrB,IAAI,CAAC,EAAE;IAAE,OAAO,CAAC,EAAE,MAAM,CAAC;IAAC,IAAI,CAAC,EAAE,MAAM,CAAC;IAAC,QAAQ,CAAC,EAAE,MAAM,CAAA;CAAE,GAC5D,eAAe,CA4BjB"}
@@ -0,0 +1,74 @@
1
+ // SPDX-License-Identifier: MIT
2
+ //
3
+ // Seeded bootstrap statistics (ADR-076) — the anti-noise guard on promotion.
4
+ // A child is only "really" better than its parent when the lower 95% bound on
5
+ // the bootstrapped parent→child score delta is above zero, not one lucky run.
6
+ //
7
+ // CRITICAL: the bootstrap MUST be reproducible. The reference design used
8
+ // `Math.random()`, which is non-deterministic and would itself fail the Repro
9
+ // gate (ADR-076 §statistical promotion). This module uses a SEEDED mulberry32
10
+ // PRNG, so the verdict is byte-reproducible from a clean checkout: the same
11
+ // (scores, seed) always yields the identical lower95/meanDelta.
12
+ //
13
+ // Pure (the RNG is seeded), no I/O.
14
/**
 * Build a seeded mulberry32 generator: a small 32-bit PRNG whose output
 * sequence is fully determined by `seed`. That determinism is what lets the
 * bootstrap reproduce the same promotion verdict from a clean checkout
 * (ADR-076 Repro gate).
 *
 * @param seed initial state, coerced to an unsigned 32-bit integer.
 * @returns a closure; each call advances the state and yields a float in [0, 1).
 */
export function makeRng(seed) {
    const UINT32_SPAN = 4294967296; // 2^32: maps an unsigned 32-bit value into [0, 1)
    let state = seed >>> 0;
    const next = () => {
        // Advance the state by the mulberry32 increment, wrapping to 32 bits.
        state = ((state | 0) + 0x6d2b79f5) | 0;
        // Two multiply/xor-shift rounds scramble the state into the output word.
        const firstMix = Math.imul(state ^ (state >>> 15), 1 | state);
        const secondMix = (firstMix + Math.imul(firstMix ^ (firstMix >>> 7), 61 | firstMix)) ^ firstMix;
        const word = (secondMix ^ (secondMix >>> 14)) >>> 0;
        return word / UINT32_SPAN;
    };
    return next;
}
30
/**
 * Round to 6 decimal places so the bootstrap output carries no
 * float-representation noise and is identical across runs and in JSON
 * artifacts. `toFixed` prints a negative zero unsigned, so the parsed result is
 * never `-0`. Kept local so this module stays dependency-free.
 *
 * @param value number to round (NaN stays NaN).
 * @returns `value` rounded to 6 decimals.
 */
function round6(value) {
    const micros = Math.round(value * 1e6);
    const text = (micros / 1e6).toFixed(6);
    return Number(text);
}
38
/**
 * Seeded bootstrap over the parent→child per-task score deltas.
 *
 * Draws `samples` independent bootstrap deltas: each iteration picks one parent
 * score and one child score uniformly at random (from the seeded PRNG) and
 * records `child - parent`. The sorted deltas give the mean and the 2.5%/97.5%
 * percentiles. `promote` requires both a meaningful mean (> `minDelta`) and a
 * lower-95% bound above zero (the win is statistically real).
 *
 * Nothing is drawn, and a safe zero result (promote = false) is returned, when
 * either score array is empty or when `samples` does not allow at least one
 * draw (zero, negative, NaN or infinite). A fractional `samples` is truncated
 * to a whole number of draws. Pure and deterministic for a fixed `seed`.
 *
 * @param parentScores per-task scores of the parent variant.
 * @param childScores per-task scores of the child variant.
 * @param opts optional `samples` (default 5000), `seed` (default 0) and
 *   `minDelta` (default 0.05).
 * @returns mean delta, 95% bounds, the promotion verdict, and the number of
 *   draws actually used.
 */
export function bootstrapDelta(parentScores, childScores, opts) {
    const requestedSamples = opts?.samples ?? 5000;
    const seed = opts?.seed ?? 0;
    const minDelta = opts?.minDelta ?? 0.05;
    // Array lengths must be whole and non-negative: normalise up front so a bad
    // option can neither throw a RangeError nor divide by zero below.
    const samples = Number.isFinite(requestedSamples)
        ? Math.max(0, Math.trunc(requestedSamples))
        : 0;
    if (samples === 0 || parentScores.length === 0 || childScores.length === 0) {
        return { meanDelta: 0, lower95: 0, upper95: 0, promote: false, samples };
    }
    const rng = makeRng(seed);
    const deltas = new Array(samples);
    let sum = 0;
    for (let i = 0; i < samples; i += 1) {
        // Parent is drawn before child: the draw order is part of the reproducible sequence.
        const parent = parentScores[Math.floor(rng() * parentScores.length)];
        const child = childScores[Math.floor(rng() * childScores.length)];
        const delta = child - parent;
        deltas[i] = delta;
        sum += delta;
    }
    deltas.sort((x, y) => x - y);
    const meanDelta = round6(sum / samples);
    // Both percentile indices are < samples, so the lookups stay in range.
    const lower95 = round6(deltas[Math.floor(samples * 0.025)]);
    const upper95 = round6(deltas[Math.floor(samples * 0.975)]);
    const promote = meanDelta > minDelta && lower95 > 0;
    return { meanDelta, lower95, upper95, promote, samples };
}
74
+ //# sourceMappingURL=stats.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"stats.js","sourceRoot":"","sources":["../../src/bench/stats.ts"],"names":[],"mappings":"AAAA,+BAA+B;AAC/B,EAAE;AACF,6EAA6E;AAC7E,8EAA8E;AAC9E,8EAA8E;AAC9E,EAAE;AACF,0EAA0E;AAC1E,8EAA8E;AAC9E,8EAA8E;AAC9E,4EAA4E;AAC5E,gEAAgE;AAChE,EAAE;AACF,oCAAoC;AAIpC;;;;;GAKG;AACH,MAAM,UAAU,OAAO,CAAC,IAAY;IAClC,IAAI,CAAC,GAAG,IAAI,KAAK,CAAC,CAAC;IACnB,OAAO;QACL,CAAC,IAAI,CAAC,CAAC;QACP,CAAC,GAAG,CAAC,CAAC,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC;QACzB,IAAI,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC;QACzC,CAAC,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QAC/C,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,GAAG,UAAU,CAAC;IAC/C,CAAC,CAAC;AACJ,CAAC;AAED;;;;GAIG;AACH,SAAS,MAAM,CAAC,KAAa;IAC3B,OAAO,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,GAAG,GAAG,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;AACrD,CAAC;AAED;;;;;;;;;;;GAWG;AACH,MAAM,UAAU,cAAc,CAC5B,YAAsB,EACtB,WAAqB,EACrB,IAA6D;IAE7D,MAAM,OAAO,GAAG,IAAI,EAAE,OAAO,IAAI,IAAI,CAAC;IACtC,MAAM,IAAI,GAAG,IAAI,EAAE,IAAI,IAAI,CAAC,CAAC;IAC7B,MAAM,QAAQ,GAAG,IAAI,EAAE,QAAQ,IAAI,IAAI,CAAC;IAExC,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC1D,OAAO,EAAE,SAAS,EAAE,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,CAAC;IAC3E,CAAC;IAED,MAAM,GAAG,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAC1B,MAAM,MAAM,GAAa,IAAI,KAAK,CAAC,OAAO,CAAC,CAAC;IAC5C,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;QACpC,MAAM,MAAM,GAAG,YAAY,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,EAAE,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC,CAAC;QACrE,MAAM,KAAK,GAAG,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,EAAE,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC;QAClE,MAAM,KAAK,GAAG,KAAK,GAAG,MAAM,CAAC;QAC7B,MAAM,CAAC,CAAC,CAAC,GAAG,KAAK,CAAC;QAClB,GAAG,IAAI,KAAK,CAAC;IACf,CAAC;IAED,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAE7B,MAAM,SAAS
,GAAG,MAAM,CAAC,GAAG,GAAG,OAAO,CAAC,CAAC;IACxC,MAAM,OAAO,GAAG,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;IAC5D,MAAM,OAAO,GAAG,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;IAC5D,MAAM,OAAO,GAAG,SAAS,GAAG,QAAQ,IAAI,OAAO,GAAG,CAAC,CAAC;IAEpD,OAAO,EAAE,SAAS,EAAE,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,CAAC;AAC3D,CAAC"}
@@ -0,0 +1,16 @@
1
+ import type { BenchSuite, BenchmarkTask } from './types.js';
2
/**
 * Stable SHA-256 over the canonicalised task list.
 *
 * @param tasks the task list to fingerprint.
 * @returns the digest as a hex string.
 */
export declare function hashTasks(tasks: BenchmarkTask[]): string;
4
/**
 * Build a hash-pinned suite from a task list.
 *
 * @param id suite identifier, stored as given.
 * @param version suite version, stored as given.
 * @param tasks the tasks to include and fingerprint.
 * @returns a new suite whose `taskHash` is computed from `tasks`.
 */
export declare function makeSuite(id: string, version: string, tasks: BenchmarkTask[]): BenchSuite;
6
/**
 * Recompute the hash and compare it to the recorded one.
 *
 * @param suite the suite to check.
 * @returns `ok` (whether the hashes match), `expected` (the hash recorded on
 *   the suite) and `actual` (the freshly computed hash).
 */
export declare function verifySuite(suite: BenchSuite): {
    ok: boolean;
    expected: string;
    actual: string;
};
12
/**
 * Load a suite from disk and verify its hash (throws on tamper).
 *
 * @param file path of the suite JSON file.
 * @returns a promise for the parsed, hash-verified suite.
 */
export declare function loadSuite(file: string): Promise<BenchSuite>;
14
/**
 * Persist a suite as pretty JSON, creating the parent directory.
 *
 * @param file destination path.
 * @param suite the suite to write.
 * @returns a promise that settles once the file has been written.
 */
export declare function saveSuite(file: string, suite: BenchSuite): Promise<void>;
16
+ //# sourceMappingURL=suite.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"suite.d.ts","sourceRoot":"","sources":["../../src/bench/suite.ts"],"names":[],"mappings":"AAWA,OAAO,KAAK,EAAE,UAAU,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAe5D,uDAAuD;AACvD,wBAAgB,SAAS,CAAC,KAAK,EAAE,aAAa,EAAE,GAAG,MAAM,CAGxD;AAED,kDAAkD;AAClD,wBAAgB,SAAS,CAAC,EAAE,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,aAAa,EAAE,GAAG,UAAU,CAQzF;AAED,6DAA6D;AAC7D,wBAAgB,WAAW,CAAC,KAAK,EAAE,UAAU,GAAG;IAAE,EAAE,EAAE,OAAO,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,CAGhG;AAED,qEAAqE;AACrE,wBAAsB,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,UAAU,CAAC,CAUjE;AAED,qEAAqE;AACrE,wBAAsB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,UAAU,GAAG,OAAO,CAAC,IAAI,CAAC,CAG9E"}
@@ -0,0 +1,59 @@
1
+ // SPDX-License-Identifier: MIT
2
+ //
3
+ // Benchmark suite handling (ADR-076 §anti-gaming): a task set is an IMMUTABLE,
4
+ // hash-pinned snapshot. `hashTasks` canonicalises the tasks and hashes them;
5
+ // `verifySuite` recomputes and compares. Replay refuses to run on a mismatch, so
6
+ // a self-improving agent cannot quietly edit the task files to look better
7
+ // (benchmark tampering control).
8
+ import { createHash } from 'node:crypto';
9
+ import { mkdir, readFile, writeFile } from 'node:fs/promises';
10
+ import { dirname } from 'node:path';
11
/**
 * Recursively rebuild a JSON-like value with object keys in sorted order, so
 * its serialised form is canonical regardless of authoring order.
 *
 * Arrays keep their element order (each element is canonicalised); primitives
 * and `null` are returned unchanged. Objects are rebuilt with
 * `Object.fromEntries`, which defines every key as an own data property. Plain
 * assignment onto a `{}` literal would send a `"__proto__"` key to the
 * prototype setter and drop it from the serialised output, leaving that field
 * outside the hash.
 *
 * @param value any JSON-like value.
 * @returns a structurally equal value whose objects have sorted keys.
 */
function canonicalise(value) {
    if (Array.isArray(value)) {
        return value.map((item) => canonicalise(item));
    }
    if (value !== null && typeof value === 'object') {
        const sortedEntries = Object.keys(value)
            .sort()
            .map((key) => [key, canonicalise(value[key])]);
        return Object.fromEntries(sortedEntries);
    }
    return value;
}
24
/**
 * Fingerprint a task list: canonicalise it, serialise it to JSON, and return
 * the SHA-256 digest of that text as a hex string.
 *
 * @param tasks the task list to hash.
 * @returns hex-encoded SHA-256 digest.
 */
export function hashTasks(tasks) {
    const hasher = createHash('sha256');
    hasher.update(JSON.stringify(canonicalise(tasks)));
    return hasher.digest('hex');
}
29
/**
 * Assemble a hash-pinned suite: the given id, version and tasks, stamped with
 * the current time (ISO 8601) and the hash of the tasks.
 *
 * @param id suite identifier.
 * @param version suite version.
 * @param tasks tasks to include; also the input to `taskHash`.
 * @returns the new suite object.
 */
export function makeSuite(id, version, tasks) {
    const createdAt = new Date().toISOString();
    const taskHash = hashTasks(tasks);
    return { id, version, createdAt, taskHash, tasks };
}
39
/**
 * Check a suite against its recorded fingerprint by re-hashing its tasks.
 *
 * @param suite the suite to check.
 * @returns `ok` (true when the hashes match), `expected` (the recorded
 *   `taskHash`) and `actual` (the recomputed hash).
 */
export function verifySuite(suite) {
    const actual = hashTasks(suite.tasks);
    const expected = suite.taskHash;
    const ok = actual === expected;
    return { ok, expected, actual };
}
44
/**
 * Load a suite from disk, check its shape, and verify its hash.
 *
 * The parsed JSON is untrusted. It must be an object with a `tasks` array and
 * a `taskHash` string before it is hashed; otherwise a malformed file would
 * fail with an opaque TypeError, or a non-array `tasks` could pass through.
 *
 * @param file path of the suite JSON file.
 * @returns the parsed suite once its recorded hash matches the recomputed one.
 * @throws Error if the file is not a well-formed suite, or if the recorded
 *   `taskHash` differs from the recomputed hash (tampering). Read and JSON
 *   syntax errors propagate unchanged.
 */
export async function loadSuite(file) {
    const raw = await readFile(file, 'utf8');
    const suite = JSON.parse(raw);
    const wellFormed = suite !== null &&
        typeof suite === 'object' &&
        Array.isArray(suite.tasks) &&
        typeof suite.taskHash === 'string';
    if (!wellFormed) {
        throw new Error(`benchmark suite malformed: ${file} must contain a JSON object with a "tasks" array and a "taskHash" string`);
    }
    const check = verifySuite(suite);
    if (!check.ok) {
        throw new Error(`benchmark suite tampered: taskHash ${check.expected} != recomputed ${check.actual}`);
    }
    return suite;
}
54
/**
 * Write a suite to `file` as 2-space-indented JSON (UTF-8), first creating the
 * file's parent directory and any missing ancestors.
 *
 * @param file destination path.
 * @param suite the suite to serialise.
 */
export async function saveSuite(file, suite) {
    const parentDir = dirname(file);
    await mkdir(parentDir, { recursive: true });
    const prettyJson = JSON.stringify(suite, null, 2);
    await writeFile(file, prettyJson, 'utf8');
}
59
+ //# sourceMappingURL=suite.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"suite.js","sourceRoot":"","sources":["../../src/bench/suite.ts"],"names":[],"mappings":"AAAA,+BAA+B;AAC/B,EAAE;AACF,+EAA+E;AAC/E,6EAA6E;AAC7E,iFAAiF;AACjF,2EAA2E;AAC3E,iCAAiC;AAEjC,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACzC,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAC9D,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAGpC,uFAAuF;AACvF,SAAS,YAAY,CAAC,KAAc;IAClC,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC;IACzD,IAAI,KAAK,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QACvC,MAAM,GAAG,GAA4B,EAAE,CAAC;QACxC,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,IAAI,CAAC,KAAgC,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC;YACvE,GAAG,CAAC,GAAG,CAAC,GAAG,YAAY,CAAE,KAAiC,CAAC,GAAG,CAAC,CAAC,CAAC;QACnE,CAAC;QACD,OAAO,GAAG,CAAC;IACb,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,uDAAuD;AACvD,MAAM,UAAU,SAAS,CAAC,KAAsB;IAC9C,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC,CAAC;IACtD,OAAO,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;AAC9D,CAAC;AAED,kDAAkD;AAClD,MAAM,UAAU,SAAS,CAAC,EAAU,EAAE,OAAe,EAAE,KAAsB;IAC3E,OAAO;QACL,EAAE;QACF,OAAO;QACP,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACnC,QAAQ,EAAE,SAAS,CAAC,KAAK,CAAC;QAC1B,KAAK;KACN,CAAC;AACJ,CAAC;AAED,6DAA6D;AAC7D,MAAM,UAAU,WAAW,CAAC,KAAiB;IAC3C,MAAM,MAAM,GAAG,SAAS,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IACtC,OAAO,EAAE,EAAE,EAAE,MAAM,KAAK,KAAK,CAAC,QAAQ,EAAE,QAAQ,EAAE,KAAK,CAAC,QAAQ,EAAE,MAAM,EAAE,CAAC;AAC7E,CAAC;AAED,qEAAqE;AACrE,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,IAAY;IAC1C,MAAM,GAAG,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;IACzC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAe,CAAC;IAC5C,MAAM,KAAK,GAAG,WAAW,CAAC,KAAK,CAAC,CAAC;IACjC,IAAI,CAAC,KAAK,CAAC,EAAE,EAAE,CAAC;QACd,MAAM,IAAI,KAAK,CACb,sCAAsC,KAAK,CAAC,QAAQ,kBAAkB,KAAK,CAAC,MAAM,EAAE,CACrF,CAAC;IACJ,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,qEAAqE;AACrE,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,IAAY,EAAE,KAAiB;IAC7D,MAAM,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAChD,MAAM,SAAS,CAAC,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,KAAK,EAAE,IAAI,
EAAE,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC;AAChE,CAAC"}
@@ -0,0 +1,135 @@
1
/**
 * A single benchmark task: a repo state + a goal + the commands that judge it.
 * Tasks are the unit that a suite's `taskHash` is computed over.
 */
export interface BenchmarkTask {
    id: string;
    repo: string;
    /** The commit the task is pinned to (part of the immutable snapshot). */
    commit: string;
    title: string;
    prompt: string;
    /** Visible acceptance test. */
    publicTestCommand: string;
    /** Held-out test, mounted only at eval time (anti-overfitting). */
    hiddenTestCommand: string;
    /** Full relevant suite, to catch regressions. */
    regressionTestCommand: string;
    // NOTE(review): presumably the per-task time limit in milliseconds — confirm against the runner.
    timeoutMs: number;
    // NOTE(review): presumably the per-task spend ceiling in USD — confirm against the runner.
    maxCostUsd: number;
    /** Files a variant is permitted to change for this task. */
    allowedMutationFiles: string[];
    /** Files that must never be touched (secrets, lockfiles, CI). */
    blockedFiles: string[];
    successCriteria: string[];
    /** Difficulty rating, restricted to the integers 1 through 5. */
    difficulty: 1 | 2 | 3 | 4 | 5;
    tags: string[];
}
25
/**
 * A versioned, hash-pinned task set. The `taskHash` is the anti-gaming anchor:
 * replay refuses to run if the tasks have changed (immutable task snapshots).
 */
export interface BenchSuite {
    id: string;
    version: string;
    /** Creation timestamp as a string. */
    createdAt: string;
    /** Stable hash over the canonicalised tasks (see suite.ts `hashTasks`). */
    taskHash: string;
    /** The tasks the hash was computed over. */
    tasks: BenchmarkTask[];
}
37
/** The outcome of running ONE variant against ONE task. */
export interface BenchmarkResult {
    taskId: string;
    variantId: string;
    /** Parent variant id; nullable. */
    parentId: string | null;
    repoCommit: string;
    /** verified_solve: public ∧ hidden ∧ regression ∧ safety all passed. */
    solved: boolean;
    publicTestPassed: boolean;
    hiddenTestPassed: boolean;
    regressionPassed: boolean;
    // NOTE(review): presumably elapsed wall-clock time in milliseconds — confirm against the runner.
    durationMs: number;
    // NOTE(review): presumably metered spend in USD — confirm against the runner.
    costUsd: number;
    changedFiles: string[];
    /** Any blocked file the variant tried to change (a hard safety penalty). */
    blockedFileTouches: string[];
    /** Safety findings (e.g. from the ADR-071 gate). Non-empty ⇒ unsafe. */
    safetyViolations: string[];
    /** A referenced file that does not exist (hallucination penalty). */
    hallucinatedFileRefs: boolean;
    traceQuality: number;
    /** Relative path to the persisted patch + trace (for replay/audit). */
    patchPath: string;
    tracePath: string;
    /** Weighted base score before penalties (0..1). */
    baseScore: number;
    /** baseScore minus the penalty layer (may be negative). */
    finalScore: number;
}
66
/** The pure inputs the benchmark scorer needs (decoupled from I/O). */
export interface BenchScoreInput {
    publicTestPassed: boolean;
    hiddenTestPassed: boolean;
    regressionPassed: boolean;
    /** Safety findings; any entry disqualifies a verified solve and incurs a penalty. */
    safetyViolations: string[];
    /** Blocked files touched; any entry disqualifies a verified solve and incurs a penalty. */
    blockedFileTouches: string[];
    /** Whether a hallucinated file reference was detected (penalised when true). */
    hallucinatedFileRefs: boolean;
    /** Cost spent; compared against `maxCostUsd`. */
    costUsd: number;
    /** Cost budget; a value <= 0 makes the cost-efficiency term a full 1. */
    maxCostUsd: number;
    /** Time taken; compared against `timeoutMs`. */
    durationMs: number;
    /** Time budget; a value <= 0 makes the latency-efficiency term a full 1. */
    timeoutMs: number;
}
79
/**
 * The pure output of the benchmark scorer. Every numeric field is rounded to
 * 6 decimals by the scorer.
 */
export interface BenchScore {
    /** True only when all three tests passed with no safety violations and no blocked-file touches. */
    verifiedSolve: boolean;
    /** 1 if the public test passed, else 0. */
    publicTestPass: number;
    /** 1 if the hidden test passed, else 0. */
    hiddenTestPass: number;
    /** 1 if the regression suite passed, else 0. */
    regressionPass: number;
    /** max(0, 1 - costUsd / maxCostUsd), or 1 when maxCostUsd <= 0. */
    costEfficiency: number;
    /** max(0, 1 - durationMs / timeoutMs), or 1 when timeoutMs <= 0. */
    latencyEfficiency: number;
    /** Penalty flag: 1 if there was at least one safety violation, else 0. */
    safetyViolation: number;
    /** Penalty flag: 1 if at least one blocked file was touched, else 0. */
    blockedFileTouch: number;
    /** Penalty flag: 1 if the regression suite failed, else 0. */
    regressionFailure: number;
    /** Penalty flag: 1 if a hallucinated file reference was detected, else 0. */
    hallucinatedFileReference: number;
    /** Penalty flag: 1 if costUsd exceeded maxCostUsd, else 0. */
    excessiveCost: number;
    /** Weighted sum of the six base terms (0.40 / 0.15 / 0.15 / 0.10 / 0.10 / 0.10). */
    baseScore: number;
    /** baseScore minus the weighted penalty flags; may be negative. */
    finalScore: number;
}
95
/** One of the five evaluation gates: solve, regression, safety, cost, repro. */
export type GateName = 'solve' | 'regression' | 'safety' | 'cost' | 'repro';
97
/** The outcome of evaluating a single gate. */
export interface GateResult {
    /** Which gate this result belongs to. */
    gate: GateName;
    /** Whether the gate passed. */
    pass: boolean;
    // NOTE(review): presumably a human-readable explanation of the outcome — confirm against gates.ts.
    detail: string;
}
102
/** Result of a seeded bootstrap over the parent→child per-task score deltas. */
export interface BootstrapResult {
    /** Mean of the sampled `child - parent` deltas. */
    meanDelta: number;
    /** 2.5th-percentile delta (lower bound of the 95% interval). */
    lower95: number;
    /** 97.5th-percentile delta (upper bound of the 95% interval). */
    upper95: number;
    /** meanDelta > minDelta ∧ lower95 > 0. */
    promote: boolean;
    /** Number of bootstrap samples requested. */
    samples: number;
}
111
/** The full, auditable promotion verdict for a child vs its parent. */
export interface PromotionDecision {
    /** The final verdict: whether the child is promoted. */
    promote: boolean;
    // NOTE(review): presumably human-readable justifications for the verdict — confirm against promotion.ts.
    reasons: string[];
    // NOTE(review): meanDelta / lower95 presumably mirror the BootstrapResult fields of the same name — confirm.
    meanDelta: number;
    lower95: number;
    childMeanScore: number;
    parentMeanScore: number;
    childVerifiedSolveRate: number;
    parentVerifiedSolveRate: number;
    childRegressionRate: number;
    parentRegressionRate: number;
    childSafetyViolations: number;
    cleanReplay: boolean;
}
126
/** A node in the lineage tree used for descendant-potential analysis. */
export interface LineageNode {
    id: string;
    /** Id of the parent node; nullable. */
    parentId: string | null;
    score: number;
    // NOTE(review): presumably the ids of this node's direct children — confirm against lineage.ts.
    children: string[];
}
133
/**
 * Runs one variant against one task, producing a BenchmarkResult.
 * Asynchronous: the result is delivered through a promise.
 */
export type RunVariantFn = (variantId: string, task: BenchmarkTask) => Promise<BenchmarkResult>;
135
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/bench/types.ts"],"names":[],"mappings":"AAeA,mFAAmF;AACnF,MAAM,WAAW,aAAa;IAC5B,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,yEAAyE;IACzE,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,+BAA+B;IAC/B,iBAAiB,EAAE,MAAM,CAAC;IAC1B,mEAAmE;IACnE,iBAAiB,EAAE,MAAM,CAAC;IAC1B,iDAAiD;IACjD,qBAAqB,EAAE,MAAM,CAAC;IAC9B,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,4DAA4D;IAC5D,oBAAoB,EAAE,MAAM,EAAE,CAAC;IAC/B,iEAAiE;IACjE,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,UAAU,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;IAC9B,IAAI,EAAE,MAAM,EAAE,CAAC;CAChB;AAED;;;GAGG;AACH,MAAM,WAAW,UAAU;IACzB,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,2EAA2E;IAC3E,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,aAAa,EAAE,CAAC;CACxB;AAED,2DAA2D;AAC3D,MAAM,WAAW,eAAe;IAC9B,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,UAAU,EAAE,MAAM,CAAC;IACnB,wEAAwE;IACxE,MAAM,EAAE,OAAO,CAAC;IAChB,gBAAgB,EAAE,OAAO,CAAC;IAC1B,gBAAgB,EAAE,OAAO,CAAC;IAC1B,gBAAgB,EAAE,OAAO,CAAC;IAC1B,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,4EAA4E;IAC5E,kBAAkB,EAAE,MAAM,EAAE,CAAC;IAC7B,wEAAwE;IACxE,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,qEAAqE;IACrE,oBAAoB,EAAE,OAAO,CAAC;IAC9B,YAAY,EAAE,MAAM,CAAC;IACrB,uEAAuE;IACvE,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,mDAAmD;IACnD,SAAS,EAAE,MAAM,CAAC;IAClB,2DAA2D;IAC3D,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,uEAAuE;AACvE,MAAM,WAAW,eAAe;IAC9B,gBAAgB,EAAE,OAAO,CAAC;IAC1B,gBAAgB,EAAE,OAAO,CAAC;IAC1B,gBAAgB,EAAE,OAAO,CAAC;IAC1B,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,kBAAkB,EAAE,MAAM,EAAE,CAAC;IAC7B,oBAAoB,EAAE,OAAO,CAAC;IAC9B,OAAO,EAAE,MAAM,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,+CAA+C;AAC/C,MAAM,WAAW,UAAU;IACzB,aAAa,EAAE,OAAO,CAAC;IACvB,cAAc,EAAE,MAAM,CAAC;IACvB,cAAc,EAAE,MAAM,CAAC;IACvB,cAAc,EAAE,MAAM,CAAC;IACvB,cAAc,EAA
E,MAAM,CAAC;IACvB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,eAAe,EAAE,MAAM,CAAC;IACxB,gBAAgB,EAAE,MAAM,CAAC;IACzB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,yBAAyB,EAAE,MAAM,CAAC;IAClC,aAAa,EAAE,MAAM,CAAC;IACtB,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,wCAAwC;AACxC,MAAM,MAAM,QAAQ,GAAG,OAAO,GAAG,YAAY,GAAG,QAAQ,GAAG,MAAM,GAAG,OAAO,CAAC;AAE5E,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,QAAQ,CAAC;IACf,IAAI,EAAE,OAAO,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,gFAAgF;AAChF,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,0CAA0C;IAC1C,OAAO,EAAE,OAAO,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,uEAAuE;AACvE,MAAM,WAAW,iBAAiB;IAChC,OAAO,EAAE,OAAO,CAAC;IACjB,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,cAAc,EAAE,MAAM,CAAC;IACvB,eAAe,EAAE,MAAM,CAAC;IACxB,sBAAsB,EAAE,MAAM,CAAC;IAC/B,uBAAuB,EAAE,MAAM,CAAC;IAChC,mBAAmB,EAAE,MAAM,CAAC;IAC5B,oBAAoB,EAAE,MAAM,CAAC;IAC7B,qBAAqB,EAAE,MAAM,CAAC;IAC9B,WAAW,EAAE,OAAO,CAAC;CACtB;AAED,yEAAyE;AACzE,MAAM,WAAW,WAAW;IAC1B,EAAE,EAAE,MAAM,CAAC;IACX,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB;AAED,sEAAsE;AACtE,MAAM,MAAM,YAAY,GAAG,CACzB,SAAS,EAAE,MAAM,EACjB,IAAI,EAAE,aAAa,KAChB,OAAO,CAAC,eAAe,CAAC,CAAC"}
@@ -0,0 +1,16 @@
1
+ // SPDX-License-Identifier: MIT
2
+ //
3
+ // Benchmark layer — shared types (ADR-076).
4
+ //
5
+ // "Benchmark the parent versus the child, not the idea." Given the SAME repo,
6
+ // task set, model, budget, and sandbox: did the child beat its parent WITHOUT
7
+ // increasing risk? A child is promoted only when it clears five gates — solve,
8
+ // regression, safety, cost, repro — and the win is statistically real (the lower
9
+ // 95% bootstrap bound on the parent→child score delta is above zero), not one
10
+ // lucky run.
11
+ //
12
+ // This is the rigorous evaluation path used when a task set is supplied
13
+ // (`evolve --bench <suite>`). The lightweight ADR-072 scorer remains the default
14
+ // for a quick `evolve <repo>` with no task set.
15
+ export {};
16
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../../src/bench/types.ts"],"names":[],"mappings":"AAAA,+BAA+B;AAC/B,EAAE;AACF,4CAA4C;AAC5C,EAAE;AACF,8EAA8E;AAC9E,8EAA8E;AAC9E,+EAA+E;AAC/E,iFAAiF;AACjF,8EAA8E;AAC9E,aAAa;AACb,EAAE;AACF,wEAAwE;AACxE,iFAAiF;AACjF,gDAAgD"}
package/dist/cli.d.ts ADDED
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env node
2
+ export {};
3
+ //# sourceMappingURL=cli.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cli.d.ts","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":""}
package/dist/cli.js ADDED
@@ -0,0 +1,125 @@
1
+ #!/usr/bin/env node
2
+ // SPDX-License-Identifier: MIT
3
+ //
4
+ // Darwin Mode CLI. Verbs:
5
+ //
6
+ // metaharness-darwin evolve <repo> [--generations N] [--children N]
7
+ // [--concurrency N] [--seed N]
8
+ // metaharness-darwin bench create <repo> [--out <suite.json>]
9
+ // metaharness-darwin bench verify <suite.json>
10
+ //
11
+ // Writes a self-describing `.metaharness/` work tree under the repo and prints a
12
+ // leaderboard + the winner's lineage. Dependency-free.
13
+ import { resolve } from 'node:path';
14
+ import { evolve } from './evolve.js';
15
+ import { profileRepo } from './repo_profiler.js';
16
+ import { loadSuite, makeSuite, saveSuite, verifySuite } from './bench/suite.js';
17
/**
 * Look up the value that follows a `--name` option in `process.argv`.
 *
 * @param name     Option token to search for (e.g. `'--out'`).
 * @param fallback Value returned when the option is absent or carries no value.
 * @returns The argument immediately after `name`, or `fallback`.
 */
function flag(name, fallback) {
    const at = process.argv.indexOf(name);
    if (at === -1) {
        return fallback;
    }
    const value = process.argv[at + 1];
    // A following `--option` token is the next option, not this one's value
    // (e.g. `--out --seed 3`), so treat it exactly like a missing value.
    if (value == null || String(value).startsWith('--')) {
        return fallback;
    }
    return value;
}
21
/**
 * Read a numeric command-line option via `flag`.
 *
 * @param name     Option token to look up (e.g. `'--seed'`).
 * @param fallback Number returned when the option is absent, blank, or does
 *                 not parse to a finite number.
 * @returns The parsed finite number, or `fallback`.
 */
function num(name, fallback) {
    const raw = String(flag(name, String(fallback)));
    // Number('') and Number('   ') evaluate to 0, which would silently turn a
    // blank value into a real setting; use the fallback instead.
    if (raw.trim() === '') {
        return fallback;
    }
    const parsed = Number(raw);
    return Number.isFinite(parsed) ? parsed : fallback;
}
25
+ function printReport(result) {
26
+ const scored = result.records
27
+ .filter((r) => r.score)
28
+ .sort((a, b) => (b.score?.finalScore ?? 0) - (a.score?.finalScore ?? 0));
29
+ process.stdout.write('\nDarwin Mode — leaderboard\n');
30
+ for (const r of scored.slice(0, 10)) {
31
+ const s = r.score;
32
+ const tag = r.variant.id === result.winner?.variant.id ? ' ◀ winner' : '';
33
+ process.stdout.write(` ${s.finalScore.toFixed(3)} ${r.variant.id}` +
34
+ ` [${r.variant.mutationSurface}] safety=${s.safetyScore.toFixed(2)}` +
35
+ ` pass=${s.testPassRate.toFixed(2)}${tag}\n`);
36
+ }
37
+ if (result.winner) {
38
+ process.stdout.write(`\nWinner: ${result.winner.variant.id}\n`);
39
+ process.stdout.write(`Lineage: ${result.winnerLineage.join(' → ')}\n`);
40
+ const base = result.baseline.score?.finalScore ?? 0;
41
+ const win = result.winner.score?.finalScore ?? 0;
42
+ process.stdout.write(`Delta over baseline: ${(win - base >= 0 ? '+' : '')}${(win - base).toFixed(3)}\n`);
43
+ }
44
+ else {
45
+ process.stdout.write('\nNo scored variants.\n');
46
+ }
47
+ }
48
/**
 * `bench create [repo]` / `bench verify <suite.json>` (ADR-076).
 *
 * `create` profiles the repo, builds a single scaffold task around the
 * profiled test command, and saves the suite to `--out` (default
 * `<repo>/.metaharness/bench.json`). `verify` loads a suite file and prints
 * its hash status, setting a failing exit code when the hash does not match.
 * Any other sub-command (or a `verify` without a file) prints usage and
 * exits with status 1.
 */
async function runBench() {
    const sub = process.argv[3];
    if (sub === 'create') {
        // An option token in the repo slot (`bench create --out x.json`) means
        // the repo was omitted, so use the current directory instead.
        const repoArg = process.argv[4];
        const repoRoot = resolve(repoArg == null || repoArg.startsWith('--') ? process.cwd() : repoArg);
        const profile = await profileRepo(repoRoot);
        const out = resolve(flag('--out', resolve(repoRoot, '.metaharness/bench.json')));
        // A scaffold task pinned to the repo's own test command. Hidden/regression
        // commands are placeholders to be replaced with human-curated held-out tests.
        const task = {
            id: 'task-0001',
            repo: repoRoot,
            commit: 'WORKDIR',
            title: 'Repo native smoke task',
            prompt: 'Keep the repository test suite green.',
            publicTestCommand: profile.testCommand,
            hiddenTestCommand: profile.testCommand,
            regressionTestCommand: profile.testCommand,
            timeoutMs: 300000,
            maxCostUsd: 2,
            allowedMutationFiles: [],
            blockedFiles: ['.env', 'package-lock.json', 'pnpm-lock.yaml', 'yarn.lock', '.github/workflows'],
            successCriteria: ['public test passes', 'hidden test passes', 'regression suite passes'],
            difficulty: 1,
            tags: ['smoke', 'repo-native'],
        };
        const suite = makeSuite('repo-native', '0.1.0', [task]);
        await saveSuite(out, suite);
        process.stdout.write(`Wrote suite (${suite.tasks.length} task, hash ${suite.taskHash.slice(0, 12)}…): ${out}\n`);
        return;
    }
    if (sub === 'verify') {
        const fileArg = process.argv[4];
        if (!fileArg) {
            // Without a path, resolve('') would point at the current directory and
            // fail inside loadSuite; report the usage error directly instead.
            process.stderr.write('usage: metaharness-darwin bench <create|verify> …\n');
            process.exit(1);
        }
        const file = resolve(fileArg);
        const suite = await loadSuite(file); // throws on tamper
        const check = verifySuite(suite);
        process.stdout.write(`Suite ${suite.id}@${suite.version}: ${suite.tasks.length} tasks, hash ${check.ok ? 'OK' : 'MISMATCH'} (${check.actual.slice(0, 12)}…)\n`);
        if (!check.ok) {
            // A reported mismatch must not look like success to scripts or CI.
            process.exitCode = 1;
        }
        return;
    }
    process.stderr.write('usage: metaharness-darwin bench <create|verify> …\n');
    process.exit(1);
}
89
/**
 * CLI entry point: dispatch on the first command-line verb.
 *
 * `bench` is delegated to `runBench`. Anything other than `evolve` prints the
 * usage text to stderr and exits with status 1. `evolve` runs an evolution
 * against the given repo (current directory when omitted), using
 * `<repo>/.metaharness` as the work tree, then prints the report and the
 * artifact location.
 */
async function main() {
    const command = process.argv[2];
    if (command === 'bench') {
        await runBench();
        return;
    }
    if (command !== 'evolve') {
        process.stderr.write('usage: metaharness-darwin <evolve|bench> …\n' +
            ' evolve <repo> [--generations N] [--children N] [--concurrency N] [--seed N]\n' +
            ' bench create <repo> [--out <suite.json>]\n' +
            ' bench verify <suite.json>\n');
        process.exit(1);
    }
    // An option token in the repo slot (`evolve --seed 3`) means the repo was
    // omitted; fall back to the current directory rather than resolving the
    // option name as a path.
    const repoArg = process.argv[3];
    const repoRoot = resolve(repoArg == null || repoArg.startsWith('--') ? process.cwd() : repoArg);
    const workRoot = resolve(repoRoot, '.metaharness');
    const result = await evolve({
        repoRoot,
        workRoot,
        generations: num('--generations', 3),
        childrenPerGeneration: num('--children', 4),
        concurrency: num('--concurrency', 4),
        seed: num('--seed', 0),
        promotionDelta: 0.05,
        tasks: [
            'run repository test suite',
            'verify generated harness safety',
            'check trace quality',
        ],
    });
    printReport(result);
    process.stdout.write(`\nArtifacts: ${workRoot}\n`);
}
121
// Run the CLI; on any rejection print the failure to stderr (stack when
// available, otherwise the message or stringified value) and exit with 1.
main().catch((failure) => {
    let detail;
    if (failure instanceof Error) {
        detail = failure.stack ?? failure.message;
    } else {
        detail = String(failure);
    }
    process.stderr.write(`${detail}\n`);
    process.exit(1);
});
125
+ //# sourceMappingURL=cli.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AACA,+BAA+B;AAC/B,EAAE;AACF,0BAA0B;AAC1B,EAAE;AACF,sEAAsE;AACtE,kEAAkE;AAClE,gEAAgE;AAChE,iDAAiD;AACjD,EAAE;AACF,iFAAiF;AACjF,uDAAuD;AAEvD,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,OAAO,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AACjD,OAAO,EAAE,SAAS,EAAE,SAAS,EAAE,SAAS,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAIhF,SAAS,IAAI,CAAC,IAAY,EAAE,QAAgB;IAC1C,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;IACrC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,QAAQ,CAAC,CAAC;AACjE,CAAC;AAED,SAAS,GAAG,CAAC,IAAY,EAAE,QAAgB;IACzC,MAAM,CAAC,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,EAAE,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;IAC/C,OAAO,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC;AAC3C,CAAC;AAED,SAAS,WAAW,CAAC,MAAuB;IAC1C,MAAM,MAAM,GAAG,MAAM,CAAC,OAAO;SAC1B,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;SACtB,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,KAAK,EAAE,UAAU,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,KAAK,EAAE,UAAU,IAAI,CAAC,CAAC,CAAC,CAAC;IAE3E,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,+BAA+B,CAAC,CAAC;IACtD,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC;QACpC,MAAM,CAAC,GAAG,CAAC,CAAC,KAAM,CAAC;QACnB,MAAM,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,EAAE,KAAK,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAAE,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,EAAE,CAAC;QAC1E,OAAO,CAAC,MAAM,CAAC,KAAK,CAClB,KAAK,CAAC,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,OAAO,CAAC,EAAE,EAAE;YAC7C,MAAM,CAAC,CAAC,OAAO,CAAC,eAAe,aAAa,CAAC,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE;YACtE,UAAU,CAAC,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,GAAG,IAAI,CAChD,CAAC;IACJ,CAAC;IAED,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;QAClB,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,aAAa,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,EAAE,IAAI,CAAC,CAAC;QAChE,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,YAAY,MAAM,CAAC,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QACvE,MAAM,IAA
I,GAAG,MAAM,CAAC,QAAQ,CAAC,KAAK,EAAE,UAAU,IAAI,CAAC,CAAC;QACpD,MAAM,GAAG,GAAG,MAAM,CAAC,MAAM,CAAC,KAAK,EAAE,UAAU,IAAI,CAAC,CAAC;QACjD,OAAO,CAAC,MAAM,CAAC,KAAK,CAClB,wBAAwB,CAAC,GAAG,GAAG,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,GAAG,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CACnF,CAAC;IACJ,CAAC;SAAM,CAAC;QACN,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,yBAAyB,CAAC,CAAC;IAClD,CAAC;AACH,CAAC;AAED,qEAAqE;AACrE,KAAK,UAAU,QAAQ;IACrB,MAAM,GAAG,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAE5B,IAAI,GAAG,KAAK,QAAQ,EAAE,CAAC;QACrB,MAAM,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,OAAO,CAAC,GAAG,EAAE,CAAC,CAAC;QAC3D,MAAM,OAAO,GAAG,MAAM,WAAW,CAAC,QAAQ,CAAC,CAAC;QAC5C,MAAM,GAAG,GAAG,OAAO,CAAC,IAAI,CAAC,OAAO,EAAE,OAAO,CAAC,QAAQ,EAAE,yBAAyB,CAAC,CAAC,CAAC,CAAC;QACjF,2EAA2E;QAC3E,8EAA8E;QAC9E,MAAM,IAAI,GAAkB;YAC1B,EAAE,EAAE,WAAW;YACf,IAAI,EAAE,QAAQ;YACd,MAAM,EAAE,SAAS;YACjB,KAAK,EAAE,wBAAwB;YAC/B,MAAM,EAAE,uCAAuC;YAC/C,iBAAiB,EAAE,OAAO,CAAC,WAAW;YACtC,iBAAiB,EAAE,OAAO,CAAC,WAAW;YACtC,qBAAqB,EAAE,OAAO,CAAC,WAAW;YAC1C,SAAS,EAAE,MAAM;YACjB,UAAU,EAAE,CAAC;YACb,oBAAoB,EAAE,EAAE;YACxB,YAAY,EAAE,CAAC,MAAM,EAAE,mBAAmB,EAAE,gBAAgB,EAAE,WAAW,EAAE,mBAAmB,CAAC;YAC/F,eAAe,EAAE,CAAC,oBAAoB,EAAE,oBAAoB,EAAE,yBAAyB,CAAC;YACxF,UAAU,EAAE,CAAC;YACb,IAAI,EAAE,CAAC,OAAO,EAAE,aAAa,CAAC;SAC/B,CAAC;QACF,MAAM,KAAK,GAAG,SAAS,CAAC,aAAa,EAAE,OAAO,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC;QACxD,MAAM,SAAS,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;QAC5B,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,gBAAgB,KAAK,CAAC,KAAK,CAAC,MAAM,eAAe,KAAK,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,GAAG,IAAI,CAAC,CAAC;QACjH,OAAO;IACT,CAAC;IAED,IAAI,GAAG,KAAK,QAAQ,EAAE,CAAC;QACrB,MAAM,IAAI,GAAG,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;QAC5C,MAAM,KAAK,GAAG,MAAM,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,mBAAmB;QACxD,MAAM,KAAK,GAAG,WAAW,CAAC,KAAK,CAAC,CAAC;QACjC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,SAAS,KAAK,CAAC,EAAE,IAAI,KAAK,CAAC,OAAO,KAAK,KAAK,CAAC,KAAK,CAAC,MAAM,gBAAgB,KAAK,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,UAAU,KAAK,KAAK,CA
AC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,CAAC;QAChK,OAAO;IACT,CAAC;IAED,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,qDAAqD,CAAC,CAAC;IAC5E,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,KAAK,UAAU,IAAI;IACjB,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAEhC,IAAI,OAAO,KAAK,OAAO,EAAE,CAAC;QACxB,MAAM,QAAQ,EAAE,CAAC;QACjB,OAAO;IACT,CAAC;IAED,IAAI,OAAO,KAAK,QAAQ,EAAE,CAAC;QACzB,OAAO,CAAC,MAAM,CAAC,KAAK,CAClB,8CAA8C;YAC5C,iFAAiF;YACjF,8CAA8C;YAC9C,+BAA+B,CAClC,CAAC;QACF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,MAAM,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,OAAO,CAAC,GAAG,EAAE,CAAC,CAAC;IAC3D,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,EAAE,cAAc,CAAC,CAAC;IAEnD,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC;QAC1B,QAAQ;QACR,QAAQ;QACR,WAAW,EAAE,GAAG,CAAC,eAAe,EAAE,CAAC,CAAC;QACpC,qBAAqB,EAAE,GAAG,CAAC,YAAY,EAAE,CAAC,CAAC;QAC3C,WAAW,EAAE,GAAG,CAAC,eAAe,EAAE,CAAC,CAAC;QACpC,IAAI,EAAE,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC;QACtB,cAAc,EAAE,IAAI;QACpB,KAAK,EAAE;YACL,2BAA2B;YAC3B,iCAAiC;YACjC,qBAAqB;SACtB;KACF,CAAC,CAAC;IAEH,WAAW,CAAC,MAAM,CAAC,CAAC;IACpB,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,gBAAgB,QAAQ,IAAI,CAAC,CAAC;AACrD,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE;IACrB,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,IAAI,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACnG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC"}
@@ -0,0 +1,11 @@
1
+ import type { EvolutionConfig, EvolutionResult } from './types.js';
2
+ /** Run async `fn` over `items` with at most `limit` in flight at once. Order-preserving. */
3
+ export declare function mapLimit<T, R>(items: T[], limit: number, fn: (item: T, index: number) => Promise<R>): Promise<R[]>;
4
+ /**
5
+ * Run a full Darwin Mode evolution. Returns the baseline, the winning record,
6
+ * the whole archive, and the winner's lineage. Side effects are confined to the
7
+ * `<workRoot>/.metaharness`-style tree (variants, runs, reports, archive.json,
8
+ * lineage.json).
9
+ */
10
+ export declare function evolve(config: EvolutionConfig): Promise<EvolutionResult>;
11
+ //# sourceMappingURL=evolve.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"evolve.d.ts","sourceRoot":"","sources":["../src/evolve.ts"],"names":[],"mappings":"AAmBA,OAAO,KAAK,EACV,eAAe,EACf,eAAe,EAKhB,MAAM,YAAY,CAAC;AAKpB,4FAA4F;AAC5F,wBAAsB,QAAQ,CAAC,CAAC,EAAE,CAAC,EACjC,KAAK,EAAE,CAAC,EAAE,EACV,KAAK,EAAE,MAAM,EACb,EAAE,EAAE,CAAC,IAAI,EAAE,CAAC,EAAE,KAAK,EAAE,MAAM,KAAK,OAAO,CAAC,CAAC,CAAC,GACzC,OAAO,CAAC,CAAC,EAAE,CAAC,CAad;AAqDD;;;;;GAKG;AACH,wBAAsB,MAAM,CAAC,MAAM,EAAE,eAAe,GAAG,OAAO,CAAC,eAAe,CAAC,CAuF9E"}