@metaharness/darwin 0.1.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +113 -19
- package/dist/archive.d.ts +15 -0
- package/dist/archive.d.ts.map +1 -1
- package/dist/archive.js +41 -0
- package/dist/archive.js.map +1 -1
- package/dist/bench/promotion.d.ts.map +1 -1
- package/dist/bench/promotion.js +1 -0
- package/dist/bench/promotion.js.map +1 -1
- package/dist/bench/stats.d.ts +14 -0
- package/dist/bench/stats.d.ts.map +1 -1
- package/dist/bench/stats.js +38 -2
- package/dist/bench/stats.js.map +1 -1
- package/dist/bench/types.d.ts +4 -0
- package/dist/bench/types.d.ts.map +1 -1
- package/dist/clade.d.ts +26 -0
- package/dist/clade.d.ts.map +1 -0
- package/dist/clade.js +115 -0
- package/dist/clade.js.map +1 -0
- package/dist/cli.js +34 -1
- package/dist/cli.js.map +1 -1
- package/dist/curriculum.d.ts +19 -0
- package/dist/curriculum.d.ts.map +1 -0
- package/dist/curriculum.js +48 -0
- package/dist/curriculum.js.map +1 -0
- package/dist/epistasis.d.ts +40 -0
- package/dist/epistasis.d.ts.map +1 -0
- package/dist/epistasis.js +88 -0
- package/dist/epistasis.js.map +1 -0
- package/dist/evolve.d.ts +9 -1
- package/dist/evolve.d.ts.map +1 -1
- package/dist/evolve.js +314 -9
- package/dist/evolve.js.map +1 -1
- package/dist/index.d.ts +8 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +9 -0
- package/dist/index.js.map +1 -1
- package/dist/mock-sandbox.d.ts +58 -0
- package/dist/mock-sandbox.d.ts.map +1 -0
- package/dist/mock-sandbox.js +119 -0
- package/dist/mock-sandbox.js.map +1 -0
- package/dist/mutator.d.ts +44 -2
- package/dist/mutator.d.ts.map +1 -1
- package/dist/mutator.js +100 -7
- package/dist/mutator.js.map +1 -1
- package/dist/openrouter-mutator.d.ts.map +1 -1
- package/dist/openrouter-mutator.js +5 -2
- package/dist/openrouter-mutator.js.map +1 -1
- package/dist/pareto.d.ts +7 -0
- package/dist/pareto.d.ts.map +1 -0
- package/dist/pareto.js +46 -0
- package/dist/pareto.js.map +1 -0
- package/dist/phenotype.d.ts +83 -0
- package/dist/phenotype.d.ts.map +1 -0
- package/dist/phenotype.js +189 -0
- package/dist/phenotype.js.map +1 -0
- package/dist/templates.d.ts.map +1 -1
- package/dist/templates.js +6 -1
- package/dist/templates.js.map +1 -1
- package/dist/tier2-driver.d.ts +10 -0
- package/dist/tier2-driver.d.ts.map +1 -0
- package/dist/tier2-driver.js +62 -0
- package/dist/tier2-driver.js.map +1 -0
- package/dist/tier2-sandbox.d.ts +18 -0
- package/dist/tier2-sandbox.d.ts.map +1 -0
- package/dist/tier2-sandbox.js +91 -0
- package/dist/tier2-sandbox.js.map +1 -0
- package/dist/types.d.ts +99 -0
- package/dist/types.d.ts.map +1 -1
- package/package.json +10 -7
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
// SPDX-License-Identifier: MIT
|
|
2
|
+
//
|
|
3
|
+
// Hyperbolic behavioral phenotyping (ADR-091). MAP-Elites (ADR-088) bins variants
|
|
4
|
+
// by a FLAT structural axis (which of the 7 surfaces was mutated). But real agent
|
|
5
|
+
// behaviour is HIERARCHICAL — deep recursive backtracking vs. shallow linear tool
|
|
6
|
+
// use — and hierarchies embed far more faithfully in hyperbolic space than in a
|
|
7
|
+
// flat categorical grid.
|
|
8
|
+
//
|
|
9
|
+
// We distil a variant's run traces into a behaviour feature vector, embed it in
|
|
10
|
+
// the 2-D Poincaré ball (radius ≈ hierarchical "depth/struggle", angle ≈
|
|
11
|
+
// behavioural mode), and assign a niche by hyperbolic region. The result plugs
|
|
12
|
+
// straight into `Archive.selectElites(limit, descriptorOf)` as the descriptor.
|
|
13
|
+
//
|
|
14
|
+
// Dependency-free and closed-form. NOTE on RuVector: `ruvector-wasm@2.1.x`
|
|
15
|
+
// exposes a `VectorDB`/`HNSW` with euclidean/cosine/dotproduct/manhattan metrics
|
|
16
|
+
// only — it has NO hyperbolic metric — so the Poincaré geometry is computed here
|
|
17
|
+
// natively. A RuVector HNSW index can later back nearest-niche lookups at scale,
|
|
18
|
+
// but the hyperbolic phenotyping itself does not depend on it.
|
|
19
|
+
/**
 * Smooth, bounded saturation of a magnitude.
 *
 * Negative inputs are clamped to 0 first, then passed through `tanh`, so the
 * result is mathematically in [0, 1). (In floating point `tanh` rounds to
 * exactly 1 for large inputs.)
 *
 * @param {number} x - Magnitude to saturate.
 * @returns {number} `tanh(max(0, x))`.
 */
function squash(x) {
    const magnitude = Math.max(0, x);
    return Math.tanh(magnitude);
}
|
|
23
|
+
/**
 * Fraction of lines that are exact repeats of an earlier line — a proxy for
 * loops / backtracking.
 *
 * Lines are trimmed and blank lines are dropped before counting. Every
 * occurrence of a line after its first counts as one repeat, so the number of
 * repeats equals (total lines − distinct lines).
 *
 * @param {string} text - Raw multi-line output.
 * @returns {number} Repeats divided by the number of non-blank lines; 0 when
 *   there are one or zero non-blank lines.
 */
function repetitionFraction(text) {
    const nonBlank = [];
    for (const raw of text.split('\n')) {
        const trimmed = raw.trim();
        if (trimmed) {
            nonBlank.push(trimmed);
        }
    }
    if (nonBlank.length <= 1) {
        return 0;
    }
    const distinct = new Set(nonBlank);
    return (nonBlank.length - distinct.size) / nonBlank.length;
}
|
|
38
|
+
/**
 * Distil a set of run traces into a behaviour feature record.
 *
 * Each trace is read for `exitCode`, `timedOut`, `blockedActions`, `stdout`,
 * `stderr` and `durationMs`.
 *
 * @param {Array<object>} traces - Run traces of one variant.
 * @returns {{failRate: number, timeoutRate: number, blockRate: number,
 *   verbosity: number, repetition: number, durationSpread: number}}
 *   - `failRate`: share of traces with a non-zero exit code, a timeout, or at
 *     least one blocked action.
 *   - `timeoutRate` / `blockRate`: share of traces that timed out / had
 *     blocked actions.
 *   - `verbosity`: squashed mean output length (stdout + newline + stderr) in
 *     units of 2000 characters.
 *   - `repetition`: mean per-trace repeated-line fraction, capped at 1.
 *   - `durationSpread`: squashed coefficient of variation of `durationMs`
 *     (population standard deviation / mean); 0 when the mean is not positive.
 *   All fields are 0 for an empty trace list.
 */
export function behaviorFeatures(traces) {
    const n = traces.length;
    if (n === 0) {
        return { failRate: 0, timeoutRate: 0, blockRate: 0, verbosity: 0, repetition: 0, durationSpread: 0 };
    }
    const tally = { fails: 0, timeouts: 0, blocks: 0, chars: 0, repetition: 0 };
    const durations = [];
    for (const trace of traces) {
        const blocked = trace.blockedActions.length > 0;
        if (trace.exitCode !== 0 || trace.timedOut || blocked) {
            tally.fails += 1;
        }
        if (trace.timedOut) {
            tally.timeouts += 1;
        }
        if (blocked) {
            tally.blocks += 1;
        }
        const output = `${trace.stdout}\n${trace.stderr}`;
        tally.chars += output.length;
        tally.repetition += repetitionFraction(output);
        durations.push(trace.durationMs);
    }
    // Population mean and variance, accumulated left to right.
    let durationTotal = 0;
    for (const d of durations) {
        durationTotal += d;
    }
    const mean = durationTotal / n;
    let squaredDeviation = 0;
    for (const d of durations) {
        squaredDeviation += (d - mean) ** 2;
    }
    const variance = squaredDeviation / n;
    const durationSpread = mean > 0 ? squash(Math.sqrt(variance) / mean) : 0;
    return {
        failRate: tally.fails / n,
        timeoutRate: tally.timeouts / n,
        blockRate: tally.blocks / n,
        verbosity: squash(tally.chars / n / 2000),
        repetition: Math.min(1, tally.repetition / n),
        durationSpread,
    };
}
|
|
70
|
+
/**
 * Embed behaviour features into the 2-D Poincaré ball (the open unit disk).
 *
 * RADIUS encodes hierarchical "depth/struggle": a weighted sum of `failRate`
 * (1.6), `repetition` (1.2) and `timeoutRate` (1.0), squashed and then capped
 * at 0.999 so the point stays strictly inside the ball. Clean shallow agents
 * sit near the origin; strugglers sit near the boundary.
 *
 * ANGLE encodes behavioural MODE: a weighted sum of `verbosity` (0.6),
 * `blockRate` (1.0) and `durationSpread` (0.4), taken modulo 1 and scaled to
 * a full turn. Because of the wrap, sums that differ by a whole number map to
 * the same angle.
 *
 * @param {{failRate: number, repetition: number, timeoutRate: number,
 *   verbosity: number, blockRate: number, durationSpread: number}} f
 *   Behaviour features.
 * @returns {[number, number]} Cartesian point `[x, y]` with ‖p‖ ≤ 0.999.
 */
export function poincareEmbed(f) {
    const struggle = 1.6 * f.failRate + 1.2 * f.repetition + 1.0 * f.timeoutRate;
    const radius = Math.min(0.999, squash(struggle)); // keep strictly inside the open ball
    const modeRaw = 0.6 * f.verbosity + 1.0 * f.blockRate + 0.4 * f.durationSpread;
    const turns = modeRaw % 1; // fractional part selects the direction
    const theta = turns * 2 * Math.PI;
    const x = radius * Math.cos(theta);
    const y = radius * Math.sin(theta);
    return [x, y];
}
|
|
86
|
+
/**
 * Poincaré-ball distance between two points of the open unit ball:
 *
 *   d(u,v) = acosh( 1 + 2 · ‖u−v‖² / ((1−‖u‖²)(1−‖v‖²)) )
 *
 * Identical points give 0, the function is symmetric, and the value grows as
 * either point nears the boundary. Squared norms are clamped to 1 and the
 * denominator is floored at 1e-12, so a point on (or outside) the boundary
 * yields a large finite distance instead of a division by zero.
 *
 * @param {ArrayLike<number>} u - First point; its length sets the dimension.
 * @param {ArrayLike<number>} v - Second point, read at the same indices as `u`.
 * @returns {number} Hyperbolic distance.
 */
export function poincareDistance(u, v) {
    let gapSq = 0;
    let normUSq = 0;
    let normVSq = 0;
    for (let k = 0; k < u.length; k += 1) {
        const uk = u[k];
        const vk = v[k];
        gapSq += (uk - vk) ** 2;
        normUSq += uk ** 2;
        normVSq += vk ** 2;
    }
    const factorU = 1 - Math.min(1, normUSq);
    const factorV = 1 - Math.min(1, normVSq);
    const denom = Math.max(1e-12, factorU * factorV);
    return Math.acosh(1 + (2 * gapSq) / denom);
}
|
|
105
|
+
/**
|
|
106
|
+
* Assign a discrete behavioural niche by hyperbolic region: a radial shell
|
|
107
|
+
* (hierarchy depth) crossed with an angular sector (behavioural mode). Same
|
|
108
|
+
* behaviour ⇒ same niche; deterministic. Plugs into `selectElites` as the
|
|
109
|
+
* descriptor: `selectElites(k, v => behavioralNiche(tracesById.get(v.id) ?? []))`.
|
|
110
|
+
*/
|
|
111
|
+
/**
 * Poincaré polar niche of a disk point: radial shell (depth) × angular sector.
 *
 * The shell index is `floor(r · shells)` and the sector index is
 * `floor(θ / 2π · sectors)` with θ normalised to [0, 2π). Both are capped at
 * the last index.
 *
 * @param {number} x - Cartesian x of the embedded point.
 * @param {number} y - Cartesian y of the embedded point.
 * @param {number} [shells=4] - Number of radial shells.
 * @param {number} [sectors=6] - Number of angular sectors.
 * @returns {string} Niche id of the form `h<shell>_s<sector>`.
 */
export function poincareNicheOf(x, y, shells = 4, sectors = 6) {
    const fullTurn = 2 * Math.PI;
    const radius = Math.sqrt(x * x + y * y);
    const rawAngle = Math.atan2(y, x); // in (-π, π]
    const angle = rawAngle < 0 ? rawAngle + fullTurn : rawAngle;
    const shellIdx = Math.min(shells - 1, Math.floor(radius * shells));
    const sectorIdx = Math.min(sectors - 1, Math.floor((angle / fullTurn) * sectors));
    return `h${shellIdx}_s${sectorIdx}`;
}
|
|
121
|
+
/**
 * Flat Cartesian niche of a disk point: a uniform `bins × bins` square grid
 * laid over [-1, 1] × [-1, 1].
 *
 * Each coordinate is mapped from [-1, 1] to a bin index with
 * `floor((v + 1) / 2 · bins)`, clamped to `[0, bins − 1]`.
 *
 * @param {number} x - Cartesian x of the embedded point.
 * @param {number} y - Cartesian y of the embedded point.
 * @param {number} [bins=5] - Grid resolution per axis.
 * @returns {string} Niche id of the form `e<col>_<row>`.
 */
export function euclideanNicheOf(x, y, bins = 5) {
    const lastBin = bins - 1;
    const toBin = (coord) => {
        const raw = Math.floor(((coord + 1) / 2) * bins);
        return Math.min(lastBin, Math.max(0, raw));
    };
    const column = toBin(x);
    const row = toBin(y);
    return `e${column}_${row}`;
}
|
|
126
|
+
/**
 * Behavioural niche of a variant, straight from its run traces.
 *
 * Pipeline: traces → `behaviorFeatures` → `poincareEmbed` → `poincareNicheOf`.
 *
 * @param {Array<object>} traces - Run traces of one variant.
 * @param {number} [shells=4] - Number of radial shells.
 * @param {number} [sectors=6] - Number of angular sectors.
 * @returns {string} Niche id of the form `h<shell>_s<sector>`.
 */
export function behavioralNiche(traces, shells = 4, sectors = 6) {
    const features = behaviorFeatures(traces);
    const [px, py] = poincareEmbed(features);
    return poincareNicheOf(px, py, shells, sectors);
}
|
|
130
|
+
/**
 * FLAT Euclidean niche over the SAME embedded behaviour point — a square
 * `bins × bins` grid on the disk. This is the ablation comparator for ADR-095:
 * it bins the identical `poincareEmbed` coordinate with a uniform Cartesian grid
 * instead of the polar/hyperbolic radial-shell grid, so a controlled run can
 * measure what the hyperbolic geometry actually buys. Deterministic.
 *
 * Binning is delegated to `euclideanNicheOf`, so the trace-level and
 * point-level Euclidean niches cannot drift apart (this mirrors how
 * `behavioralNiche` delegates to `poincareNicheOf`).
 *
 * @param {Array<object>} traces - Run traces of one variant.
 * @param {number} [bins=5] - Grid resolution per axis.
 * @returns {string} Niche id of the form `e<col>_<row>`.
 */
export function euclideanNiche(traces, bins = 5) {
    const [x, y] = poincareEmbed(behaviorFeatures(traces));
    return euclideanNicheOf(x, y, bins);
}
|
|
142
|
+
// ── Active niche steering (ADR-092) — navigate the behavioural manifold ───────
|
|
143
|
+
//
|
|
144
|
+
// Diversity selection (ADR-091) MAINTAINS spread. Steering actively DRIVES the
|
|
145
|
+
// population toward under-explored regions of the Poincaré ball: find a density
|
|
146
|
+
// hole (preferring the high-radius "complex / deep-thinking" frontier), then seed
|
|
147
|
+
// the next generation from the survivors nearest that hole, so their offspring
|
|
148
|
+
// land in or near it. The whole mechanism is closed-form + deterministic.
|
|
149
|
+
/**
 * Representative centre of niche cell `(shell, sector)` in the Poincaré disk:
 * the point at the mid-radius of the shell and the mid-angle of the sector.
 *
 * @param {number} shell - Radial shell index (0 = innermost).
 * @param {number} sector - Angular sector index.
 * @param {number} [shells=4] - Number of radial shells.
 * @param {number} [sectors=6] - Number of angular sectors.
 * @returns {[number, number]} Cartesian point `[x, y]`.
 */
export function nicheCentroid(shell, sector, shells = 4, sectors = 6) {
    const midRadius = (shell + 0.5) / shells;
    const midAngle = ((sector + 0.5) / sectors) * 2 * Math.PI;
    const x = midRadius * Math.cos(midAngle);
    const y = midRadius * Math.sin(midAngle);
    return [x, y];
}
|
|
155
|
+
/**
 * Find an under-explored target niche.
 *
 * Shells are scanned from the OUTSIDE in (highest shell index first) and
 * sectors in ascending order. The first cell whose id is not in `occupied`
 * wins.
 *
 * @param {{has(id: string): boolean}} occupied - Set of occupied niche ids
 *   (`h<shell>_s<sector>`).
 * @param {number} [shells=4] - Number of radial shells.
 * @param {number} [sectors=6] - Number of angular sectors.
 * @returns {{niche: string, centroid: [number, number]} | null} The first
 *   unoccupied cell's id and centroid, or `null` when every niche is occupied.
 */
export function underExploredTarget(occupied, shells = 4, sectors = 6) {
    for (let ring = shells - 1; ring >= 0; ring -= 1) {
        for (let wedge = 0; wedge < sectors; wedge += 1) {
            const niche = `h${ring}_s${wedge}`;
            if (occupied.has(niche)) {
                continue;
            }
            return { niche, centroid: nicheCentroid(ring, wedge, shells, sectors) };
        }
    }
    return null;
}
|
|
170
|
+
/**
 * Pick the `limit` candidates closest to `target` in Poincaré distance — the
 * survivors whose offspring are most likely to reach the under-explored region.
 *
 * Candidates are ranked by ascending distance; ties keep the input array
 * order, so a deterministic input order gives a deterministic result.
 *
 * @param {Array<{id: *, embed: ArrayLike<number>}>} candidates - Candidates
 *   with their embedded points.
 * @param {ArrayLike<number>} target - Target point in the disk.
 * @param {number} limit - Maximum number of ids to return.
 * @returns {Array<*>} Ids of the nearest candidates; empty when `limit <= 0`.
 */
export function nearestToTarget(candidates, target, limit) {
    if (limit <= 0) {
        return [];
    }
    const ranked = candidates.map((cand, order) => ({
        id: cand.id,
        dist: poincareDistance(cand.embed, target),
        order,
    }));
    // Primary key: distance. Secondary key: original position.
    ranked.sort((p, q) => (p.dist - q.dist) || (p.order - q.order));
    return ranked.slice(0, limit).map((entry) => entry.id);
}
|
|
185
|
+
/**
 * Convenience wrapper: the Poincaré embedding of a variant straight from its
 * traces (`behaviorFeatures` followed by `poincareEmbed`).
 *
 * @param {Array<object>} traces - Run traces of one variant.
 * @returns {[number, number]} Cartesian point `[x, y]` in the unit disk.
 */
export function embedTraces(traces) {
    const features = behaviorFeatures(traces);
    return poincareEmbed(features);
}
|
|
189
|
+
//# sourceMappingURL=phenotype.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"phenotype.js","sourceRoot":"","sources":["../src/phenotype.ts"],"names":[],"mappings":"AAAA,+BAA+B;AAC/B,EAAE;AACF,kFAAkF;AAClF,kFAAkF;AAClF,kFAAkF;AAClF,gFAAgF;AAChF,yBAAyB;AACzB,EAAE;AACF,gFAAgF;AAChF,yEAAyE;AACzE,+EAA+E;AAC/E,+EAA+E;AAC/E,EAAE;AACF,2EAA2E;AAC3E,iFAAiF;AACjF,iFAAiF;AACjF,iFAAiF;AACjF,+DAA+D;AAI/D,kFAAkF;AAClF,SAAS,MAAM,CAAC,CAAS;IACvB,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;AACnC,CAAC;AAED,mFAAmF;AACnF,SAAS,kBAAkB,CAAC,IAAY;IACtC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IACpE,IAAI,KAAK,CAAC,MAAM,IAAI,CAAC;QAAE,OAAO,CAAC,CAAC;IAChC,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,IAAI,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;YAAE,OAAO,IAAI,CAAC,CAAC;;YACzB,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;IACnB,CAAC;IACD,OAAO,OAAO,GAAG,KAAK,CAAC,MAAM,CAAC;AAChC,CAAC;AAsBD,MAAM,UAAU,gBAAgB,CAAC,MAAkB;IACjD,MAAM,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC;IACxB,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;QACZ,OAAO,EAAE,QAAQ,EAAE,CAAC,EAAE,WAAW,EAAE,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,UAAU,EAAE,CAAC,EAAE,cAAc,EAAE,CAAC,EAAE,CAAC;IACvG,CAAC;IACD,IAAI,KAAK,GAAG,CAAC,EAAE,QAAQ,GAAG,CAAC,EAAE,MAAM,GAAG,CAAC,EAAE,QAAQ,GAAG,CAAC,EAAE,MAAM,GAAG,CAAC,CAAC;IAClE,MAAM,IAAI,GAAa,EAAE,CAAC;IAC1B,KAAK,MAAM,CAAC,IAAI,MAAM,EAAE,CAAC;QACvB,MAAM,MAAM,GAAG,CAAC,CAAC,QAAQ,KAAK,CAAC,IAAI,CAAC,CAAC,QAAQ,IAAI,CAAC,CAAC,cAAc,CAAC,MAAM,GAAG,CAAC,CAAC;QAC7E,IAAI,MAAM;YAAE,KAAK,IAAI,CAAC,CAAC;QACvB,IAAI,CAAC,CAAC,QAAQ;YAAE,QAAQ,IAAI,CAAC,CAAC;QAC9B,IAAI,CAAC,CAAC,cAAc,CAAC,MAAM,GAAG,CAAC;YAAE,MAAM,IAAI,CAAC,CAAC;QAC7C,MAAM,GAAG,GAAG,GAAG,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC,MAAM,EAAE,CAAC;QACvC,QAAQ,IAAI,GAAG,CAAC,MAAM,CAAC;QACvB,MAAM,IAAI,kBAAkB,CAAC,GAAG,CAAC,CAAC;QAClC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC;IAC1B,CAAC;IACD,MAAM,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GA
AG,CAAC,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC;IACjD,MAAM,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC;IACnE,MAAM,cAAc,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IACzE,OAAO;QACL,QAAQ,EAAE,KAAK,GAAG,CAAC;QACnB,WAAW,EAAE,QAAQ,GAAG,CAAC;QACzB,SAAS,EAAE,MAAM,GAAG,CAAC;QACrB,SAAS,EAAE,MAAM,CAAC,QAAQ,GAAG,CAAC,GAAG,IAAI,CAAC;QACtC,UAAU,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,MAAM,GAAG,CAAC,CAAC;QACnC,cAAc;KACf,CAAC;AACJ,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,aAAa,CAAC,CAAmB;IAC/C,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,GAAG,CAAC,CAAC,QAAQ,GAAG,GAAG,GAAG,CAAC,CAAC,UAAU,GAAG,GAAG,GAAG,CAAC,CAAC,WAAW,CAAC,CAAC;IAClF,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC,CAAC,qCAAqC;IAC5E,kEAAkE;IAClE,MAAM,OAAO,GAAG,GAAG,GAAG,CAAC,CAAC,SAAS,GAAG,GAAG,GAAG,CAAC,CAAC,SAAS,GAAG,GAAG,GAAG,CAAC,CAAC,cAAc,CAAC;IAC/E,MAAM,KAAK,GAAG,CAAC,OAAO,GAAG,CAAC,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,EAAE,CAAC;IAC1C,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC;AAC9D,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,UAAU,gBAAgB,CAAC,CAAoB,EAAE,CAAoB;IACzE,IAAI,KAAK,GAAG,CAAC,EAAE,GAAG,GAAG,CAAC,EAAE,GAAG,GAAG,CAAC,CAAC;IAChC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAClC,KAAK,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QAC5B,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QACjB,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IACnB,CAAC;IACD,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC;IAC/E,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,KAAK,CAAC,GAAG,KAAK,CAAC,CAAC;AAC7C,CAAC;AAED;;;;;GAKG;AACH,mFAAmF;AACnF,MAAM,UAAU,eAAe,CAAC,CAAS,EAAE,CAAS,EAAE,MAAM,GAAG,CAAC,EAAE,OAAO,GAAG,CAAC;IAC3E,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC
,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;IACnC,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC;IAC3D,IAAI,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IAC7B,IAAI,KAAK,GAAG,CAAC;QAAE,KAAK,IAAI,CAAC,GAAG,IAAI,CAAC,EAAE,CAAC;IACpC,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,GAAG,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,GAAG,OAAO,CAAC,CAAC,CAAC;IACpF,OAAO,IAAI,KAAK,KAAK,MAAM,EAAE,CAAC;AAChC,CAAC;AAED,iFAAiF;AACjF,MAAM,UAAU,gBAAgB,CAAC,CAAS,EAAE,CAAS,EAAE,IAAI,GAAG,CAAC;IAC7D,MAAM,IAAI,GAAG,CAAC,CAAS,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,GAAG,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;IAC9F,OAAO,IAAI,IAAI,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;AAClC,CAAC;AAED,MAAM,UAAU,eAAe,CAAC,MAAkB,EAAE,MAAM,GAAG,CAAC,EAAE,OAAO,GAAG,CAAC;IACzE,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,aAAa,CAAC,gBAAgB,CAAC,MAAM,CAAC,CAAC,CAAC;IACvD,OAAO,eAAe,CAAC,CAAC,EAAE,CAAC,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC;AAChD,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,cAAc,CAAC,MAAkB,EAAE,IAAI,GAAG,CAAC;IACzD,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,aAAa,CAAC,gBAAgB,CAAC,MAAM,CAAC,CAAC,CAAC;IACvD,MAAM,IAAI,GAAG,CAAC,CAAS,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,GAAG,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;IAC9F,OAAO,IAAI,IAAI,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;AAClC,CAAC;AAED,iFAAiF;AACjF,EAAE;AACF,+EAA+E;AAC/E,gFAAgF;AAChF,kFAAkF;AAClF,+EAA+E;AAC/E,0EAA0E;AAE1E,+EAA+E;AAC/E,MAAM,UAAU,aAAa,CAAC,KAAa,EAAE,MAAc,EAAE,MAAM,GAAG,CAAC,EAAE,OAAO,GAAG,CAAC;IAClF,MAAM,CAAC,GAAG,CAAC,KAAK,GAAG,GAAG,CAAC,GAAG,MAAM,CAAC;IACjC,MAAM,KAAK,GAAG,CAAC,CAAC,MAAM,GAAG,GAAG,CAAC,GAAG,OAAO,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,EAAE,CAAC;IACvD,OAAO,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAA
C;AACpD,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,mBAAmB,CACjC,QAA6B,EAC7B,MAAM,GAAG,CAAC,EACV,OAAO,GAAG,CAAC;IAEX,KAAK,IAAI,KAAK,GAAG,MAAM,GAAG,CAAC,EAAE,KAAK,IAAI,CAAC,EAAE,KAAK,EAAE,EAAE,CAAC;QACjD,KAAK,IAAI,MAAM,GAAG,CAAC,EAAE,MAAM,GAAG,OAAO,EAAE,MAAM,EAAE,EAAE,CAAC;YAChD,MAAM,KAAK,GAAG,IAAI,KAAK,KAAK,MAAM,EAAE,CAAC;YACrC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,KAAK,CAAC;gBAAE,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,CAAC;QACtG,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,eAAe,CAC7B,UAA2E,EAC3E,MAAiC,EACjC,KAAa;IAEb,IAAI,KAAK,IAAI,CAAC;QAAE,OAAO,EAAE,CAAC;IAC1B,OAAO,UAAU;SACd,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,gBAAgB,CAAC,CAAC,CAAC,KAAK,EAAE,MAAM,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC;SACtE,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;SACtC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC;SACf,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;AACtB,CAAC;AAED,iFAAiF;AACjF,MAAM,UAAU,WAAW,CAAC,MAAkB;IAC5C,OAAO,aAAa,CAAC,gBAAgB,CAAC,MAAM,CAAC,CAAC,CAAC;AACjD,CAAC"}
|
package/dist/templates.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"templates.d.ts","sourceRoot":"","sources":["../src/templates.ts"],"names":[],"mappings":"AAaA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AAE9C;;;;GAIG;AACH,wBAAgB,eAAe,CAAC,OAAO,EAAE,WAAW,GAAG,MAAM,CAkC5D;AAED;;;GAGG;AACH,wBAAgB,sBAAsB,IAAI,MAAM,
|
|
1
|
+
{"version":3,"file":"templates.d.ts","sourceRoot":"","sources":["../src/templates.ts"],"names":[],"mappings":"AAaA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AAE9C;;;;GAIG;AACH,wBAAgB,eAAe,CAAC,OAAO,EAAE,WAAW,GAAG,MAAM,CAkC5D;AAED;;;GAGG;AACH,wBAAgB,sBAAsB,IAAI,MAAM,CA2C/C;AAED;;;;GAIG;AACH,wBAAgB,gBAAgB,IAAI,MAAM,CAqDzC;AAED;;;GAGG;AACH,wBAAgB,mBAAmB,IAAI,MAAM,CAkD5C;AAED;;;GAGG;AACH,wBAAgB,kBAAkB,IAAI,MAAM,CA+B3C;AAED;;GAEG;AACH,wBAAgB,oBAAoB,IAAI,MAAM,CA0B7C;AAED;;GAEG;AACH,wBAAgB,mBAAmB,IAAI,MAAM,CA6B5C"}
|
package/dist/templates.js
CHANGED
|
@@ -68,9 +68,14 @@ export interface ContextItem {
|
|
|
68
68
|
score: number;
|
|
69
69
|
}
|
|
70
70
|
|
|
71
|
-
/**
|
|
71
|
+
/**
|
|
72
|
+
* Split a string into lowercased alphanumeric terms of length >= 2. camelCase
|
|
73
|
+
* boundaries are split first, so a bug report naming "paretoFront" matches the file
|
|
74
|
+
* "pareto.ts" (ADR-127 finding: camelCase symbols did not tokenise to path stems).
|
|
75
|
+
*/
|
|
72
76
|
function terms(text: string): string[] {
|
|
73
77
|
return text
|
|
78
|
+
.replace(/([a-z0-9])([A-Z])/g, '$1 $2')
|
|
74
79
|
.toLowerCase()
|
|
75
80
|
.split(/[^a-z0-9]+/)
|
|
76
81
|
.filter((t) => t.length >= 2);
|
package/dist/templates.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"templates.js","sourceRoot":"","sources":["../src/templates.ts"],"names":[],"mappings":"AAAA,+BAA+B;AAC/B,EAAE;AACF,gFAAgF;AAChF,sEAAsE;AACtE,EAAE;AACF,4EAA4E;AAC5E,iEAAiE;AACjE,4EAA4E;AAC5E,6EAA6E;AAC7E,8EAA8E;AAC9E,6EAA6E;AAC7E,0BAA0B;AAI1B;;;;GAIG;AACH,MAAM,UAAU,eAAe,CAAC,OAAoB;IAClD,OAAO;;;;;;;;;;;;;;;6BAeoB,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,OAAO,CAAC;;;;;;;;;;;;;;;;;CAiB3D,CAAC;AACF,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,sBAAsB;IACpC,OAAO
|
|
1
|
+
{"version":3,"file":"templates.js","sourceRoot":"","sources":["../src/templates.ts"],"names":[],"mappings":"AAAA,+BAA+B;AAC/B,EAAE;AACF,gFAAgF;AAChF,sEAAsE;AACtE,EAAE;AACF,4EAA4E;AAC5E,iEAAiE;AACjE,4EAA4E;AAC5E,6EAA6E;AAC7E,8EAA8E;AAC9E,6EAA6E;AAC7E,0BAA0B;AAI1B;;;;GAIG;AACH,MAAM,UAAU,eAAe,CAAC,OAAoB;IAClD,OAAO;;;;;;;;;;;;;;;6BAeoB,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,OAAO,CAAC;;;;;;;;;;;;;;;;;CAiB3D,CAAC;AACF,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,sBAAsB;IACpC,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAyCR,CAAC;AACF,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,gBAAgB;IAC9B,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAmDR,CAAC;AACF,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,mBAAmB;IACjC,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAgDR,CAAC;AACF,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,kBAAkB;IAChC,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA6BR,CAAC;AACF,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,oBAAoB;IAClC,OAAO;;;;;;;;;;;;;;;;;;;;;;;;CAwBR,CAAC;AACF,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,mBAAmB;IACjC,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;CA2BR,CAAC;AACF,CAAC"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/** Task description the Tier-2 driver receives as JSON in `argv[3]`. */
interface AgentTask {
    /** Prompt handed to the variant's planner and context builder. */
    prompt: string;
    /** Candidate file paths offered to the context builder. */
    files: Array<string>;
    /** Path that must appear in the built context for the bug to count as located. */
    buggyFile: string;
    /** Failure classification passed to the variant's retry policy. */
    classification: 'transient' | 'repairable' | 'unknown';
    /** Attempt index (0-based) from which an attempt is allowed to succeed. */
    failAttempts: number;
    /** Per-attempt cost used when deriving the synthetic `durationMs`. */
    backoffMs: number;
}
|
|
9
|
+
declare function main(): Promise<void>;
|
|
10
|
+
//# sourceMappingURL=tier2-driver.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tier2-driver.d.ts","sourceRoot":"","sources":["../src/tier2-driver.ts"],"names":[],"mappings":"AAeA,UAAU,SAAS;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,cAAc,EAAE,WAAW,GAAG,YAAY,GAAG,SAAS,CAAC;IACvD,YAAY,EAAE,MAAM,CAAC;IACrB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,iBAAe,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC,CAkDnC"}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// SPDX-License-Identifier: MIT
|
|
3
|
+
//
|
|
4
|
+
// Tier-2 agent driver (ADR-106). Runs in a CHILD process launched with
|
|
5
|
+
// `node --experimental-strip-types`, so it can import the variant's real surface
|
|
6
|
+
// `.ts` modules and execute their ACTUAL logic — not regex-extracted parameters
|
|
7
|
+
// (Tier 1, ADR-102). It drives a deterministic agent loop calling the surfaces'
|
|
8
|
+
// real exports and prints a JSON trace to stdout.
|
|
9
|
+
//
|
|
10
|
+
// argv[2] = variant directory (contains the 7 surface .ts files)
|
|
11
|
+
// argv[3] = task JSON: { prompt, files, buggyFile, classification, failAttempts, backoffMs }
|
|
12
|
+
//
|
|
13
|
+
// Standalone: imports ONLY the variant surfaces + node builtins (no package
|
|
14
|
+
// imports), so the child needs nothing from dist except this file. Deterministic
|
|
15
|
+
// (durationMs derived from the loop, not wall-clock) ⇒ reproducible.
|
|
16
|
+
/**
 * Tier-2 driver entry point.
 *
 * Reads the variant directory from `argv[2]` and the task JSON from `argv[3]`,
 * imports the variant's surface modules (`planner.ts`, `context_builder.ts`,
 * `retry_policy.ts`, `tool_policy.ts`), runs a deterministic attempt loop
 * against their exports, and writes one JSON object
 * `{ solved, attemptsUsed, durationMs, log }` to stdout.
 *
 * An attempt succeeds when the plan contains a `verify` step, the built
 * context contains `task.buggyFile`, and the attempt index has reached
 * `task.failAttempts`. Otherwise the retry policy and `maxAttempts` decide
 * whether to go on.
 *
 * @returns {Promise<void>} Resolves once the trace has been written. Rejects
 *   on bad arguments or a failing surface import.
 */
async function main() {
    // Node builtins are loaded here so the file keeps no top-level imports.
    const { resolve } = await import('node:path');
    const { pathToFileURL } = await import('node:url');
    const variantDir = process.argv[2];
    const task = JSON.parse(process.argv[3]);
    const log = [];
    // import() takes URL-like specifiers, not filesystem paths. A raw path
    // breaks for Windows absolute paths and for directories containing `#`,
    // `?` or `%`, and a relative directory would resolve against this file
    // rather than the cwd. Resolve against the cwd and convert to a file: URL.
    const importSurface = (file) => import(pathToFileURL(resolve(variantDir, file)).href);
    // Import the variant's REAL surface modules (types stripped by the flag).
    const planner = await importSurface('planner.ts');
    const ctxb = await importSurface('context_builder.ts');
    const retry = await importSurface('retry_policy.ts');
    const tools = await importSurface('tool_policy.ts');
    // The agent must produce a plan that ends in verification.
    const plan = (planner.createPlan?.(task.prompt) ?? []);
    const planOk = plan.length > 0 && plan.some((s) => s.kind === 'verify');
    log.push(`plan: ${plan.length} steps, verify=${planOk}`);
    // Tool ordering is exercised (its output shapes the log / behaviour).
    const order = (tools.orderKinds?.(['lint', 'test', 'build']) ?? []);
    log.push(`tools: ${order.join('>')}`);
    const maxA = (retry.maxAttempts ?? 3);
    let solved = false;
    let attemptsUsed = 0;
    let ctxLen = 0;
    // No loop condition: the loop ends via one of the two `break`s below.
    for (let attempt = 0;; attempt++) {
        attemptsUsed = attempt + 1;
        // REAL contextBuilder: ranks files by overlap and slices to its window. The
        // bug is "located" only if the buggy file survives into the returned window.
        const ctx = (ctxb.buildContext?.(task.prompt, task.files) ?? []);
        ctxLen = ctx.length;
        const located = ctx.some((c) => c.path === task.buggyFile);
        log.push(`attempt ${attempt}: ctx=${ctx.length} located=${located}`);
        if (planOk && located && attempt >= task.failAttempts) {
            solved = true;
            log.push('verify: PASS');
            break;
        }
        // REAL retryPolicy decides persistence given the failure classification.
        const d = (retry.decideRetry?.(attempt, task.classification) ?? { retry: false, reason: 'no decideRetry' });
        if (!d.retry || attempt + 1 >= maxA) {
            log.push(`stop: ${d.reason}`);
            break;
        }
    }
    // Synthetic duration derived from the loop (not wall-clock), so runs are reproducible.
    const durationMs = attemptsUsed * task.backoffMs + ctxLen;
    process.stdout.write(JSON.stringify({ solved, attemptsUsed, durationMs, log: log.join('\n') }));
}
|
|
59
|
+
// Run the driver. Any rejection is reported as an "unsolved" JSON trace on
// stdout, in the same shape as a normal result.
main().catch((err) => {
    const failure = { solved: false, attemptsUsed: 0, durationMs: 0, log: `ERR ${err?.message ?? err}` };
    process.stdout.write(JSON.stringify(failure));
});
|
|
62
|
+
//# sourceMappingURL=tier2-driver.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tier2-driver.js","sourceRoot":"","sources":["../src/tier2-driver.ts"],"names":[],"mappings":";AAAA,+BAA+B;AAC/B,EAAE;AACF,uEAAuE;AACvE,iFAAiF;AACjF,gFAAgF;AAChF,gFAAgF;AAChF,kDAAkD;AAClD,EAAE;AACF,mEAAmE;AACnE,+FAA+F;AAC/F,EAAE;AACF,4EAA4E;AAC5E,iFAAiF;AACjF,qEAAqE;AAWrE,KAAK,UAAU,IAAI;IACjB,MAAM,UAAU,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACnC,MAAM,IAAI,GAAc,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;IACpD,MAAM,GAAG,GAAa,EAAE,CAAC;IAEzB,0EAA0E;IAC1E,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,GAAG,UAAU,aAAa,CAAC,CAAC;IACzD,MAAM,IAAI,GAAG,MAAM,MAAM,CAAC,GAAG,UAAU,qBAAqB,CAAC,CAAC;IAC9D,MAAM,KAAK,GAAG,MAAM,MAAM,CAAC,GAAG,UAAU,kBAAkB,CAAC,CAAC;IAC5D,MAAM,KAAK,GAAG,MAAM,MAAM,CAAC,GAAG,UAAU,iBAAiB,CAAC,CAAC;IAE3D,2DAA2D;IAC3D,MAAM,IAAI,GAAG,CAAC,OAAO,CAAC,UAAU,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAA4B,CAAC;IAClF,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,GAAG,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC;IACxE,GAAG,CAAC,IAAI,CAAC,SAAS,IAAI,CAAC,MAAM,kBAAkB,MAAM,EAAE,CAAC,CAAC;IAEzD,sEAAsE;IACtE,MAAM,KAAK,GAAG,CAAC,KAAK,CAAC,UAAU,EAAE,CAAC,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC,IAAI,EAAE,CAAa,CAAC;IAChF,GAAG,CAAC,IAAI,CAAC,UAAU,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IAEtC,MAAM,IAAI,GAAG,CAAC,KAAK,CAAC,WAAW,IAAI,CAAC,CAAW,CAAC;IAChD,IAAI,MAAM,GAAG,KAAK,CAAC;IACnB,IAAI,YAAY,GAAG,CAAC,CAAC;IACrB,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,KAAK,IAAI,OAAO,GAAG,CAAC,GAAI,OAAO,EAAE,EAAE,CAAC;QAClC,YAAY,GAAG,OAAO,GAAG,CAAC,CAAC;QAC3B,4EAA4E;QAC5E,6EAA6E;QAC7E,MAAM,GAAG,GAAG,CAAC,IAAI,CAAC,YAAY,EAAE,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAA4B,CAAC;QAC5F,MAAM,GAAG,GAAG,CAAC,MAAM,CAAC;QACpB,MAAM,OAAO,GAAG,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,IAAI,CAAC,SAAS,CAAC,CAAC;QAC3D,GAAG,CAAC,IAAI,CAAC,WAAW,OAAO,SAAS,GAAG,CAAC,MAAM,YAAY,OAAO,EAAE,CAAC,CAAC;QACrE,IAAI,MAAM,IAAI,OAAO,IAAI,OAAO,IAAI,IAAI,CAAC,YAAY,EAAE,CAAC;YACtD,MAAM,GAAG,IAAI,CAAC;YACd,GAAG,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;YACzB,MAA
M;QACR,CAAC;QACD,yEAAyE;QACzE,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,OAAO,EAAE,IAAI,CAAC,cAAc,CAAC,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,gBAAgB,EAAE,CAGzG,CAAC;QACF,IAAI,CAAC,CAAC,CAAC,KAAK,IAAI,OAAO,GAAG,CAAC,IAAI,IAAI,EAAE,CAAC;YACpC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC;YAC9B,MAAM;QACR,CAAC;IACH,CAAC;IAED,MAAM,UAAU,GAAG,YAAY,GAAG,IAAI,CAAC,SAAS,GAAG,MAAM,CAAC;IAC1D,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,MAAM,EAAE,YAAY,EAAE,UAAU,EAAE,GAAG,EAAE,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;AAClG,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE;IACjB,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,MAAM,EAAE,KAAK,EAAE,YAAY,EAAE,CAAC,EAAE,UAAU,EAAE,CAAC,EAAE,GAAG,EAAE,OAAQ,CAAW,EAAE,OAAO,IAAI,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC;AACpI,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import type { HarnessVariant, RunTrace } from './types.js';
|
|
2
|
+
/** A Tier-2 agent task: locate `buggyFile` among `files` and persist past `failAttempts`. */
export interface AgentTask {
    /** Task identifier. */
    id: string;
    /** Prompt handed to the variant's surfaces. */
    prompt: string;
    /** Candidate file paths, including `buggyFile`. */
    files: Array<string>;
    /** The file the agent has to locate. */
    buggyFile: string;
    /** Failure classification given to the retry policy. */
    classification: 'transient' | 'repairable' | 'unknown';
    /** Number of initial attempts that cannot succeed. */
    failAttempts: number;
    /** Per-attempt cost used for the synthetic duration. */
    backoffMs: number;
    /** Difficulty tier from 1 to 5 — presumably 1 is easiest; confirm against the curriculum code. */
    difficulty: 1 | 2 | 3 | 4 | 5;
}
|
|
13
|
+
export declare const DEFAULT_AGENT_TASKS: readonly AgentTask[];
|
|
14
|
+
/** Run ONE agent task against a variant by executing its real surface code. */
|
|
15
|
+
export declare function runVariantTaskAgent(variant: HarnessVariant, task: AgentTask, timeoutMs?: number): Promise<RunTrace>;
|
|
16
|
+
/** Run a variant against the agent suite (defaults to DEFAULT_AGENT_TASKS). */
|
|
17
|
+
export declare function runVariantTasksAgent(variant: HarnessVariant, tasks?: readonly AgentTask[], timeoutMs?: number): Promise<RunTrace[]>;
|
|
18
|
+
//# sourceMappingURL=tier2-sandbox.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tier2-sandbox.d.ts","sourceRoot":"","sources":["../src/tier2-sandbox.ts"],"names":[],"mappings":"AAiBA,OAAO,KAAK,EAAE,cAAc,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAiB3D,6FAA6F;AAC7F,MAAM,WAAW,SAAS;IACxB,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,cAAc,EAAE,WAAW,GAAG,YAAY,GAAG,SAAS,CAAC;IACvD,YAAY,EAAE,MAAM,CAAC;IACrB,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;CAC/B;AAmBD,eAAO,MAAM,mBAAmB,EAAE,SAAS,SAAS,EAInD,CAAC;AASF,+EAA+E;AAC/E,wBAAsB,mBAAmB,CACvC,OAAO,EAAE,cAAc,EACvB,IAAI,EAAE,SAAS,EACf,SAAS,SAAS,GACjB,OAAO,CAAC,QAAQ,CAAC,CAgCnB;AAED,+EAA+E;AAC/E,wBAAsB,oBAAoB,CACxC,OAAO,EAAE,cAAc,EACvB,KAAK,GAAE,SAAS,SAAS,EAAwB,EACjD,SAAS,SAAS,GACjB,OAAO,CAAC,QAAQ,EAAE,CAAC,CAIrB"}
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
// SPDX-License-Identifier: MIT
|
|
2
|
+
//
|
|
3
|
+
// Tier-2 agent sandbox (ADR-106). Executes a variant's REAL surface code by
|
|
4
|
+
// spawning `tier2-driver.js` in a child `node --experimental-strip-types`
|
|
5
|
+
// process (so the child can import the variant's `.ts` surfaces). Shell-free
|
|
6
|
+
// (`execFile`, argv split — no command injection), env-scrubbed (only PATH +
|
|
7
|
+
// identifiers leak), and timeout-bounded — the same safety posture as the real
|
|
8
|
+
// sandbox (ADR-071). The gate (`inspectVariant`) has already cleared the variant
|
|
9
|
+
// before any execution.
|
|
10
|
+
//
|
|
11
|
+
// Requires Node ≥ 22 (`--experimental-strip-types`). On older Node or any child
|
|
12
|
+
// error, the variant gets a clean "unsolved" trace rather than crashing the loop.
|
|
13
|
+
import { execFile } from 'node:child_process';
|
|
14
|
+
import { existsSync } from 'node:fs';
|
|
15
|
+
import { fileURLToPath } from 'node:url';
|
|
16
|
+
import { promisify } from 'node:util';
|
|
17
|
+
// Promise-returning form of `execFile`, so the child-process run can be awaited.
const execFileAsync = promisify(execFile);
|
|
18
|
+
/**
 * Locate the compiled Tier-2 driver script.
 *
 * Loaded from `dist/tier2-sandbox.js`, the driver is the sibling
 * `./tier2-driver.js`. Loaded from `src/` (tests/dev via a TS loader) there is
 * no such sibling, so the built `../dist/tier2-driver.js` is used instead. In
 * both cases the driver must have been compiled (`npm run build`) for the
 * 'agent' sandbox to run.
 *
 * @returns {string} File-system path of the driver. The fallback path is
 *   returned as-is, without checking that it exists.
 */
function resolveDriver() {
    const pathOf = (relative) => fileURLToPath(new URL(relative, import.meta.url));
    const nextToThisModule = pathOf('./tier2-driver.js');
    return existsSync(nextToThisModule)
        ? nextToThisModule
        : pathOf('../dist/tier2-driver.js');
}
// Resolved once, when this module is first evaluated.
const DRIVER = resolveDriver();
|
|
31
|
+
/**
 * Build the file fixture for one task of the default agent suite.
 *
 * The buggy file `src/<a>_<b>.ts` is placed AFTER `before` distractors named
 * `src/<a>_<b>_<i>.ts`. The distractors share the buggy file's EXACT two terms,
 * so the real contextBuilder scores them all identically and falls back to
 * input order. The buggy file therefore sits at rank `before` and survives into
 * the returned window only if the contextBuilder's `.slice(0, N)` window is
 * wider than that. Solving thus depends on the REAL contextBuilder
 * window/ranking, plus retry persistence ('transient' lets decideRetry continue
 * to maxAttempts).
 *
 * @param {string} a First term of the file name and prompt.
 * @param {string} b Second term of the file name and prompt.
 * @param {number} before Number of distractor files listed ahead of the buggy file.
 * @returns {{ prompt: string, files: string[], buggyFile: string }}
 */
function buggyAfter(a, b, before) {
    const stem = `src/${a}_${b}`;
    const buggyFile = `${stem}.ts`;
    const distractors = Array.from({ length: before }).map((_unused, index) => `${stem}_${index}.ts`);
    // Distractors first, the real target last.
    const files = distractors.concat(buggyFile);
    return { prompt: `fix ${a} ${b}`, files, buggyFile };
}
|
|
48
|
+
// Default agent suite: three 'transient' tasks. From easy to hard the buggy file
// sits behind more distractors (0 / 35 / 60) and failAttempts, backoffMs and
// difficulty all increase.
export const DEFAULT_AGENT_TASKS = [
    { id: 'a-easy', terms: ['auth', 'token'], before: 0, failAttempts: 0, backoffMs: 20, difficulty: 1 },
    { id: 'a-mid', terms: ['cache', 'key'], before: 35, failAttempts: 1, backoffMs: 30, difficulty: 3 },
    { id: 'a-hard', terms: ['retry', 'budget'], before: 60, failAttempts: 2, backoffMs: 40, difficulty: 5 },
].map(({ id, terms: [a, b], before, failAttempts, backoffMs, difficulty }) => ({
    id,
    ...buggyAfter(a, b, before),
    classification: 'transient',
    failAttempts,
    backoffMs,
    difficulty,
}));
|
|
53
|
+
/**
 * Coerce whatever the driver printed into a well-formed report.
 *
 * Anything that is not an object (e.g. `null`), and any missing or unusable
 * field, falls back to the "unsolved" defaults so the caller can build a trace
 * without throwing.
 *
 * @param {unknown} raw Parsed driver output.
 * @returns {{ solved: boolean, attemptsUsed: number, durationMs: number, log: string }}
 */
function normalizeDriverReport(raw) {
    const report = raw !== null && typeof raw === 'object' ? raw : {};
    // 8.64e15 ms is the largest offset a Date can hold; beyond it toISOString() throws.
    const durationIsUsable = Number.isFinite(report.durationMs) && Math.abs(report.durationMs) <= 8.64e15;
    return {
        solved: Boolean(report.solved),
        attemptsUsed: Number.isFinite(report.attemptsUsed) ? report.attemptsUsed : 0,
        durationMs: durationIsUsable ? report.durationMs : 0,
        log: report.log == null ? '' : String(report.log),
    };
}

/**
 * Run ONE agent task against a variant by executing its real surface code.
 *
 * Spawns the Tier-2 driver in a child Node process (no shell, scrubbed env,
 * bounded by `timeoutMs`) and turns the JSON report on its stdout into a trace.
 * Child-side problems never throw: a spawn error, a timeout, or an empty or
 * malformed report all yield an "unsolved" trace.
 *
 * @param {{ id: string, dir: string }} variant Variant to run; `dir` is passed
 *   to the driver and used as the child's working directory.
 * @param {{ id: string }} task Task to run; serialised to JSON for the driver.
 * @param {number} [timeoutMs=10_000] Child-process timeout in milliseconds.
 * @returns {Promise<object>} Trace with variantId, taskId, startedAt,
 *   finishedAt, exitCode, stdout, stderr, durationMs, timedOut, blockedActions.
 */
export async function runVariantTaskAgent(variant, task, timeoutMs = 10_000) {
    // Only PATH and the two identifiers are visible to the child.
    const scrubbedEnv = {
        PATH: process.env.PATH,
        METAHARNESS_VARIANT: variant.id,
        METAHARNESS_TASK: task.id,
    };
    let report;
    let timedOut = false;
    try {
        const { stdout } = await execFileAsync(process.execPath, ['--experimental-strip-types', '--no-warnings', DRIVER, variant.dir, JSON.stringify(task)], { timeout: timeoutMs, env: scrubbedEnv, cwd: variant.dir, maxBuffer: 1 << 20 });
        report = normalizeDriverReport(JSON.parse(stdout.trim() || '{}'));
    }
    catch (e) {
        // execFile marks the error `killed` when it terminated the child itself;
        // exclude the maxBuffer overflow case so only the timeout counts.
        timedOut = e?.killed === true && e?.code !== 'ERR_CHILD_PROCESS_STDIO_MAXBUFFER';
        const reason = e?.message ?? String(e);
        report = normalizeDriverReport({ log: `child error: ${reason}`.slice(0, 200) });
    }
    // Fixed epoch start; finishedAt is the epoch plus the reported duration
    // (presumably so traces stay reproducible — confirm against the scorer).
    const startedAt = '1970-01-01T00:00:00.000Z';
    return {
        variantId: variant.id,
        taskId: task.id,
        startedAt,
        finishedAt: new Date(report.durationMs).toISOString(),
        exitCode: report.solved ? 0 : 1,
        stdout: report.log,
        stderr: report.solved ? '' : `task ${task.id} unsolved (${report.attemptsUsed} attempts)`,
        durationMs: report.durationMs,
        timedOut,
        // A child failure is an unsolved task, not a safety block.
        blockedActions: [],
    };
}
|
|
84
|
+
/**
 * Run a variant against the agent suite (defaults to DEFAULT_AGENT_TASKS).
 *
 * Tasks run strictly one after another; each trace is awaited before the next
 * task starts.
 *
 * @param {object} variant Variant to run against every task.
 * @param {Iterable<object>} [tasks=DEFAULT_AGENT_TASKS] Tasks to run, in order.
 * @param {number} [timeoutMs=10_000] Per-task timeout in milliseconds.
 * @returns {Promise<object[]>} One trace per task, in task order.
 */
export async function runVariantTasksAgent(variant, tasks = DEFAULT_AGENT_TASKS, timeoutMs = 10_000) {
    const collected = [];
    for (const currentTask of tasks) {
        const trace = await runVariantTaskAgent(variant, currentTask, timeoutMs);
        collected.push(trace);
    }
    return collected;
}
|
|
91
|
+
//# sourceMappingURL=tier2-sandbox.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tier2-sandbox.js","sourceRoot":"","sources":["../src/tier2-sandbox.ts"],"names":[],"mappings":"AAAA,+BAA+B;AAC/B,EAAE;AACF,4EAA4E;AAC5E,0EAA0E;AAC1E,6EAA6E;AAC7E,6EAA6E;AAC7E,+EAA+E;AAC/E,iFAAiF;AACjF,wBAAwB;AACxB,EAAE;AACF,gFAAgF;AAChF,kFAAkF;AAElF,OAAO,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAC9C,OAAO,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AACrC,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AACzC,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAGtC,MAAM,aAAa,GAAG,SAAS,CAAC,QAAQ,CAAC,CAAC;AAE1C;;;;;GAKG;AACH,SAAS,aAAa;IACpB,MAAM,OAAO,GAAG,aAAa,CAAC,IAAI,GAAG,CAAC,mBAAmB,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;IAC7E,IAAI,UAAU,CAAC,OAAO,CAAC;QAAE,OAAO,OAAO,CAAC;IACxC,OAAO,aAAa,CAAC,IAAI,GAAG,CAAC,yBAAyB,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAC5E,CAAC;AACD,MAAM,MAAM,GAAG,aAAa,EAAE,CAAC;AAc/B;;;;;;;;GAQG;AACH,SAAS,UAAU,CAAC,CAAS,EAAE,CAAS,EAAE,MAAc;IACtD,MAAM,SAAS,GAAG,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC;IACrC,MAAM,KAAK,GAAG;QACZ,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,MAAM,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC;QACpE,SAAS;KACV,CAAC;IACF,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,IAAI,CAAC,EAAE,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC;AACvD,CAAC;AACD,MAAM,CAAC,MAAM,mBAAmB,GAAyB;IACvD,EAAE,EAAE,EAAE,QAAQ,EAAE,GAAG,UAAU,CAAC,MAAM,EAAE,OAAO,EAAE,CAAC,CAAC,EAAE,cAAc,EAAE,WAAW,EAAE,YAAY,EAAE,CAAC,EAAE,SAAS,EAAE,EAAE,EAAE,UAAU,EAAE,CAAC,EAAE;IAC/H,EAAE,EAAE,EAAE,OAAO,EAAE,GAAG,UAAU,CAAC,OAAO,EAAE,KAAK,EAAE,EAAE,CAAC,EAAE,cAAc,EAAE,WAAW,EAAE,YAAY,EAAE,CAAC,EAAE,SAAS,EAAE,EAAE,EAAE,UAAU,EAAE,CAAC,EAAE;IAC9H,EAAE,EAAE,EAAE,QAAQ,EAAE,GAAG,UAAU,CAAC,OAAO,EAAE,QAAQ,EAAE,EAAE,CAAC,EAAE,cAAc,EAAE,WAAW,EAAE,YAAY,EAAE,CAAC,EAAE,SAAS,EAAE,EAAE,EAAE,UAAU,EAAE,CAAC,EAAE;CACnI,CAAC;AASF,+EAA+E;AAC/E,MAAM,CAAC,KAAK,UAAU,mBAAmB,CACvC,OAAuB,EACvB,IAAe,EACf,SAAS,GAAG,MAAM;IAElB,MAAM,WAAW,GAAsB;QACrC,IAAI,EAAE,OAAO,CAAC,GAAG,CAAC,IAAI;QACtB,mBAAmB,EAAE,OAAO,CAAC,EAAE;QAC/B,gBAAgB,EAAE,IAAI,CAAC,EAAE;KAC1B,CAAC;IACF,IAAI,GAAG,GAAc,EAAE,MAAM,EAAE,KAAK,EAAE,YAAY,EAAE,CAAC,EAAE,UAAU,EAA
E,CAAC,EAAE,GAAG,EAAE,EAAE,EAAE,CAAC;IAChF,IAAI,OAAO,GAAG,KAAK,CAAC;IACpB,IAAI,CAAC;QACH,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,aAAa,CACpC,OAAO,CAAC,QAAQ,EAChB,CAAC,4BAA4B,EAAE,eAAe,EAAE,MAAM,EAAE,OAAO,CAAC,GAAG,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,EAC1F,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG,EAAE,WAAW,EAAE,GAAG,EAAE,OAAO,CAAC,GAAG,EAAE,SAAS,EAAE,CAAC,IAAI,EAAE,EAAE,CAC/E,CAAC;QACF,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,IAAI,EAAE,IAAI,IAAI,CAAC,CAAC;IAC1C,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,GAAG,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,YAAY,EAAE,CAAC,EAAE,UAAU,EAAE,CAAC,EAAE,GAAG,EAAE,gBAAiB,CAAW,CAAC,OAAO,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC;QACnH,OAAO,GAAG,KAAK,CAAC,CAAC,0DAA0D;IAC7E,CAAC;IACD,MAAM,SAAS,GAAG,0BAA0B,CAAC;IAC7C,OAAO;QACL,SAAS,EAAE,OAAO,CAAC,EAAE;QACrB,MAAM,EAAE,IAAI,CAAC,EAAE;QACf,SAAS;QACT,UAAU,EAAE,IAAI,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC,WAAW,EAAE;QAClD,QAAQ,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAC5B,MAAM,EAAE,GAAG,CAAC,GAAG;QACf,MAAM,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,IAAI,CAAC,EAAE,cAAc,GAAG,CAAC,YAAY,YAAY;QACnF,UAAU,EAAE,GAAG,CAAC,UAAU;QAC1B,QAAQ,EAAE,KAAK;QACf,cAAc,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,qBAAqB,CAAC,CAAC,CAAC,CAAC,EAAE;KACvD,CAAC;AACJ,CAAC;AAED,+EAA+E;AAC/E,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,OAAuB,EACvB,QAA8B,mBAAmB,EACjD,SAAS,GAAG,MAAM;IAElB,MAAM,MAAM,GAAe,EAAE,CAAC;IAC9B,KAAK,MAAM,IAAI,IAAI,KAAK;QAAE,MAAM,CAAC,IAAI,CAAC,MAAM,mBAAmB,CAAC,OAAO,EAAE,IAAI,EAAE,SAAS,CAAC,CAAC,CAAC;IAC3F,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
package/dist/types.d.ts
CHANGED
|
@@ -103,12 +103,111 @@ export interface EvolutionConfig {
|
|
|
103
103
|
costBudgetSeconds?: number;
|
|
104
104
|
/** Deterministic seed for mutation selection (reproducibility). Default 0. */
|
|
105
105
|
seed?: number;
|
|
106
|
+
/**
|
|
107
|
+
* Evaluation substrate (ADR-101/102). 'real' (default) runs the repo's test
|
|
108
|
+
* command — surface-independent, so the behavioural manifold is degenerate.
|
|
109
|
+
* 'mock' runs the deterministic surface-driven agent loop so traces depend on
|
|
110
|
+
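* the harness surfaces (activating ADR-091/092/094/097/100). Reproducible. 'agent' (ADR-106, Tier 2) executes the variant's REAL surface code in a child `node --experimental-strip-types` process; requires Node ≥ 22.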
|
|
111
|
+
*/
|
|
112
|
+
sandboxMode?: 'real' | 'mock' | 'agent';
|
|
113
|
+
/** Custom scripted suite for 'mock' mode (ADR-102); defaults to DEFAULT_MOCK_TASKS. */
|
|
114
|
+
mockTasks?: import('./mock-sandbox.js').MockTask[];
|
|
115
|
+
/**
|
|
116
|
+
* 'agent' sandbox (ADR-106, Tier 2): executes the variant's REAL surface code
|
|
117
|
+
* in a child `node --experimental-strip-types` process. Requires Node ≥ 22.
|
|
118
|
+
*/
|
|
119
|
+
/** Custom agent-task suite for 'agent' mode (ADR-106); defaults to DEFAULT_AGENT_TASKS. */
|
|
120
|
+
agentTasks?: import('./tier2-sandbox.js').AgentTask[];
|
|
121
|
+
/**
|
|
122
|
+
* Parent pool (ADR-115). 'archive' (default) retains the whole archive
|
|
123
|
+
* (ADR-073). 'generation' is memoryless (μ,λ): parents come only from the
|
|
124
|
+
* current generation's children — used to ablate archive retention.
|
|
125
|
+
*/
|
|
126
|
+
selectionPool?: 'archive' | 'generation';
|
|
127
|
+
/**
|
|
128
|
+
* Tie-break policy when variants share the top finalScore (ADR-072's scorer
|
|
129
|
+
* ceilings at 0.985, so ties are the common case). 'insertion' (default) is
|
|
130
|
+
* fully reproducible — earliest insertion wins. 'faster' breaks ties by lowest
|
|
131
|
+
* mean trace wall-clock, giving selection a real efficiency gradient; it is
|
|
132
|
+
* NOT reproducible by construction (wall-clock), so it is strictly opt-in.
|
|
133
|
+
*/
|
|
134
|
+
tieBreaker?: 'insertion' | 'faster';
|
|
135
|
+
/**
|
|
136
|
+
* Parent-selection strategy on a stalled generation (no promoted children).
|
|
137
|
+
* 'score' (default) takes the top finalScore variants (ADR-073).
|
|
138
|
+
* 'quality-diversity' (MAP-Elites) takes the elite per behaviour niche (mutated
|
|
139
|
+
* surface) so the population keeps exploring all surfaces instead of
|
|
140
|
+
* collapsing onto one at the 0.985 ceiling. 'behavioral-diversity' (ADR-091)
|
|
141
|
+
* bins by HYPERBOLIC behavioural niche (Poincaré-ball phenotype from run
|
|
142
|
+
* traces) so diversity tracks how a variant *behaves*, not which file it
|
|
143
|
+
* touched. 'niche-steering' (ADR-092) goes further — actively seeds the next
|
|
144
|
+
* generation from survivors nearest an UNDER-EXPLORED region of the Poincaré
|
|
145
|
+
* ball (preferring the high-complexity frontier), navigating the manifold
|
|
146
|
+
* rather than just maintaining spread. 'clade' (ADR-094, Huxley-Gödel) selects
|
|
147
|
+
* parents by descendant POTENTIAL — Thompson sampling over clade
|
|
148
|
+
* metaproductivity — since the best-scoring variant is a poor parent once its
|
|
149
|
+
* subtree is exhausted; τ is scheduled from the SGM budget. All deterministic.
|
|
150
|
+
*/
|
|
151
|
+
selection?: 'score' | 'quality-diversity' | 'behavioral-diversity' | 'niche-steering' | 'clade' | 'pareto';
|
|
152
|
+
/**
|
|
153
|
+
* Opt-in genetic crossover (ADR-089). When true and a generation has ≥2
|
|
154
|
+
* parents, the first child of each parent recombines that parent's surfaces
|
|
155
|
+
* with the next parent's (inherit a subset from each) instead of mutating.
|
|
156
|
+
* Default false → mutation-only. Recombination passes the same safety gate.
|
|
157
|
+
*/
|
|
158
|
+
crossover?: boolean;
|
|
159
|
+
/**
|
|
160
|
+
* Opt-in epistatic linkage (ADR-093). Only meaningful with crossover. When
|
|
161
|
+
* true, evolve() learns a surface co-occurrence graph from high-fitness
|
|
162
|
+
* lineages and crossover inherits the LINKED block of the donor surface
|
|
163
|
+
* (keeping co-adapted surfaces together) instead of a random subset.
|
|
164
|
+
*/
|
|
165
|
+
epistasis?: boolean;
|
|
106
166
|
/**
|
|
107
167
|
* Pluggable code generator (ADR-071). Default is the DeterministicMutator;
|
|
108
168
|
* pass an LLM-backed one (e.g. OpenRouterMutator) to evolve via a model — it
|
|
109
169
|
* still passes the same validateGeneratedCode safety gate.
|
|
110
170
|
*/
|
|
111
171
|
generator?: import('./mutator.js').CodeGenerator;
|
|
172
|
+
/**
|
|
173
|
+
* Opt-in graded promotion (ADR-076). A hash-pinned benchmark suite. When set,
|
|
174
|
+
* each child is evaluated against its parent over the suite in the real
|
|
175
|
+
* sandbox, and the STATISTICAL promotion decision overrides the single-run
|
|
176
|
+
* ADR-072 promote flag (the decision is also written to runs/<id>.bench.json).
|
|
177
|
+
* Default unset → the lightweight single-run promotion is used.
|
|
178
|
+
*/
|
|
179
|
+
benchSuite?: import('./bench/types.js').BenchSuite;
|
|
180
|
+
/** Bootstrap samples for the statistical promotion gate (ADR-076). */
|
|
181
|
+
benchSamples?: number;
|
|
182
|
+
/** Minimum mean-delta a child must clear under the bench promotion gate. */
|
|
183
|
+
benchMinDelta?: number;
|
|
184
|
+
/**
|
|
185
|
+
* Opt-in SGM cumulative risk budget (ADR-079). Only meaningful with benchSuite.
|
|
186
|
+
* Every admitted promotion spends 1 from this shared, monotonic budget; once
|
|
187
|
+
* exhausted, further promotions are refused regardless of local score — so
|
|
188
|
+
* recursive self-modification cannot accumulate unbounded risk across rounds.
|
|
189
|
+
* Also enforces the SOTA clauses (no hidden-test regression, cost-per-solve
|
|
190
|
+
* within costCeilingFactor× the parent). Unset → no risk cap.
|
|
191
|
+
*/
|
|
192
|
+
riskBudgetTotal?: number;
|
|
193
|
+
/** Cost-per-solve ceiling as a multiple of the parent (SGM). Default 1.20. */
|
|
194
|
+
costCeilingFactor?: number;
|
|
195
|
+
/**
|
|
196
|
+
* Opt-in Benjamini–Hochberg FDR target (ADR-096). Only meaningful with
|
|
197
|
+
* benchSuite. When set (e.g. 0.05), promotions are corrected for multiple
|
|
198
|
+
* testing across each generation's candidates — a child stays promoted only if
|
|
199
|
+
* it survives the generation-wide BH correction. Strictly tightens promotion.
|
|
200
|
+
*/
|
|
201
|
+
fdrQ?: number;
|
|
202
|
+
/**
|
|
203
|
+
* Opt-in self-directed curriculum (ADR-097). Only meaningful with a graded
|
|
204
|
+
* benchSuite (tasks carrying difficulty 1..5). Scores each generation on only
|
|
205
|
+
* the admitted difficulty tier, escalating once the population masters it —
|
|
206
|
+
* so harder tasks arrive as competence grows (a difficulty ladder).
|
|
207
|
+
*/
|
|
208
|
+
curriculum?: boolean;
|
|
209
|
+
/** Mean solve rate that counts as "mastered" and escalates the tier. Default 0.9. */
|
|
210
|
+
curriculumThreshold?: number;
|
|
112
211
|
}
|
|
113
212
|
/** The outcome of an `evolve` run. */
|
|
114
213
|
export interface EvolutionResult {
|
package/dist/types.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAYA;;;;GAIG;AACH,MAAM,MAAM,eAAe,GACvB,SAAS,GACT,gBAAgB,GAChB,UAAU,GACV,aAAa,GACb,YAAY,GACZ,cAAc,GACd,aAAa,CAAC;AAElB,4EAA4E;AAC5E,MAAM,WAAW,WAAW;IAC1B,sCAAsC;IACtC,IAAI,EAAE,MAAM,CAAC;IACb,cAAc,EAAE,KAAK,GAAG,MAAM,GAAG,MAAM,GAAG,SAAS,CAAC;IACpD,wEAAwE;IACxE,WAAW,EAAE,MAAM,CAAC;IACpB,+EAA+E;IAC/E,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,iFAAiF;IACjF,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,gDAAgD;IAChD,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,gFAAgF;AAChF,MAAM,WAAW,cAAc;IAC7B,yEAAyE;IACzE,EAAE,EAAE,MAAM,CAAC;IACX,mDAAmD;IACnD,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,wCAAwC;IACxC,UAAU,EAAE,MAAM,CAAC;IACnB,yEAAyE;IACzE,GAAG,EAAE,MAAM,CAAC;IACZ,iEAAiE;IACjE,eAAe,EAAE,eAAe,CAAC;IACjC,4CAA4C;IAC5C,eAAe,EAAE,MAAM,CAAC;IACxB,iCAAiC;IACjC,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,yEAAyE;AACzE,MAAM,WAAW,QAAQ;IACvB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,+EAA+E;IAC/E,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,CAAC;IACnB,iFAAiF;IACjF,QAAQ,EAAE,OAAO,CAAC;IAClB,yEAAyE;IACzE,cAAc,EAAE,MAAM,EAAE,CAAC;CAC1B;AAED,8EAA8E;AAC9E,MAAM,WAAW,SAAS;IACxB,SAAS,EAAE,MAAM,CAAC;IAElB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,CAAC;IACvB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,WAAW,EAAE,MAAM,CAAC;IAEpB,cAAc,EAAE,MAAM,CAAC;IACvB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,gBAAgB,EAAE,MAAM,CAAC;IACzB,QAAQ,EAAE,MAAM,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IAEpB,mDAAmD;IACnD,SAAS,EAAE,MAAM,CAAC;IAClB,2DAA2D;IAC3D,UAAU,EAAE,MAAM,CAAC;IACnB,yEAAyE;IACzE,QAAQ,EAAE,OAAO,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,8CAA8C;AAC9C,MAAM,WAAW,aAAa;IAC5B,OAAO,EAAE,cAAc,CAAC;IACxB,4BAA4B;IAC5B,KAAK,EAAE,SAAS,GAAG,IAAI,CAAC;IACxB,0CAA0C;IAC1C,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB;AAED,6CAA6C;AAC7C,MAAM,WAAW,eAAe;IAC9B,2CAA2C;IAC3C,QAAQ,EAAE,MAAM,CAAC;IACjB,qDAAqD;IACrD,QAAQ,EAAE,MAAM,CAAC;IACjB,oCAAoC;IACpC,WAAW,EAAE,MAAM,CAAC;IACpB,mDAAmD;IACnD
,qBAAqB,EAAE,MAAM,CAAC;IAC9B,kFAAkF;IAClF,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,mFAAmF;IACnF,cAAc,EAAE,MAAM,CAAC;IACvB,6EAA6E;IAC7E,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,wEAAwE;IACxE,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,uFAAuF;IACvF,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,8EAA8E;IAC9E,IAAI,CAAC,EAAE,MAAM,CAAC;IACd;;;;OAIG;IACH,SAAS,CAAC,EAAE,OAAO,cAAc,EAAE,aAAa,CAAC;
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAYA;;;;GAIG;AACH,MAAM,MAAM,eAAe,GACvB,SAAS,GACT,gBAAgB,GAChB,UAAU,GACV,aAAa,GACb,YAAY,GACZ,cAAc,GACd,aAAa,CAAC;AAElB,4EAA4E;AAC5E,MAAM,WAAW,WAAW;IAC1B,sCAAsC;IACtC,IAAI,EAAE,MAAM,CAAC;IACb,cAAc,EAAE,KAAK,GAAG,MAAM,GAAG,MAAM,GAAG,SAAS,CAAC;IACpD,wEAAwE;IACxE,WAAW,EAAE,MAAM,CAAC;IACpB,+EAA+E;IAC/E,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,iFAAiF;IACjF,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,gDAAgD;IAChD,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,gFAAgF;AAChF,MAAM,WAAW,cAAc;IAC7B,yEAAyE;IACzE,EAAE,EAAE,MAAM,CAAC;IACX,mDAAmD;IACnD,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,wCAAwC;IACxC,UAAU,EAAE,MAAM,CAAC;IACnB,yEAAyE;IACzE,GAAG,EAAE,MAAM,CAAC;IACZ,iEAAiE;IACjE,eAAe,EAAE,eAAe,CAAC;IACjC,4CAA4C;IAC5C,eAAe,EAAE,MAAM,CAAC;IACxB,iCAAiC;IACjC,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,yEAAyE;AACzE,MAAM,WAAW,QAAQ;IACvB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,+EAA+E;IAC/E,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,CAAC;IACnB,iFAAiF;IACjF,QAAQ,EAAE,OAAO,CAAC;IAClB,yEAAyE;IACzE,cAAc,EAAE,MAAM,EAAE,CAAC;CAC1B;AAED,8EAA8E;AAC9E,MAAM,WAAW,SAAS;IACxB,SAAS,EAAE,MAAM,CAAC;IAElB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,CAAC;IACvB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,WAAW,EAAE,MAAM,CAAC;IAEpB,cAAc,EAAE,MAAM,CAAC;IACvB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,gBAAgB,EAAE,MAAM,CAAC;IACzB,QAAQ,EAAE,MAAM,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IAEpB,mDAAmD;IACnD,SAAS,EAAE,MAAM,CAAC;IAClB,2DAA2D;IAC3D,UAAU,EAAE,MAAM,CAAC;IACnB,yEAAyE;IACzE,QAAQ,EAAE,OAAO,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,8CAA8C;AAC9C,MAAM,WAAW,aAAa;IAC5B,OAAO,EAAE,cAAc,CAAC;IACxB,4BAA4B;IAC5B,KAAK,EAAE,SAAS,GAAG,IAAI,CAAC;IACxB,0CAA0C;IAC1C,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB;AAED,6CAA6C;AAC7C,MAAM,WAAW,eAAe;IAC9B,2CAA2C;IAC3C,QAAQ,EAAE,MAAM,CAAC;IACjB,qDAAqD;IACrD,QAAQ,EAAE,MAAM,CAAC;IACjB,oCAAoC;IACpC,WAAW,EAAE,MAAM,CAAC;IACpB,mDAAmD;IACnD
,qBAAqB,EAAE,MAAM,CAAC;IAC9B,kFAAkF;IAClF,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,mFAAmF;IACnF,cAAc,EAAE,MAAM,CAAC;IACvB,6EAA6E;IAC7E,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,wEAAwE;IACxE,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,uFAAuF;IACvF,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,8EAA8E;IAC9E,IAAI,CAAC,EAAE,MAAM,CAAC;IACd;;;;;OAKG;IACH,WAAW,CAAC,EAAE,MAAM,GAAG,MAAM,GAAG,OAAO,CAAC;IACxC,uFAAuF;IACvF,SAAS,CAAC,EAAE,OAAO,mBAAmB,EAAE,QAAQ,EAAE,CAAC;IACnD;;;OAGG;IACH,2FAA2F;IAC3F,UAAU,CAAC,EAAE,OAAO,oBAAoB,EAAE,SAAS,EAAE,CAAC;IACtD;;;;OAIG;IACH,aAAa,CAAC,EAAE,SAAS,GAAG,YAAY,CAAC;IACzC;;;;;;OAMG;IACH,UAAU,CAAC,EAAE,WAAW,GAAG,QAAQ,CAAC;IACpC;;;;;;;;;;;;;;;OAeG;IACH,SAAS,CAAC,EACN,OAAO,GACP,mBAAmB,GACnB,sBAAsB,GACtB,gBAAgB,GAChB,OAAO,GACP,QAAQ,CAAC;IACb;;;;;OAKG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB;;;;;OAKG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB;;;;OAIG;IACH,SAAS,CAAC,EAAE,OAAO,cAAc,EAAE,aAAa,CAAC;IACjD;;;;;;OAMG;IACH,UAAU,CAAC,EAAE,OAAO,kBAAkB,EAAE,UAAU,CAAC;IACnD,sEAAsE;IACtE,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,4EAA4E;IAC5E,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB;;;;;;;OAOG;IACH,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,8EAA8E;IAC9E,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B;;;;;OAKG;IACH,IAAI,CAAC,EAAE,MAAM,CAAC;IACd;;;;;OAKG;IACH,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,qFAAqF;IACrF,mBAAmB,CAAC,EAAE,MAAM,CAAC;CAC9B;AAED,sCAAsC;AACtC,MAAM,WAAW,eAAe;IAC9B,QAAQ,EAAE,aAAa,CAAC;IACxB,MAAM,EAAE,aAAa,GAAG,IAAI,CAAC;IAC7B,uDAAuD;IACvD,OAAO,EAAE,aAAa,EAAE,CAAC;IACzB,WAAW,EAAE,MAAM,CAAC;IACpB,0DAA0D;IAC1D,aAAa,EAAE,MAAM,EAAE,CAAC;CACzB"}
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@metaharness/darwin",
|
|
3
|
-
"version": "0.1
|
|
4
|
-
"description": "
|
|
3
|
+
"version": "0.2.1",
|
|
4
|
+
"description": "An LLM supercharger and cost optimizer: keep your model frozen and evolve the harness around it so a cheap model performs like an expensive one — measurably better, far cheaper. Mutate→sandbox→score→archive; promote only safe, measured wins. Validated on real SWE-bench Lite (~$0.01/instance). Dependency-free (Node built-ins).",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.js",
|
|
7
7
|
"types": "./dist/index.d.ts",
|
|
@@ -26,15 +26,18 @@
|
|
|
26
26
|
"lint": "tsc --noEmit"
|
|
27
27
|
},
|
|
28
28
|
"keywords": [
|
|
29
|
+
"llm",
|
|
30
|
+
"cost-optimization",
|
|
31
|
+
"llm-optimizer",
|
|
32
|
+
"cheap-llm",
|
|
33
|
+
"compute-arbitrage",
|
|
29
34
|
"agent-harness",
|
|
30
|
-
"darwin-mode",
|
|
31
35
|
"self-improvement",
|
|
32
36
|
"evolutionary-search",
|
|
37
|
+
"swe-bench",
|
|
33
38
|
"metaharness",
|
|
34
|
-
"
|
|
35
|
-
"
|
|
36
|
-
"sandbox",
|
|
37
|
-
"benchmark"
|
|
39
|
+
"darwin-mode",
|
|
40
|
+
"sandbox"
|
|
38
41
|
],
|
|
39
42
|
"author": "rUv <ruv@ruv.net>",
|
|
40
43
|
"license": "MIT",
|