@metaharness/darwin 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +221 -0
- package/SECURITY.md +200 -0
- package/dist/archive.d.ts +89 -0
- package/dist/archive.d.ts.map +1 -0
- package/dist/archive.js +220 -0
- package/dist/archive.js.map +1 -0
- package/dist/bench/gates.d.ts +19 -0
- package/dist/bench/gates.d.ts.map +1 -0
- package/dist/bench/gates.js +82 -0
- package/dist/bench/gates.js.map +1 -0
- package/dist/bench/index.d.ts +11 -0
- package/dist/bench/index.d.ts.map +1 -0
- package/dist/bench/index.js +25 -0
- package/dist/bench/index.js.map +1 -0
- package/dist/bench/lineage.d.ts +60 -0
- package/dist/bench/lineage.d.ts.map +1 -0
- package/dist/bench/lineage.js +166 -0
- package/dist/bench/lineage.js.map +1 -0
- package/dist/bench/metrics.d.ts +32 -0
- package/dist/bench/metrics.d.ts.map +1 -0
- package/dist/bench/metrics.js +52 -0
- package/dist/bench/metrics.js.map +1 -0
- package/dist/bench/promotion.d.ts +21 -0
- package/dist/bench/promotion.d.ts.map +1 -0
- package/dist/bench/promotion.js +109 -0
- package/dist/bench/promotion.js.map +1 -0
- package/dist/bench/risk.d.ts +45 -0
- package/dist/bench/risk.d.ts.map +1 -0
- package/dist/bench/risk.js +71 -0
- package/dist/bench/risk.js.map +1 -0
- package/dist/bench/runner.d.ts +53 -0
- package/dist/bench/runner.d.ts.map +1 -0
- package/dist/bench/runner.js +131 -0
- package/dist/bench/runner.js.map +1 -0
- package/dist/bench/score.d.ts +16 -0
- package/dist/bench/score.d.ts.map +1 -0
- package/dist/bench/score.js +83 -0
- package/dist/bench/score.js.map +1 -0
- package/dist/bench/stats.d.ts +26 -0
- package/dist/bench/stats.d.ts.map +1 -0
- package/dist/bench/stats.js +74 -0
- package/dist/bench/stats.js.map +1 -0
- package/dist/bench/suite.d.ts +16 -0
- package/dist/bench/suite.d.ts.map +1 -0
- package/dist/bench/suite.js +59 -0
- package/dist/bench/suite.js.map +1 -0
- package/dist/bench/types.d.ts +135 -0
- package/dist/bench/types.d.ts.map +1 -0
- package/dist/bench/types.js +16 -0
- package/dist/bench/types.js.map +1 -0
- package/dist/cli.d.ts +3 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +125 -0
- package/dist/cli.js.map +1 -0
- package/dist/evolve.d.ts +11 -0
- package/dist/evolve.d.ts.map +1 -0
- package/dist/evolve.js +129 -0
- package/dist/evolve.js.map +1 -0
- package/dist/generator.d.ts +9 -0
- package/dist/generator.d.ts.map +1 -0
- package/dist/generator.js +46 -0
- package/dist/generator.js.map +1 -0
- package/dist/index.d.ts +12 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +37 -0
- package/dist/index.js.map +1 -0
- package/dist/mutator.d.ts +61 -0
- package/dist/mutator.d.ts.map +1 -0
- package/dist/mutator.js +193 -0
- package/dist/mutator.js.map +1 -0
- package/dist/openrouter-mutator.d.ts +32 -0
- package/dist/openrouter-mutator.d.ts.map +1 -0
- package/dist/openrouter-mutator.js +81 -0
- package/dist/openrouter-mutator.js.map +1 -0
- package/dist/repo_profiler.d.ts +8 -0
- package/dist/repo_profiler.d.ts.map +1 -0
- package/dist/repo_profiler.js +127 -0
- package/dist/repo_profiler.js.map +1 -0
- package/dist/safety.d.ts +45 -0
- package/dist/safety.d.ts.map +1 -0
- package/dist/safety.js +191 -0
- package/dist/safety.js.map +1 -0
- package/dist/sandbox.d.ts +24 -0
- package/dist/sandbox.d.ts.map +1 -0
- package/dist/sandbox.js +153 -0
- package/dist/sandbox.js.map +1 -0
- package/dist/scorer.d.ts +26 -0
- package/dist/scorer.d.ts.map +1 -0
- package/dist/scorer.js +168 -0
- package/dist/scorer.js.map +1 -0
- package/dist/templates.d.ts +37 -0
- package/dist/templates.d.ts.map +1 -0
- package/dist/templates.js +309 -0
- package/dist/templates.js.map +1 -0
- package/dist/types.d.ts +123 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +13 -0
- package/dist/types.js.map +1 -0
- package/package.json +57 -0
package/dist/archive.js
ADDED
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
// SPDX-License-Identifier: MIT
|
|
2
|
+
//
|
|
3
|
+
// The archive (ADR-073) — Darwin Mode's population memory. The archive is a
|
|
4
|
+
// TREE of variants, persisted as `archive.json`, not a single best branch.
|
|
5
|
+
// Non-promoted variants are RETAINED, not deleted: "did not clear the promotion
|
|
6
|
+
// gate" means "not chosen as a parent by the default policy", never "removed".
|
|
7
|
+
// Selection (`selectParents`) samples the WHOLE archive — including older,
|
|
8
|
+
// non-promoted branches — which is how evolution escapes hill-climbing.
|
|
9
|
+
//
|
|
10
|
+
// Dependency-free (Node built-ins only). The on-disk shape is a stable
|
|
11
|
+
// `ArchiveRecord[]` so that load → save → load round-trips exactly.
|
|
12
|
+
import { mkdir, readFile, writeFile } from 'node:fs/promises';
|
|
13
|
+
import { dirname } from 'node:path';
|
|
14
|
+
/**
 * Population memory for Darwin Mode: a tree of variant records held in a `Map`
 * keyed by variant id and persisted to a JSON file. Because a `Map` iterates in
 * insertion order, `all`, the tie-break in `best`, and the tie-break in
 * `selectParents` are all deterministic.
 */
export class Archive {
    /** Path of the JSON file this archive loads from and saves to. */
    file;
    /** variantId → `{ variant, score, children }`, iterated in insertion order. */
    records = new Map();
    /**
     * @param file Path to `archive.json`. Nothing is read here; call
     *   {@link load} to populate the archive.
     */
    constructor(file) {
        this.file = file;
    }
    /**
     * Replace the in-memory records with the contents of {@link file}.
     *
     * The archive is emptied first. A read failure, a JSON parse failure, or a
     * top-level value that is not an array all leave it empty; none of them
     * throw. Array entries that fail `isArchiveRecord` are skipped.
     */
    async load() {
        this.records.clear();
        let entries;
        try {
            // Read and parse share one handler: both failures mean "start empty".
            entries = JSON.parse(await readFile(this.file, 'utf8'));
        }
        catch {
            return;
        }
        if (!Array.isArray(entries)) {
            return;
        }
        for (const entry of entries) {
            if (isArchiveRecord(entry)) {
                this.records.set(entry.variant.id, entry);
            }
        }
    }
    /**
     * Register a variant as an unscored record with no children.
     *
     * Re-adding a known id is a no-op. When `variant.parentId` is not `null` and
     * the parent is already in the archive, the new id is appended to the
     * parent's `children` unless it is already listed there.
     */
    addVariant(variant) {
        const { id } = variant;
        if (this.records.has(id)) {
            return;
        }
        // Insert before the parent lookup so a self-parented variant finds itself.
        this.records.set(id, { variant, score: null, children: [] });
        if (variant.parentId === null) {
            return;
        }
        const parent = this.records.get(variant.parentId);
        if (!parent) {
            return;
        }
        if (!parent.children.includes(id)) {
            parent.children.push(id);
        }
    }
    /**
     * Store `score` on the record for `variantId`.
     *
     * @throws {Error} when no record with that id exists.
     */
    setScore(variantId, score) {
        const target = this.records.get(variantId);
        if (target) {
            target.score = score;
            return;
        }
        throw new Error(`Archive.setScore: unknown variant "${variantId}" (add it before scoring)`);
    }
    /** Look up one record by id; `undefined` when the id is not present. */
    get(variantId) {
        return this.records.get(variantId);
    }
    /** A fresh array of every record, in insertion order. */
    all() {
        return Array.from(this.records.values());
    }
    /**
     * The scored record with the greatest `score.finalScore`, or `null` when no
     * record has a score. The comparison is strict, so on a tie the record
     * inserted first is kept.
     */
    best() {
        return this.all().reduce((leader, candidate) => {
            if (candidate.score === null) {
                return leader;
            }
            if (leader === null) {
                return candidate;
            }
            return candidate.score.finalScore > leader.score.finalScore ? candidate : leader;
        }, null);
    }
    /**
     * The variants of the top-`limit` scored records across the whole archive,
     * ordered by `finalScore` descending with ties resolved by insertion order.
     *
     * @param limit Maximum number of variants to return; `<= 0` yields `[]`.
     */
    selectParents(limit) {
        if (limit <= 0) {
            return [];
        }
        // The index is taken over ALL records (scored or not) before filtering, so
        // it reflects true insertion position.
        const ranked = this.all()
            .map((record, index) => ({ record, index }))
            .filter(({ record }) => record.score !== null);
        ranked.sort((a, b) => {
            const delta = b.record.score.finalScore - a.record.score.finalScore;
            return delta !== 0 ? delta : a.index - b.index;
        });
        return ranked.slice(0, limit).map(({ record }) => record.variant);
    }
    /**
     * Ids from the furthest reachable ancestor down to `variantId`, found by
     * following `parentId` links. An unknown `variantId` yields `[]`. The walk
     * stops at a `null` parent, at a parent id that is not in the archive, or at
     * an id that was already visited (which guards against cycles).
     */
    lineageOf(variantId) {
        const chain = [];
        const visited = new Set();
        let id = variantId;
        while (id !== null && !visited.has(id)) {
            const node = this.records.get(id);
            if (!node) {
                break; // unknown start id or dangling parent reference
            }
            visited.add(id);
            chain.unshift(id); // prepend so the oldest ancestor ends up first
            id = node.variant.parentId;
        }
        return chain;
    }
    /**
     * A plain `{ nodes, edges }` projection of the tree. Each record yields one
     * node (`id`, `parentId`, `generation`, `mutationSurface`, plus `finalScore`
     * and `promoted`, both `null` for unscored records). Each entry in a record's
     * `children` yields one `{ from, to }` edge, provided the child id is present
     * in the archive.
     */
    toLineageGraph() {
        const records = this.all();
        const nodes = records.map(({ variant, score }) => {
            const scored = score !== null;
            return {
                id: variant.id,
                parentId: variant.parentId,
                generation: variant.generation,
                mutationSurface: variant.mutationSurface,
                finalScore: scored ? score.finalScore : null,
                promoted: scored ? score.promoted : null,
            };
        });
        const edges = records.flatMap(({ variant, children }) => children
            .filter((childId) => this.records.has(childId))
            .map((childId) => ({ from: variant.id, to: childId })));
        return { nodes, edges };
    }
    /**
     * Write `all()` to {@link file} as two-space-indented JSON followed by a
     * newline, creating the containing directory (recursively) first.
     */
    async save() {
        await mkdir(dirname(this.file), { recursive: true });
        const payload = `${JSON.stringify(this.all(), null, 2)}\n`;
        await writeFile(this.file, payload, 'utf8');
    }
}
|
|
201
|
+
/**
 * Structural guard for a single on-disk record. Defensive against hand-edited or
 * partially-written `archive.json` files: an entry is admitted only when it has
 *
 *   - a `variant` object whose `id` is a string,
 *   - a `children` array, and
 *   - a `score` that is either `null` (unscored) or an object with a numeric
 *     `finalScore`.
 *
 * The `score` check matters because the archive treats every non-`null` score as
 * a scorecard and reads `score.finalScore` from it; a missing or non-object
 * `score` would otherwise be admitted here and throw later at the point of use.
 *
 * @param value Any parsed JSON value.
 * @returns `true` when `value` has the shape described above, else `false`.
 */
function isArchiveRecord(value) {
    if (value === null || typeof value !== 'object')
        return false;
    const { variant, children, score } = value;
    if (variant === null || typeof variant !== 'object')
        return false;
    if (typeof variant.id !== 'string')
        return false;
    if (!Array.isArray(children))
        return false;
    // `null` is the explicit "not scored yet" marker written by the archive.
    if (score === null)
        return true;
    // Anything else must look like a scorecard; `undefined` (key absent) fails here.
    return typeof score === 'object' && typeof score.finalScore === 'number';
}
|
|
220
|
+
//# sourceMappingURL=archive.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"archive.js","sourceRoot":"","sources":["../src/archive.ts"],"names":[],"mappings":"AAAA,+BAA+B;AAC/B,EAAE;AACF,4EAA4E;AAC5E,2EAA2E;AAC3E,gFAAgF;AAChF,+EAA+E;AAC/E,2EAA2E;AAC3E,wEAAwE;AACxE,EAAE;AACF,uEAAuE;AACvE,oEAAoE;AAEpE,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAC9D,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAGpC;;;;;GAKG;AACH,MAAM,OAAO,OAAO;IAQW;IAP7B,2DAA2D;IAC1C,OAAO,GAA+B,IAAI,GAAG,EAAE,CAAC;IAEjE;;;OAGG;IACH,YAA6B,IAAY;QAAZ,SAAI,GAAJ,IAAI,CAAQ;IAAG,CAAC;IAE7C;;;;OAIG;IACH,KAAK,CAAC,IAAI;QACR,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;QACrB,IAAI,GAAW,CAAC;QAChB,IAAI,CAAC;YACH,GAAG,GAAG,MAAM,QAAQ,CAAC,IAAI,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;QAC1C,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,CAAC,qCAAqC;QAC/C,CAAC;QAED,IAAI,MAAe,CAAC;QACpB,IAAI,CAAC;YACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAC3B,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,CAAC,6BAA6B;QACvC,CAAC;QAED,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC;YAAE,OAAO,CAAC,4BAA4B;QAEhE,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,IAAI,CAAC,eAAe,CAAC,KAAK,CAAC;gBAAE,SAAS,CAAC,qCAAqC;YAC5E,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,OAAO,CAAC,EAAE,EAAE,KAAK,CAAC,CAAC;QAC5C,CAAC;IACH,CAAC;IAED;;;;;OAKG;IACH,UAAU,CAAC,OAAuB;QAChC,IAAI,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC;YAAE,OAAO,CAAC,aAAa;QAEvD,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,EAAE,EAAE,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC,CAAC;QAErE,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,CAAC;QAClC,IAAI,QAAQ,KAAK,IAAI,EAAE,CAAC;YACtB,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;YAC1C,IAAI,MAAM,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC,EAAE,CAAC;gBACpD,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;YACnC,CAAC;QACH,CAAC;IACH,CAAC;IAED;;;OAGG;IACH,QAAQ,CAAC,SAAiB,EAAE,KAAgB;QAC1C,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;QAC3C,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,MAAM,IAAI,KAAK,CACb,sCAAsC,SAAS,2BAA2B,CAC3E,CAAC;QACJ,CAAC;QACD,MAAM,CAAC,KAAK,GAAG,KAAK,CAAC;IACvB,CAAC;IAED,8EAA8E;IAC9E,GAAG,CAAC,SAAiB;QACnB,OA
AO,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;IACrC,CAAC;IAED,wCAAwC;IACxC,GAAG;QACD,OAAO,CAAC,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;IACpC,CAAC;IAED;;;;OAIG;IACH,IAAI;QACF,IAAI,MAAM,GAAyB,IAAI,CAAC;QACxC,KAAK,MAAM,MAAM,IAAI,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;YAC3C,IAAI,MAAM,CAAC,KAAK,KAAK,IAAI;gBAAE,SAAS;YACpC,IAAI,MAAM,KAAK,IAAI,IAAI,MAAM,CAAC,KAAK,CAAC,UAAU,GAAG,MAAM,CAAC,KAAM,CAAC,UAAU,EAAE,CAAC;gBAC1E,MAAM,GAAG,MAAM,CAAC;YAClB,CAAC;QACH,CAAC;QACD,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;;;;;;OAOG;IACH,aAAa,CAAC,KAAa;QACzB,IAAI,KAAK,IAAI,CAAC;YAAE,OAAO,EAAE,CAAC;QAE1B,+EAA+E;QAC/E,MAAM,MAAM,GAAoD,EAAE,CAAC;QACnE,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,KAAK,MAAM,MAAM,IAAI,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;YAC3C,IAAI,MAAM,CAAC,KAAK,KAAK,IAAI;gBAAE,MAAM,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,CAAC;YAC1D,KAAK,IAAI,CAAC,CAAC;QACb,CAAC;QAED,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;YACnB,MAAM,KAAK,GAAG,CAAC,CAAC,MAAM,CAAC,KAAM,CAAC,UAAU,GAAG,CAAC,CAAC,MAAM,CAAC,KAAM,CAAC,UAAU,CAAC;YACtE,IAAI,KAAK,KAAK,CAAC;gBAAE,OAAO,KAAK,CAAC,CAAC,0BAA0B;YACzD,OAAO,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,qCAAqC;QACjE,CAAC,CAAC,CAAC;QAEH,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IAC7D,CAAC;IAED;;;;;OAKG;IACH,SAAS,CAAC,SAAiB;QACzB,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,SAAS,CAAC;YAAE,OAAO,EAAE,CAAC;QAE5C,MAAM,IAAI,GAAa,EAAE,CAAC;QAC1B,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;QAC/B,IAAI,SAAS,GAAkB,SAAS,CAAC;QAEzC,OAAO,SAAS,KAAK,IAAI,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,EAAE,CAAC;YAClD,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;YAC3C,IAAI,CAAC,MAAM;gBAAE,MAAM,CAAC,6CAA6C;YACjE,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;YACpB,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YACrB,SAAS,GAAG,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC;QACtC,CAAC;QAED,OAAO,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC,mCAAmC;IAC5D,CAAC;IAED;;;;;OAKG;IACH,cAAc;QAWZ,MAAM,KAAK,GAON,EAAE,CAAC;QACR,MAAM,KAAK,GAAwC,EAAE,CAAC;QAEtD,KAAK,MAAM,MAAM,IAAI,IAAI,
CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;YAC3C,MAAM,EAAE,OAAO,EAAE,KAAK,EAAE,GAAG,MAAM,CAAC;YAClC,KAAK,CAAC,IAAI,CAAC;gBACT,EAAE,EAAE,OAAO,CAAC,EAAE;gBACd,QAAQ,EAAE,OAAO,CAAC,QAAQ;gBAC1B,UAAU,EAAE,OAAO,CAAC,UAAU;gBAC9B,eAAe,EAAE,OAAO,CAAC,eAAe;gBACxC,UAAU,EAAE,KAAK,KAAK,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,UAAU;gBACpD,QAAQ,EAAE,KAAK,KAAK,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,QAAQ;aACjD,CAAC,CAAC;YACH,KAAK,MAAM,OAAO,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;gBACtC,IAAI,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC;oBAC9B,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,OAAO,CAAC,EAAE,EAAE,EAAE,EAAE,OAAO,EAAE,CAAC,CAAC;gBAChD,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC;IAC1B,CAAC;IAED;;;;;OAKG;IACH,KAAK,CAAC,IAAI;QACR,MAAM,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QACrD,MAAM,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,GAAG,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;QACjD,MAAM,SAAS,CAAC,IAAI,CAAC,IAAI,EAAE,GAAG,IAAI,IAAI,EAAE,MAAM,CAAC,CAAC;IAClD,CAAC;CACF;AAED;;;;GAIG;AACH,SAAS,eAAe,CAAC,KAAc;IACrC,IAAI,KAAK,KAAK,IAAI,IAAI,OAAO,KAAK,KAAK,QAAQ;QAAE,OAAO,KAAK,CAAC;IAC9D,MAAM,GAAG,GAAG,KAAgC,CAAC;IAC7C,MAAM,OAAO,GAAG,GAAG,CAAC,OAAO,CAAC;IAC5B,IAAI,OAAO,KAAK,IAAI,IAAI,OAAO,OAAO,KAAK,QAAQ;QAAE,OAAO,KAAK,CAAC;IAClE,IAAI,OAAQ,OAAmC,CAAC,EAAE,KAAK,QAAQ;QAAE,OAAO,KAAK,CAAC;IAC9E,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC;QAAE,OAAO,KAAK,CAAC;IAC/C,qEAAqE;IACrE,OAAO,IAAI,CAAC;AACd,CAAC"}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import type { BenchmarkResult, BenchmarkTask, GateResult } from './types.js';
|
|
2
|
+
/** Solve gate: the public AND hidden acceptance tests both pass. */
|
|
3
|
+
export declare function solveGate(r: BenchmarkResult): GateResult;
|
|
4
|
+
/** Regression gate: the full relevant suite still passes. */
|
|
5
|
+
export declare function regressionGate(r: BenchmarkResult): GateResult;
|
|
6
|
+
/** Safety gate: zero blocked-file touches and zero safety violations (ADR-071). */
|
|
7
|
+
export declare function safetyGate(r: BenchmarkResult): GateResult;
|
|
8
|
+
/** Cost gate: the metered cost is within the task's budget. */
|
|
9
|
+
export declare function costGate(r: BenchmarkResult, task: BenchmarkTask): GateResult;
|
|
10
|
+
/** Repro gate: a clean checkout reproduced the result. */
|
|
11
|
+
export declare function reproGate(cleanReplay: boolean): GateResult;
|
|
12
|
+
/**
|
|
13
|
+
* Evaluate all five gates in canonical order: solve, regression, safety, cost,
|
|
14
|
+
* repro. Returns one GateResult per gate.
|
|
15
|
+
*/
|
|
16
|
+
export declare function evaluateGates(r: BenchmarkResult, task: BenchmarkTask, cleanReplay: boolean): GateResult[];
|
|
17
|
+
/** True iff every gate in the list passed. */
|
|
18
|
+
export declare function allGatesPass(gates: GateResult[]): boolean;
|
|
19
|
+
//# sourceMappingURL=gates.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"gates.d.ts","sourceRoot":"","sources":["../../src/bench/gates.ts"],"names":[],"mappings":"AAUA,OAAO,KAAK,EACV,eAAe,EACf,aAAa,EACb,UAAU,EACX,MAAM,YAAY,CAAC;AAEpB,oEAAoE;AACpE,wBAAgB,SAAS,CAAC,CAAC,EAAE,eAAe,GAAG,UAAU,CASxD;AAED,6DAA6D;AAC7D,wBAAgB,cAAc,CAAC,CAAC,EAAE,eAAe,GAAG,UAAU,CAS7D;AAED,mFAAmF;AACnF,wBAAgB,UAAU,CAAC,CAAC,EAAE,eAAe,GAAG,UAAU,CAWzD;AAED,+DAA+D;AAC/D,wBAAgB,QAAQ,CAAC,CAAC,EAAE,eAAe,EAAE,IAAI,EAAE,aAAa,GAAG,UAAU,CAS5E;AAED,0DAA0D;AAC1D,wBAAgB,SAAS,CAAC,WAAW,EAAE,OAAO,GAAG,UAAU,CAQ1D;AAED;;;GAGG;AACH,wBAAgB,aAAa,CAC3B,CAAC,EAAE,eAAe,EAClB,IAAI,EAAE,aAAa,EACnB,WAAW,EAAE,OAAO,GACnB,UAAU,EAAE,CAQd;AAED,8CAA8C;AAC9C,wBAAgB,YAAY,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,OAAO,CAEzD"}
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
// SPDX-License-Identifier: MIT
|
|
2
|
+
//
|
|
3
|
+
// The five evaluation gates (ADR-076). A child wins a task only when it clears
|
|
4
|
+
// all five: solve (public ∧ hidden), regression, safety (zero blocked-file
|
|
5
|
+
// touches / safety violations), cost (within the task budget), and repro (a
|
|
6
|
+
// clean checkout reproduces the result).
|
|
7
|
+
//
|
|
8
|
+
// Each gate is a pure predicate over a BenchmarkResult (+ task budget / replay
|
|
9
|
+
// flag) returning a GateResult with a short human-readable verdict. No I/O.
|
|
10
|
+
/**
 * Solve gate: passes only when both the public and the hidden acceptance tests
 * passed.
 *
 * @param r Result exposing `publicTestPassed` and `hiddenTestPassed`.
 * @returns `{ gate: 'solve', pass, detail }`; on failure `detail` echoes both
 *   flags.
 */
export function solveGate(r) {
    const { publicTestPassed, hiddenTestPassed } = r;
    const pass = publicTestPassed && hiddenTestPassed;
    let detail;
    if (pass) {
        detail = 'solved: public and hidden tests passed';
    }
    else {
        detail = `not solved: public=${publicTestPassed}, hidden=${hiddenTestPassed}`;
    }
    return { gate: 'solve', pass, detail };
}
|
|
21
|
+
/**
 * Regression gate: mirrors `r.regressionPassed`.
 *
 * @param r Result exposing `regressionPassed`.
 * @returns `{ gate: 'regression', pass, detail }` with a fixed verdict string
 *   for each outcome.
 */
export function regressionGate(r) {
    const { regressionPassed } = r;
    if (regressionPassed) {
        return {
            gate: 'regression',
            pass: regressionPassed,
            detail: 'no regression: full suite passed',
        };
    }
    return {
        gate: 'regression',
        pass: regressionPassed,
        detail: 'regression: full suite failed',
    };
}
|
|
32
|
+
/**
 * Safety gate (ADR-071): passes only when both `r.safetyViolations` and
 * `r.blockedFileTouches` are empty.
 *
 * @param r Result exposing the `safetyViolations` and `blockedFileTouches` lists.
 * @returns `{ gate: 'safety', pass, detail }`; on failure `detail` reports the
 *   length of each list.
 */
export function safetyGate(r) {
    const violationCount = r.safetyViolations.length;
    const touchCount = r.blockedFileTouches.length;
    const pass = violationCount === 0 && touchCount === 0;
    const detail = pass
        ? 'safe: no safety violations or blocked-file touches'
        : `unsafe: ${violationCount} violation(s), ${touchCount} blocked-file touch(es)`;
    return { gate: 'safety', pass, detail };
}
|
|
44
|
+
/**
 * Cost gate: passes when `r.costUsd` is less than or equal to
 * `task.maxCostUsd`.
 *
 * @param r Result exposing `costUsd`.
 * @param task Task exposing `maxCostUsd`.
 * @returns `{ gate: 'cost', pass, detail }`; `detail` shows both amounts and the
 *   comparison that held.
 */
export function costGate(r, task) {
    const { costUsd } = r;
    const { maxCostUsd } = task;
    const pass = costUsd <= maxCostUsd;
    // Pick the wording and the operator together so they cannot drift apart.
    const [label, relation] = pass ? ['within budget', '<='] : ['over budget', '>'];
    return {
        gate: 'cost',
        pass,
        detail: `${label}: $${costUsd} ${relation} $${maxCostUsd}`,
    };
}
|
|
55
|
+
/**
 * Repro gate: mirrors the `cleanReplay` flag.
 *
 * @param cleanReplay Whether the clean replay matched.
 * @returns `{ gate: 'repro', pass, detail }` with a fixed verdict string for
 *   each outcome.
 */
export function reproGate(cleanReplay) {
    let detail = 'not reproducible: clean replay mismatched';
    if (cleanReplay) {
        detail = 'reproducible: clean replay matched';
    }
    return { gate: 'repro', pass: cleanReplay, detail };
}
|
|
65
|
+
/**
 * Run the five gates and return their results in the canonical order:
 * solve, regression, safety, cost, repro.
 *
 * @param r Benchmark result handed to the solve, regression, safety and cost gates.
 * @param task Task handed to the cost gate.
 * @param cleanReplay Flag handed to the repro gate.
 * @returns An array of five gate results, one per gate.
 */
export function evaluateGates(r, task, cleanReplay) {
    const solve = solveGate(r);
    const regression = regressionGate(r);
    const safety = safetyGate(r);
    const cost = costGate(r, task);
    const repro = reproGate(cleanReplay);
    return [solve, regression, safety, cost, repro];
}
|
|
78
|
+
/**
 * Whether every gate result in `gates` has a truthy `pass`.
 *
 * @param gates Gate results to check; an empty list yields `true`.
 * @returns `false` as soon as one gate did not pass, otherwise `true`.
 */
export function allGatesPass(gates) {
    const anyFailed = gates.some((gate) => !gate.pass);
    return !anyFailed;
}
|
|
82
|
+
//# sourceMappingURL=gates.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"gates.js","sourceRoot":"","sources":["../../src/bench/gates.ts"],"names":[],"mappings":"AAAA,+BAA+B;AAC/B,EAAE;AACF,+EAA+E;AAC/E,2EAA2E;AAC3E,4EAA4E;AAC5E,yCAAyC;AACzC,EAAE;AACF,+EAA+E;AAC/E,4EAA4E;AAQ5E,oEAAoE;AACpE,MAAM,UAAU,SAAS,CAAC,CAAkB;IAC1C,MAAM,IAAI,GAAG,CAAC,CAAC,gBAAgB,IAAI,CAAC,CAAC,gBAAgB,CAAC;IACtD,OAAO;QACL,IAAI,EAAE,OAAO;QACb,IAAI;QACJ,MAAM,EAAE,IAAI;YACV,CAAC,CAAC,wCAAwC;YAC1C,CAAC,CAAC,sBAAsB,CAAC,CAAC,gBAAgB,YAAY,CAAC,CAAC,gBAAgB,EAAE;KAC7E,CAAC;AACJ,CAAC;AAED,6DAA6D;AAC7D,MAAM,UAAU,cAAc,CAAC,CAAkB;IAC/C,MAAM,IAAI,GAAG,CAAC,CAAC,gBAAgB,CAAC;IAChC,OAAO;QACL,IAAI,EAAE,YAAY;QAClB,IAAI;QACJ,MAAM,EAAE,IAAI;YACV,CAAC,CAAC,kCAAkC;YACpC,CAAC,CAAC,+BAA+B;KACpC,CAAC;AACJ,CAAC;AAED,mFAAmF;AACnF,MAAM,UAAU,UAAU,CAAC,CAAkB;IAC3C,MAAM,IAAI,GACR,CAAC,CAAC,gBAAgB,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,CAAC,kBAAkB,CAAC,MAAM,KAAK,CAAC,CAAC;IACvE,OAAO;QACL,IAAI,EAAE,QAAQ;QACd,IAAI;QACJ,MAAM,EAAE,IAAI;YACV,CAAC,CAAC,oDAAoD;YACtD,CAAC,CAAC,WAAW,CAAC,CAAC,gBAAgB,CAAC,MAAM,iBAAiB;gBACrD,GAAG,CAAC,CAAC,kBAAkB,CAAC,MAAM,yBAAyB;KAC5D,CAAC;AACJ,CAAC;AAED,+DAA+D;AAC/D,MAAM,UAAU,QAAQ,CAAC,CAAkB,EAAE,IAAmB;IAC9D,MAAM,IAAI,GAAG,CAAC,CAAC,OAAO,IAAI,IAAI,CAAC,UAAU,CAAC;IAC1C,OAAO;QACL,IAAI,EAAE,MAAM;QACZ,IAAI;QACJ,MAAM,EAAE,IAAI;YACV,CAAC,CAAC,mBAAmB,CAAC,CAAC,OAAO,QAAQ,IAAI,CAAC,UAAU,EAAE;YACvD,CAAC,CAAC,iBAAiB,CAAC,CAAC,OAAO,OAAO,IAAI,CAAC,UAAU,EAAE;KACvD,CAAC;AACJ,CAAC;AAED,0DAA0D;AAC1D,MAAM,UAAU,SAAS,CAAC,WAAoB;IAC5C,OAAO;QACL,IAAI,EAAE,OAAO;QACb,IAAI,EAAE,WAAW;QACjB,MAAM,EAAE,WAAW;YACjB,CAAC,CAAC,oCAAoC;YACtC,CAAC,CAAC,2CAA2C;KAChD,CAAC;AACJ,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,aAAa,CAC3B,CAAkB,EAClB,IAAmB,EACnB,WAAoB;IAEpB,OAAO;QACL,SAAS,CAAC,CAAC,CAAC;QACZ,cAAc,CAAC,CAAC,CAAC;QACjB,UAAU,CAAC,CAAC,CAAC;QACb,QAAQ,CAAC,CAAC,EAAE,IAAI,CAAC;QACjB,SAAS,CAAC,WAAW,CAAC;KACvB,CAAC;AACJ,CAAC;AAED,8CAA8C;AAC9C,MAAM,UAAU,YAAY,CAAC,KAAmB;IAC9C,OAAO,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;AACpC,CAAC"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
export * from './types.js';
|
|
2
|
+
export * from './score.js';
|
|
3
|
+
export * from './gates.js';
|
|
4
|
+
export * from './stats.js';
|
|
5
|
+
export * from './promotion.js';
|
|
6
|
+
export * from './risk.js';
|
|
7
|
+
export * from './metrics.js';
|
|
8
|
+
export * from './lineage.js';
|
|
9
|
+
export * from './suite.js';
|
|
10
|
+
export * from './runner.js';
|
|
11
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/bench/index.ts"],"names":[],"mappings":"AAeA,cAAc,YAAY,CAAC;AAC3B,cAAc,YAAY,CAAC;AAC3B,cAAc,YAAY,CAAC;AAC3B,cAAc,YAAY,CAAC;AAC3B,cAAc,gBAAgB,CAAC;AAC/B,cAAc,WAAW,CAAC;AAC1B,cAAc,cAAc,CAAC;AAC7B,cAAc,cAAc,CAAC;AAC7B,cAAc,YAAY,CAAC;AAC3B,cAAc,aAAa,CAAC"}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
// SPDX-License-Identifier: MIT
|
|
2
|
+
//
|
|
3
|
+
// @metaharness/darwin — benchmark layer (ADR-076 + the SOTA "Darwin Plus" stack,
|
|
4
|
+
// ADR-077…081). Benchmark the parent vs the child, not the idea.
|
|
5
|
+
//
|
|
6
|
+
// types — the benchmark contract (tasks, results, suites, decisions)
|
|
7
|
+
// score — the verified-solve score + penalty layer (ADR-076)
|
|
8
|
+
// gates — the five gates: solve · regression · safety · cost · repro
|
|
9
|
+
// stats — the SEEDED bootstrap confidence (reproducible, unlike Math.random)
|
|
10
|
+
// promotion — the statistical promotion rule (ADR-076)
|
|
11
|
+
// risk — the SGM extended gate + global cumulative risk budget (ADR-079)
|
|
12
|
+
// lineage — descendant potential + clade parent selection (HGM, ADR-078)
|
|
13
|
+
// suite — immutable, hash-pinned task snapshots (anti-tampering)
|
|
14
|
+
// runner — the parent-vs-child evaluation over the secure sandbox
|
|
15
|
+
export * from './types.js';
|
|
16
|
+
export * from './score.js';
|
|
17
|
+
export * from './gates.js';
|
|
18
|
+
export * from './stats.js';
|
|
19
|
+
export * from './promotion.js';
|
|
20
|
+
export * from './risk.js';
|
|
21
|
+
export * from './metrics.js';
|
|
22
|
+
export * from './lineage.js';
|
|
23
|
+
export * from './suite.js';
|
|
24
|
+
export * from './runner.js';
|
|
25
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/bench/index.ts"],"names":[],"mappings":"AAAA,+BAA+B;AAC/B,EAAE;AACF,iFAAiF;AACjF,iEAAiE;AACjE,EAAE;AACF,4EAA4E;AAC5E,oEAAoE;AACpE,4EAA4E;AAC5E,oFAAoF;AACpF,0DAA0D;AAC1D,iFAAiF;AACjF,8EAA8E;AAC9E,wEAAwE;AACxE,wEAAwE;AAExE,cAAc,YAAY,CAAC;AAC3B,cAAc,YAAY,CAAC;AAC3B,cAAc,YAAY,CAAC;AAC3B,cAAc,YAAY,CAAC;AAC3B,cAAc,gBAAgB,CAAC;AAC/B,cAAc,WAAW,CAAC;AAC1B,cAAc,cAAc,CAAC;AAC7B,cAAc,cAAc,CAAC;AAC7B,cAAc,YAAY,CAAC;AAC3B,cAAc,aAAa,CAAC"}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import type { LineageNode } from './types.js';
|
|
2
|
+
/**
|
|
3
|
+
* Descendant potential: how much better this node's BEST descendants are than
|
|
4
|
+
* the node itself. We take the top-`topK` descendants by score, average them,
|
|
5
|
+
* and return `avgTop - node.score` (can be negative when descendants regressed).
|
|
6
|
+
* A leaf — or a node whose entire sub-tree is empty — has potential 0.
|
|
7
|
+
*
|
|
8
|
+
* This is the core "best branch beats best agent" signal: a low-scoring node
|
|
9
|
+
* that already produced high-scoring children gets a large positive lift.
|
|
10
|
+
*
|
|
11
|
+
* @param nodeId the node to evaluate.
|
|
12
|
+
* @param nodes the full lineage keyed by id.
|
|
13
|
+
* @param topK how many of the best descendants to average (default 3).
|
|
14
|
+
* @returns rounded `avgTopK(descendantScores) - node.score`, or 0 if no
|
|
15
|
+
* descendants (or the node is absent).
|
|
16
|
+
*/
|
|
17
|
+
export declare function descendantPotential(nodeId: string, nodes: Map<string, LineageNode>, topK?: number): number;
|
|
18
|
+
/**
|
|
19
|
+
* Archive diversity: a structural novelty score in (0,1] that rewards
|
|
20
|
+
* UNDER-explored branches. Defined as `1 / (1 + siblingCount)`, where
|
|
21
|
+
* `siblingCount` is the number of OTHER children of this node's parent. A node
|
|
22
|
+
* with no siblings (or a root, or a node whose parent is absent from the map)
|
|
23
|
+
* scores 1; the more crowded its branch, the lower the score (3 siblings ⇒ 1/4).
|
|
24
|
+
*
|
|
25
|
+
* @param nodeId the node to evaluate.
|
|
26
|
+
* @param nodes the full lineage keyed by id.
|
|
27
|
+
* @returns rounded novelty score in (0,1].
|
|
28
|
+
*/
|
|
29
|
+
export declare function archiveDiversity(nodeId: string, nodes: Map<string, LineageNode>): number;
|
|
30
|
+
/** Relative weights for the three parent-selection signals. */
|
|
31
|
+
export interface SelectionWeights {
|
|
32
|
+
score?: number;
|
|
33
|
+
potential?: number;
|
|
34
|
+
diversity?: number;
|
|
35
|
+
}
|
|
36
|
+
/**
|
|
37
|
+
* Combined parent-selection score for one node: a weighted blend of its own
|
|
38
|
+
* score, its descendant potential, and its archive diversity. With the default
|
|
39
|
+
* weights a fertile low-scoring branch can out-rank a sterile high-scoring leaf.
|
|
40
|
+
*
|
|
41
|
+
* @param nodeId the node to evaluate.
|
|
42
|
+
* @param nodes the full lineage keyed by id.
|
|
43
|
+
* @param weights optional overrides for the three signal weights.
|
|
44
|
+
* @param topK forwarded to {@link descendantPotential}.
|
|
45
|
+
* @returns rounded selection score, or 0 if the node is absent.
|
|
46
|
+
*/
|
|
47
|
+
export declare function parentSelectionScore(nodeId: string, nodes: Map<string, LineageNode>, weights?: SelectionWeights, topK?: number): number;
|
|
48
|
+
/**
|
|
49
|
+
* Rank ALL node ids by {@link parentSelectionScore} (descending) and return the
|
|
50
|
+
* top `limit`. Tie-break is DETERMINISTIC by id ascending, so the result is
|
|
51
|
+
* stable across runs. A non-positive `limit` yields an empty list.
|
|
52
|
+
*
|
|
53
|
+
* @param nodes the full lineage keyed by id.
|
|
54
|
+
* @param limit how many parents to return.
|
|
55
|
+
* @param weights optional overrides forwarded to {@link parentSelectionScore}.
|
|
56
|
+
* @param topK optional, forwarded to {@link descendantPotential}.
|
|
57
|
+
* @returns the top-`limit` node ids, best first.
|
|
58
|
+
*/
|
|
59
|
+
export declare function selectParentsByPotential(nodes: Map<string, LineageNode>, limit: number, weights?: SelectionWeights, topK?: number): string[];
|
|
60
|
+
//# sourceMappingURL=lineage.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"lineage.d.ts","sourceRoot":"","sources":["../../src/bench/lineage.ts"],"names":[],"mappings":"AAeA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AA6C9C;;;;;;;;;;;;;;GAcG;AACH,wBAAgB,mBAAmB,CACjC,MAAM,EAAE,MAAM,EACd,KAAK,EAAE,GAAG,CAAC,MAAM,EAAE,WAAW,CAAC,EAC/B,IAAI,SAAI,GACP,MAAM,CAqBR;AAED;;;;;;;;;;GAUG;AACH,wBAAgB,gBAAgB,CAC9B,MAAM,EAAE,MAAM,EACd,KAAK,EAAE,GAAG,CAAC,MAAM,EAAE,WAAW,CAAC,GAC9B,MAAM,CAYR;AAED,+DAA+D;AAC/D,MAAM,WAAW,gBAAgB;IAC/B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AASD;;;;;;;;;;GAUG;AACH,wBAAgB,oBAAoB,CAClC,MAAM,EAAE,MAAM,EACd,KAAK,EAAE,GAAG,CAAC,MAAM,EAAE,WAAW,CAAC,EAC/B,OAAO,CAAC,EAAE,gBAAgB,EAC1B,IAAI,CAAC,EAAE,MAAM,GACZ,MAAM,CAWR;AAED;;;;;;;;;;GAUG;AACH,wBAAgB,wBAAwB,CACtC,KAAK,EAAE,GAAG,CAAC,MAAM,EAAE,WAAW,CAAC,EAC/B,KAAK,EAAE,MAAM,EACb,OAAO,CAAC,EAAE,gBAAgB,EAC1B,IAAI,CAAC,EAAE,MAAM,GACZ,MAAM,EAAE,CAWV"}
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
// SPDX-License-Identifier: MIT
|
|
2
|
+
//
|
|
3
|
+
// Lineage selection (ADR-076, §"Descendant-potential-aware selection") — pick
|
|
4
|
+
// the next parents to mutate by the PROMISE OF THE BRANCH, not just the score of
|
|
5
|
+
// the node. A low-scoring node that already spawned high-scoring descendants is
|
|
6
|
+
// a fertile lineage worth re-exploring; a high-scoring leaf with no offspring is
|
|
7
|
+
// a dead end. We blend three signals: the node's own score, the lift its best
|
|
8
|
+
// descendants show over it (descendant potential), and a structural novelty
|
|
9
|
+
// bonus for under-explored branches (archive diversity).
|
|
10
|
+
//
|
|
11
|
+
// Pure functions, no I/O. Every public number is rounded to 6 decimals
|
|
12
|
+
// (ADR-075 reproducibility clause) so re-running on the same map yields a
|
|
13
|
+
// byte-identical, deterministic result. Tie-breaks are by id so ranking is
|
|
14
|
+
// stable across runs. Dependency-free (Node built-ins only).
|
|
15
|
+
/**
 * Snap a number to 6 decimal places so repeated runs yield identical values.
 * The value is scaled up, rounded to the nearest integer and scaled back, then
 * passed through `toFixed(6)`; parsing that string back into a number also
 * turns a `-0` result into plain `0`. Implemented locally so this module stays
 * dependency-free.
 *
 * @param value the number to round.
 * @returns `value` rounded to 6 decimals.
 */
function round6(value) {
    const SCALE = 1e6;
    const snapped = Math.round(value * SCALE) / SCALE;
    return Number(snapped.toFixed(6));
}
|
|
23
|
+
/**
 * Gather every descendant id reachable from `nodeId` by following `children`
 * links, using an explicit stack (depth-first, no recursion).
 *
 * A `seen` set makes the walk safe on malformed lineages: an id is expanded at
 * most once, so cycles and duplicated child entries cannot loop forever. Child
 * ids that are missing from `nodes` are ignored. The starting node is never
 * part of the result, even when a cycle points back at it.
 *
 * @param nodeId the root of the sub-tree to enumerate.
 * @param nodes  the full lineage keyed by id.
 * @returns the set of reachable descendant ids (excluding `nodeId`); empty
 *          when `nodeId` is not in `nodes`.
 */
function collectDescendants(nodeId, nodes) {
    const reachable = new Set();
    const root = nodes.get(nodeId);
    if (root) {
        // Seeding `seen` with the root keeps it out of the result on cycles.
        const seen = new Set([nodeId]);
        const pending = [...root.children];
        while (pending.length > 0) {
            const current = pending.pop();
            if (!seen.has(current)) {
                seen.add(current);
                const entry = nodes.get(current);
                // Only ids that resolve to a real node count as descendants.
                if (entry) {
                    reachable.add(current);
                    for (const childId of entry.children) {
                        if (!seen.has(childId)) {
                            pending.push(childId);
                        }
                    }
                }
            }
        }
    }
    return reachable;
}
|
|
56
|
+
/**
 * Descendant potential: how much better this node's BEST descendants are than
 * the node itself. The top-`topK` descendant scores are averaged and the
 * node's own score is subtracted, so the result is negative when the
 * descendants regressed. A leaf, or a node with no resolvable descendants,
 * has potential 0.
 *
 * This is the core "best branch beats best agent" signal: a low-scoring node
 * that already produced high-scoring children gets a large positive lift.
 *
 * `topK` is normalised to a whole count before use: it is floored, clamped to
 * `[1, descendantCount]`, and falls back to the default of 3 when it is not a
 * number. Without the flooring a fractional `topK` would sum more scores than
 * it divides by and report a wrong average.
 *
 * @param nodeId the node to evaluate.
 * @param nodes  the full lineage keyed by id.
 * @param topK   how many of the best descendants to average (default 3).
 * @returns rounded `avgTopK(descendantScores) - node.score`, or 0 if no
 *          descendants (or the node is absent).
 */
export function descendantPotential(nodeId, nodes, topK = 3) {
    const node = nodes.get(nodeId);
    if (!node)
        return 0;
    const scores = [];
    for (const id of collectDescendants(nodeId, nodes)) {
        const descendant = nodes.get(id);
        if (descendant)
            scores.push(descendant.score);
    }
    if (scores.length === 0)
        return 0;
    // Whole-number sample size: floor fractions, fall back to 3 on NaN,
    // then clamp into [1, scores.length]. Infinity means "all descendants".
    const wanted = Math.floor(topK);
    const k = Math.max(1, Math.min(Number.isNaN(wanted) ? 3 : wanted, scores.length));
    scores.sort((a, b) => b - a);
    let sum = 0;
    for (let i = 0; i < k; i += 1)
        sum += scores[i];
    return round6(sum / k - node.score);
}
|
|
94
|
+
/**
 * Archive diversity: a structural novelty score in (0,1] that rewards
 * UNDER-explored branches. Defined as `1 / (1 + siblingCount)`, where
 * `siblingCount` is the number of DISTINCT other ids listed in the parent's
 * `children`. A node with no siblings (or a root, or a node whose parent is
 * absent from the map) scores 1; the more crowded its branch, the lower the
 * score (3 siblings ⇒ 1/4).
 *
 * Sibling ids are de-duplicated so that a malformed parent listing the same
 * child more than once does not make the branch look more crowded than it is.
 *
 * @param nodeId the node to evaluate.
 * @param nodes  the full lineage keyed by id.
 * @returns rounded novelty score; 1 when the node is absent from `nodes`.
 */
export function archiveDiversity(nodeId, nodes) {
    const node = nodes.get(nodeId);
    if (!node)
        return 1;
    // Root (no parent id): maximally novel.
    if (node.parentId == null)
        return 1;
    const parent = nodes.get(node.parentId);
    // Orphan whose parent is not in the map: maximally novel.
    if (!parent)
        return 1;
    // Distinct OTHER children of the parent: collapse duplicates, drop self.
    const siblings = new Set(parent.children);
    siblings.delete(nodeId);
    return round6(1 / (1 + siblings.size));
}
|
|
119
|
+
/**
 * ADR-076 default blend: score dominates, but branch promise and novelty count.
 * Frozen because this single object backs every call that omits a weight; an
 * accidental write would silently change all later rankings.
 */
const DEFAULT_WEIGHTS = Object.freeze({
    score: 0.7,
    potential: 0.2,
    diversity: 0.1,
});
|
|
125
|
+
/**
 * Combined parent-selection score for one node: a weighted blend of its own
 * score, its descendant potential, and its archive diversity. With the default
 * weights a fertile low-scoring branch can out-rank a sterile high-scoring leaf.
 *
 * Each weight is resolved independently: a field that is missing from
 * `weights` OR explicitly `undefined` uses the default. Spreading `weights`
 * over the defaults would let an explicit `undefined` replace the default and
 * turn the whole score into `NaN`.
 *
 * @param nodeId  the node to evaluate.
 * @param nodes   the full lineage keyed by id.
 * @param weights optional overrides for the three signal weights.
 * @param topK    forwarded to {@link descendantPotential}.
 * @returns rounded selection score, or 0 if the node is absent.
 */
export function parentSelectionScore(nodeId, nodes, weights, topK) {
    const node = nodes.get(nodeId);
    if (!node)
        return 0;
    const overrides = weights == null ? {} : weights;
    // Per-field fallback so `{ score: undefined }` keeps the default weight.
    const weightOf = (key) => (overrides[key] === undefined ? DEFAULT_WEIGHTS[key] : overrides[key]);
    const potential = descendantPotential(nodeId, nodes, topK);
    const diversity = archiveDiversity(nodeId, nodes);
    return round6(weightOf('score') * node.score +
        weightOf('potential') * potential +
        weightOf('diversity') * diversity);
}
|
|
145
|
+
/**
 * Score every node id with {@link parentSelectionScore}, order the ids from
 * best to worst, and return the first `limit` of them. Equal scores are
 * ordered by id ascending, so the ranking is deterministic across runs.
 * A non-positive `limit` yields an empty list.
 *
 * @param nodes   the full lineage keyed by id.
 * @param limit   how many parents to return.
 * @param weights optional overrides forwarded to {@link parentSelectionScore}.
 * @param topK    optional, forwarded to {@link descendantPotential}.
 * @returns the top-`limit` node ids, best first.
 */
export function selectParentsByPotential(nodes, limit, weights, topK) {
    if (limit <= 0) {
        return [];
    }
    const ranked = [];
    for (const id of nodes.keys()) {
        ranked.push({ id, score: parentSelectionScore(id, nodes, weights, topK) });
    }
    ranked.sort((left, right) => {
        // Higher score first; a zero (or NaN) difference falls through to ids.
        const byScore = right.score - left.score;
        if (byScore) {
            return byScore;
        }
        if (left.id < right.id) {
            return -1;
        }
        if (left.id > right.id) {
            return 1;
        }
        return 0;
    });
    return ranked.slice(0, limit).map(({ id }) => id);
}
|
|
166
|
+
//# sourceMappingURL=lineage.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"lineage.js","sourceRoot":"","sources":["../../src/bench/lineage.ts"],"names":[],"mappings":"AAAA,+BAA+B;AAC/B,EAAE;AACF,8EAA8E;AAC9E,iFAAiF;AACjF,gFAAgF;AAChF,iFAAiF;AACjF,8EAA8E;AAC9E,4EAA4E;AAC5E,yDAAyD;AACzD,EAAE;AACF,uEAAuE;AACvE,0EAA0E;AAC1E,2EAA2E;AAC3E,6DAA6D;AAI7D;;;;GAIG;AACH,SAAS,MAAM,CAAC,KAAa;IAC3B,OAAO,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,GAAG,GAAG,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;AACrD,CAAC;AAED;;;;;;;;;GASG;AACH,SAAS,kBAAkB,CACzB,MAAc,EACd,KAA+B;IAE/B,MAAM,KAAK,GAAG,IAAI,GAAG,EAAU,CAAC;IAChC,MAAM,OAAO,GAAG,IAAI,GAAG,CAAS,CAAC,MAAM,CAAC,CAAC,CAAC;IAC1C,MAAM,KAAK,GAAG,KAAK,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;IAChC,IAAI,CAAC,KAAK;QAAE,OAAO,KAAK,CAAC;IAEzB,MAAM,KAAK,GAAa,CAAC,GAAG,KAAK,CAAC,QAAQ,CAAC,CAAC;IAC5C,OAAO,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACxB,MAAM,EAAE,GAAG,KAAK,CAAC,GAAG,EAAY,CAAC;QACjC,IAAI,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC;YAAE,SAAS,CAAC,yBAAyB;QACxD,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAChB,MAAM,IAAI,GAAG,KAAK,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAC3B,IAAI,CAAC,IAAI;YAAE,SAAS,CAAC,kCAAkC;QACvD,KAAK,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QACd,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YAClC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC;gBAAE,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC7C,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;;;;;;;;;;;;GAcG;AACH,MAAM,UAAU,mBAAmB,CACjC,MAAc,EACd,KAA+B,EAC/B,IAAI,GAAG,CAAC;IAER,MAAM,IAAI,GAAG,KAAK,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;IAC/B,IAAI,CAAC,IAAI;QAAE,OAAO,CAAC,CAAC;IAEpB,MAAM,WAAW,GAAG,kBAAkB,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC;IACtD,IAAI,WAAW,CAAC,IAAI,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAErC,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,KAAK,MAAM,EAAE,IAAI,WAAW,EAAE,CAAC;QAC7B,MAAM,CAAC,GAAG,KAAK,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QACxB,IAAI,CAAC;YAAE,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;IAC9B,CAAC;IACD,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAElC,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC;IACrD,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,
EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC7B,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC;QAAE,GAAG,IAAI,MAAM,CAAC,CAAC,CAAC,CAAC;IAChD,MAAM,MAAM,GAAG,GAAG,GAAG,CAAC,CAAC;IAEvB,OAAO,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC;AACrC,CAAC;AAED;;;;;;;;;;GAUG;AACH,MAAM,UAAU,gBAAgB,CAC9B,MAAc,EACd,KAA+B;IAE/B,MAAM,IAAI,GAAG,KAAK,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;IAC/B,IAAI,CAAC,IAAI;QAAE,OAAO,CAAC,CAAC;IAEpB,sEAAsE;IACtE,IAAI,IAAI,CAAC,QAAQ,IAAI,IAAI;QAAE,OAAO,CAAC,CAAC;IACpC,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACxC,IAAI,CAAC,MAAM;QAAE,OAAO,CAAC,CAAC;IAEtB,iEAAiE;IACjE,MAAM,YAAY,GAAG,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,MAAM,CAAC,CAAC,MAAM,CAAC;IACxE,OAAO,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,YAAY,CAAC,CAAC,CAAC;AACxC,CAAC;AASD,oFAAoF;AACpF,MAAM,eAAe,GAA+B;IAClD,KAAK,EAAE,GAAG;IACV,SAAS,EAAE,GAAG;IACd,SAAS,EAAE,GAAG;CACf,CAAC;AAEF;;;;;;;;;;GAUG;AACH,MAAM,UAAU,oBAAoB,CAClC,MAAc,EACd,KAA+B,EAC/B,OAA0B,EAC1B,IAAa;IAEb,MAAM,IAAI,GAAG,KAAK,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;IAC/B,IAAI,CAAC,IAAI;QAAE,OAAO,CAAC,CAAC;IAEpB,MAAM,CAAC,GAAG,EAAE,GAAG,eAAe,EAAE,GAAG,OAAO,EAAE,CAAC;IAC7C,MAAM,SAAS,GAAG,mBAAmB,CAAC,MAAM,EAAE,KAAK,EAAE,IAAI,CAAC,CAAC;IAC3D,MAAM,SAAS,GAAG,gBAAgB,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC;IAElD,OAAO,MAAM,CACX,CAAC,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,GAAG,CAAC,CAAC,SAAS,GAAG,SAAS,GAAG,CAAC,CAAC,SAAS,GAAG,SAAS,CACzE,CAAC;AACJ,CAAC;AAED;;;;;;;;;;GAUG;AACH,MAAM,UAAU,wBAAwB,CACtC,KAA+B,EAC/B,KAAa,EACb,OAA0B,EAC1B,IAAa;IAEb,IAAI,KAAK,IAAI,CAAC;QAAE,OAAO,EAAE,CAAC;IAE1B,MAAM,MAAM,GAAG,CAAC,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC;QAC5C,EAAE;QACF,KAAK,EAAE,oBAAoB,CAAC,EAAE,EAAE,KAAK,EAAE,OAAO,EAAE,IAAI,CAAC;KACtD,CAAC,CAAC,CAAC;IAEJ,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,CA
AC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAEvF,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;AACjD,CAAC"}
|