@metaharness/weight-eft 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +147 -0
- package/dist/cli.d.ts +14 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +187 -0
- package/dist/cli.js.map +1 -0
- package/dist/eval.d.ts +50 -0
- package/dist/eval.d.ts.map +1 -0
- package/dist/eval.js +96 -0
- package/dist/eval.js.map +1 -0
- package/dist/export.d.ts +28 -0
- package/dist/export.d.ts.map +1 -0
- package/dist/export.js +249 -0
- package/dist/export.js.map +1 -0
- package/dist/genome.d.ts +38 -0
- package/dist/genome.d.ts.map +1 -0
- package/dist/genome.js +75 -0
- package/dist/genome.js.map +1 -0
- package/dist/index.d.ts +11 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +13 -0
- package/dist/index.js.map +1 -0
- package/dist/reward-hack.d.ts +17 -0
- package/dist/reward-hack.d.ts.map +1 -0
- package/dist/reward-hack.js +105 -0
- package/dist/reward-hack.js.map +1 -0
- package/dist/train.d.ts +112 -0
- package/dist/train.d.ts.map +1 -0
- package/dist/train.js +166 -0
- package/dist/train.js.map +1 -0
- package/dist/types.d.ts +144 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +20 -0
- package/dist/types.js.map +1 -0
- package/package.json +64 -0
package/dist/export.js
ADDED
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
// SPDX-License-Identifier: MIT
|
|
2
|
+
//
|
|
3
|
+
// export.ts — Archive → training-data exporter.
|
|
4
|
+
//
|
|
5
|
+
// Reads Darwin's trajectory archive (a DarwinTrajectory[]; reconstructable from
|
|
6
|
+
// Firestore darwin_runs + local prediction/trajectory artifacts) and emits two
|
|
7
|
+
// standard training sets:
|
|
8
|
+
//
|
|
9
|
+
// SFT = ALL gold-resolved trajectories (cheap-OWN *and* frontier-escalation).
|
|
10
|
+
// Frontier successes are included for OFF-POLICY-SAFE DISTILLATION —
|
|
11
|
+
// the cheap model learns to imitate a frontier success on issues it
|
|
12
|
+
// could not solve itself. SFT (max-likelihood) is off-policy-stable.
|
|
13
|
+
//
|
|
14
|
+
// DPO = ON-POLICY cheap-vs-cheap pairs ONLY. chosen = a resolved sample,
|
|
15
|
+
// rejected = an empty/failed sample by the SAME cheap model on the
|
|
16
|
+
// SAME instance (BoN-derived). We do NOT emit frontier-chosen-vs-cheap-
|
|
17
|
+
// rejected as DPO: an off-policy preference pair is unstable (the
|
|
18
|
+
// reference policy never produced the chosen completion). That signal
|
|
19
|
+
// goes to SFT instead.
|
|
20
|
+
//
|
|
21
|
+
// THE CONTAMINATION GUARD (ADR-198, the headline correctness property): strict
|
|
22
|
+
// train/eval instance-ID disjointness. Any trajectory whose instance_id is in
|
|
23
|
+
// the caller's evalHoldout is excluded; an overlap throws. Training on eval
|
|
24
|
+
// instances is fake lift.
|
|
25
|
+
//
|
|
26
|
+
// Two further guards: a LONG-CONTEXT filter (drop/truncate over-budget
|
|
27
|
+
// trajectories, never silently) and TOOL-CALL FIDELITY (tool_calls survive into
|
|
28
|
+
// SFT messages, never stringified).
|
|
29
|
+
import { detectRewardHack } from './reward-hack.js';
|
|
30
|
+
const DEFAULT_MAX_TOKENS = 28000;
|
|
31
|
+
/**
 * Rough token estimate for a message list.
 *
 * No tokenizer dependency is used; the estimate is ~4 characters per token
 * plus a fixed 8-character structural overhead per message. Tool-call names,
 * arguments and ids are counted as well because they also consume context.
 * This is a budget gate, not a billing figure — over-estimating is the safe
 * direction.
 *
 * Archive data is not always well-formed, so every field is measured
 * defensively: a missing field counts as 0, and a non-string field (for
 * example structured content parts or already-parsed tool arguments) is
 * measured by its JSON serialization instead of producing NaN or throwing.
 *
 * @param {Array<object>} messages chat messages (`content`, `name`, `tool_calls` are read)
 * @returns {number} estimated token count (integer, >= 0)
 */
export function estimateTokens(messages) {
    // Character length of a field that may be a string, absent, or structured.
    const textLength = (value) => {
        if (value == null)
            return 0;
        if (typeof value === 'string')
            return value.length;
        // JSON.stringify returns undefined for functions/symbols — count those as 0.
        return (JSON.stringify(value) ?? '').length;
    };
    let chars = 0;
    for (const m of messages) {
        chars += textLength(m.content);
        chars += textLength(m.name);
        if (m.tool_calls) {
            for (const tc of m.tool_calls) {
                chars += textLength(tc?.function?.name)
                    + textLength(tc?.function?.arguments)
                    + textLength(tc?.id);
            }
        }
        chars += 8; // per-message role/structural overhead
    }
    return Math.ceil(chars / 4);
}
|
|
52
|
+
/**
 * Truncate an over-length trajectory by dropping the OLDEST middle turns while
 * preserving the load-bearing ones: the head (everything up to and including
 * the first user turn, i.e. the issue) and the tail (the final message, the
 * final patch). Always returns a new array; the input is never mutated.
 *
 * Guarantees:
 *  - head and tail never overlap, so no message is emitted twice (this used
 *    to happen when the list had no `user` turn after index 0);
 *  - a middle turn is only kept if the whole result still fits `maxTokens`
 *    (head + tail alone may still exceed the budget — they are never dropped);
 *  - the kept middle is a contiguous, most-recent suffix of the original.
 *
 * @param {Array<object>} messages full trajectory
 * @param {number} maxTokens budget, in `estimateTokens` units
 * @returns {Array<object>} truncated copy
 */
function truncateTrajectory(messages, maxTokens) {
    if (messages.length <= 3)
        return [...messages];
    const lastIndex = messages.length - 1;
    // Head = leading messages through the first user turn. With no user turn
    // at all, keep just the first message. Clamp so the head never reaches
    // into the tail.
    const firstUser = messages.findIndex((m) => m.role === 'user');
    const headEnd = Math.min(firstUser < 0 ? 1 : firstUser + 1, lastIndex);
    const head = messages.slice(0, headEnd);
    const tail = [messages[lastIndex]];
    const middle = messages.slice(headEnd, lastIndex);
    // Greedily extend the kept suffix backwards (most recent context first)
    // for as long as the assembled result stays within budget.
    let start = middle.length;
    while (start > 0
        && estimateTokens([...head, ...middle.slice(start - 1), ...tail]) <= maxTokens) {
        start--;
    }
    // If older turns were cut, the suffix may now open with tool results whose
    // requesting assistant turn is gone. Drop those orphans so only whole
    // round-trips survive.
    // NOTE(review): assumes tool results carry role 'tool' — confirm against ChatMessage.
    if (start > 0) {
        while (start < middle.length && middle[start].role === 'tool')
            start++;
    }
    return [...head, ...middle.slice(start), ...tail];
}
|
|
78
|
+
/**
 * Cut a trajectory at its DPO (prompt, completion) boundary.
 *
 * The boundary is the first assistant turn: everything before it is the
 * prompt, that turn and everything after it is the completion. When the
 * trajectory has no assistant turn the whole list is the prompt and the
 * completion is empty.
 *
 * @param {Array<object>} messages full trajectory
 * @returns {{prompt: Array<object>, completion: Array<object>}} two fresh arrays
 */
function splitPromptCompletion(messages) {
    const assistantAt = messages.findIndex(({ role }) => role === 'assistant');
    // findIndex yields -1 when nothing matches → boundary at the very end.
    const boundary = assistantAt === -1 ? messages.length : assistantAt;
    const prompt = messages.slice(0, boundary);
    const completion = messages.slice(boundary);
    return { prompt, completion };
}
|
|
93
|
+
/**
 * THE CONTAMINATION GUARD.
 *
 * Verifies that no trajectory's `instance_id` is a member of the eval
 * holdout. On any overlap it throws (it never filters), reporting the number
 * of distinct overlapping ids and listing at most the first ten of them.
 *
 * @param {Iterable<{instance_id: string}>} trajectories candidate training trajectories
 * @param {Iterable<string>} evalHoldout instance ids reserved for evaluation
 * @throws {Error} when at least one instance id appears in both
 */
export function assertTrainEvalDisjoint(trajectories, evalHoldout) {
    const heldOut = new Set(evalHoldout);
    // A Set keeps each leaked id once, in first-seen order.
    const leaked = new Set();
    for (const { instance_id: id } of trajectories) {
        if (heldOut.has(id)) {
            leaked.add(id);
        }
    }
    if (leaked.size === 0) {
        return;
    }
    const shown = Array.from(leaked).slice(0, 10).join(', ');
    const more = leaked.size > 10 ? ', …' : '';
    throw new Error(`weight-eft contamination guard: ${leaked.size} training instance_id(s) overlap the eval holdout ` +
        `(${shown}${more}). Training on eval instances is fake lift — refusing to export. ` +
        `Exclude these instance_ids from the training archive or remove them from evalHoldout.`);
}
|
|
112
|
+
/**
 * Build the SFT and DPO sets from a Darwin trajectory archive.
 *
 * Pipeline (each stage records what it removed in `report` / `report.notes`):
 *  1. contamination guard — holdout members are excluded, then disjointness
 *     is asserted on what remains;
 *  2. long-context filter — over-budget trajectories are truncated
 *     (`options.truncateOverLength`) or dropped;
 *  2b. reward-hack filter — trajectories flagged by `detectRewardHack` are
 *     dropped (default on, `options.dropRewardHacked`);
 *  3. SFT — every remaining resolved trajectory with at least one message;
 *  4. DPO — one (chosen, rejected) pair per (model, instance) among
 *     cheap-tier trajectories only.
 *
 * @param trajectories the input archive (already-excluded-of-holdout OR raw —
 *   the exporter excludes holdout members itself, but ASSERTS disjointness on
 *   what remains so a programming error can't slip eval data through).
 * @param options `evalHoldout`, and optionally `maxTokens`,
 *   `truncateOverLength`, `dropRewardHacked`.
 * @returns `{ sft, dpo, report }`
 * @throws {Error} from `assertTrainEvalDisjoint` if eval data survives stage 1.
 */
export function exportTrainingData(trajectories, options) {
    const maxTokens = options.maxTokens ?? DEFAULT_MAX_TOKENS;
    const holdout = new Set(options.evalHoldout);
    const notes = [];
    const report = {
        totalTrajectories: trajectories.length,
        excludedByHoldout: 0,
        droppedOverLength: 0,
        truncatedOverLength: 0,
        droppedRewardHacked: 0,
        sftRows: 0,
        dpoRows: 0,
        sftInstanceIds: [],
        dpoInstanceIds: [],
        notes,
    };
    // 1) CONTAMINATION GUARD — exclude holdout members, then assert disjointness
    //    on what's left so an exclusion bug can never leak eval data downstream.
    const inDomain = [];
    for (const t of trajectories) {
        if (holdout.has(t.instance_id)) {
            report.excludedByHoldout++;
            continue;
        }
        inDomain.push(t);
    }
    assertTrainEvalDisjoint(inDomain, holdout);
    // 2) LONG-CONTEXT FILTER — drop or truncate over-budget trajectories, never
    //    silently. Applied to a working copy so the original archive is untouched.
    const sized = [];
    // Truncated working copy → its full archive record. The reward-hack
    // monitor must look at the FULL trajectory: truncation removes middle
    // turns, and a hack inside those turns would otherwise go unseen.
    const untruncated = new Map();
    for (const t of inDomain) {
        const tokens = estimateTokens(t.messages);
        if (tokens <= maxTokens) {
            sized.push(t);
            continue;
        }
        if (options.truncateOverLength) {
            const truncated = truncateTrajectory(t.messages, maxTokens);
            report.truncatedOverLength++;
            notes.push(`truncated ${t.instance_id} (${t.model}) ${tokens}→~${estimateTokens(truncated)} tok (budget ${maxTokens})`);
            const workingCopy = { ...t, messages: truncated };
            untruncated.set(workingCopy, t);
            sized.push(workingCopy);
        }
        else {
            report.droppedOverLength++;
            notes.push(`dropped ${t.instance_id} (${t.model}) ${tokens} tok > budget ${maxTokens}`);
        }
    }
    // 2b) REWARD-HACKING FILTER. Drop any trajectory the deterministic monitor
    //     flags — an archived "success" that reward-hacked would teach the
    //     model to reward-hack. Default ON. Applied BEFORE SFT/DPO so neither
    //     set sees it.
    const dropHack = options.dropRewardHacked ?? true;
    const clean = [];
    for (const t of sized) {
        if (!dropHack) {
            clean.push(t);
            continue;
        }
        // Inspect the pre-truncation record when there is one.
        const findings = detectRewardHack(untruncated.get(t) ?? t);
        if (findings.length === 0) {
            clean.push(t);
            continue;
        }
        report.droppedRewardHacked++;
        const kinds = [...new Set(findings.map((f) => f.kind))].join(',');
        const detail = findings[0].detail;
        notes.push(`reward-hack drop ${t.instance_id} (${t.model}) [${kinds}] — e.g. "${detail}" via ${findings[0].tool}`);
    }
    // 3) SFT — ALL gold-resolved trajectories (cheap-own AND frontier-escalation).
    const sft = [];
    const sftIds = new Set();
    for (const t of clean) {
        if (!t.resolved)
            continue;
        if (t.messages.length === 0)
            continue; // a resolved attempt must have a trajectory
        // Tool-call fidelity: we copy messages through verbatim — tool_calls are
        // structured objects on the assistant turns, never stringified.
        sft.push({ messages: t.messages });
        sftIds.add(t.instance_id);
    }
    // 4) DPO — ON-POLICY cheap-vs-cheap pairs ONLY. Group cheap-tier trajectories
    //    by (model, instance); pair a resolved (chosen) with an empty/failed
    //    (rejected) sample from the SAME model on the SAME instance.
    const dpo = [];
    const dpoIds = new Set();
    const cheapGroups = new Map();
    for (const t of clean) {
        if (t.tier !== 'cheap')
            continue; // ON-POLICY only — frontier never enters DPO
        // Unambiguous composite key: plain concatenation would merge
        // ("m1", "2x") with ("m12", "x") and pair samples across models.
        const key = JSON.stringify([t.model, t.instance_id]);
        const arr = cheapGroups.get(key);
        if (arr)
            arr.push(t);
        else
            cheapGroups.set(key, [t]);
    }
    for (const group of cheapGroups.values()) {
        const chosen = group.filter((t) => t.resolved && t.messages.length > 0);
        const rejected = group.filter((t) => !t.resolved);
        if (chosen.length === 0 || rejected.length === 0)
            continue;
        // Deterministic pairing: lowest-sample resolved vs lowest-sample failed.
        const bySample = (a, b) => (a.sample ?? 0) - (b.sample ?? 0);
        const c = [...chosen].sort(bySample)[0];
        const r = [...rejected].sort(bySample)[0];
        const { prompt, completion: chosenCompletion } = splitPromptCompletion(c.messages);
        // An empty rejected sample is split from the bare prompt, which yields
        // an empty completion.
        const { completion: rejectedCompletion } = splitPromptCompletion(r.messages.length > 0 ? r.messages : prompt);
        dpo.push({
            prompt,
            chosen: chosenCompletion,
            rejected: rejectedCompletion,
        });
        dpoIds.add(c.instance_id);
    }
    report.sftRows = sft.length;
    report.dpoRows = dpo.length;
    report.sftInstanceIds = [...sftIds].sort();
    report.dpoInstanceIds = [...dpoIds].sort();
    return { sft, dpo, report };
}
|
|
241
|
+
/**
 * Serialize SFT rows to JSONL: one `JSON.stringify`-ed row per line, each
 * line newline-terminated. An empty list serializes to the empty string.
 *
 * @param {Array<object>} rows SFT rows
 * @returns {string} JSONL text
 */
export function sftToJsonl(rows) {
    const lines = rows.map((row) => JSON.stringify(row));
    const body = lines.join('\n');
    // Only non-empty output gets the trailing newline.
    return rows.length ? `${body}\n` : body;
}
|
|
245
|
+
/**
 * Serialize DPO rows to JSONL: one `JSON.stringify`-ed row per line, each
 * line newline-terminated. An empty list serializes to the empty string.
 *
 * @param {Array<object>} rows DPO rows
 * @returns {string} JSONL text
 */
export function dpoToJsonl(rows) {
    const serialized = rows.map((row) => JSON.stringify(row)).join('\n');
    if (!rows.length) {
        return serialized;
    }
    return serialized + '\n';
}
|
|
249
|
+
//# sourceMappingURL=export.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"export.js","sourceRoot":"","sources":["../src/export.ts"],"names":[],"mappings":"AAAA,+BAA+B;AAC/B,EAAE;AACF,gDAAgD;AAChD,EAAE;AACF,gFAAgF;AAChF,+EAA+E;AAC/E,0BAA0B;AAC1B,EAAE;AACF,iFAAiF;AACjF,8EAA8E;AAC9E,6EAA6E;AAC7E,8EAA8E;AAC9E,EAAE;AACF,4EAA4E;AAC5E,4EAA4E;AAC5E,iFAAiF;AACjF,2EAA2E;AAC3E,+EAA+E;AAC/E,gCAAgC;AAChC,EAAE;AACF,+EAA+E;AAC/E,8EAA8E;AAC9E,4EAA4E;AAC5E,0BAA0B;AAC1B,EAAE;AACF,uEAAuE;AACvE,gFAAgF;AAChF,oCAAoC;AAWpC,OAAO,EAAE,gBAAgB,EAAE,MAAM,kBAAkB,CAAC;AAEpD,MAAM,kBAAkB,GAAG,KAAK,CAAC;AAEjC;;;;;;GAMG;AACH,MAAM,UAAU,cAAc,CAAC,QAAuB;IACpD,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;QACzB,KAAK,IAAI,CAAC,CAAC,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;QAClC,KAAK,IAAI,CAAC,CAAC,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;QAC/B,IAAI,CAAC,CAAC,UAAU,EAAE,CAAC;YACjB,KAAK,MAAM,EAAE,IAAI,CAAC,CAAC,UAAU,EAAE,CAAC;gBAC9B,KAAK,IAAI,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,MAAM,GAAG,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC,MAAM,GAAG,EAAE,CAAC,EAAE,CAAC,MAAM,CAAC;YACjF,CAAC;QACH,CAAC;QACD,KAAK,IAAI,CAAC,CAAC,CAAC,uCAAuC;IACrD,CAAC;IACD,OAAO,IAAI,CAAC,IAAI,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;AAC9B,CAAC;AAED;;;;GAIG;AACH,SAAS,kBAAkB,CAAC,QAAuB,EAAE,SAAiB;IACpE,IAAI,QAAQ,CAAC,MAAM,IAAI,CAAC;QAAE,OAAO,QAAQ,CAAC;IAC1C,wEAAwE;IACxE,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,OAAO,OAAO,GAAG,QAAQ,CAAC,MAAM,IAAI,QAAQ,CAAC,OAAO,CAAC,CAAC,IAAI,KAAK,MAAM;QAAE,OAAO,EAAE,CAAC;IACjF,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,GAAG,CAAC,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,8BAA8B;IAChF,MAAM,IAAI,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;IACxC,MAAM,IAAI,GAAG,CAAC,QAAQ,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC;IAC7C,MAAM,MAAM,GAAG,QAAQ,CAAC,KAAK,CAAC,OAAO,EAAE,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAC5D,gFAAgF;IAChF,MAAM,IAAI,GAAkB,EAAE,CAAC;IAC/B,KAAK,IAAI,CAAC,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC5C,MAAM,SAAS,GAAG,CAAC,GAAG,IAAI,EAAE,GAAG,IAAI,CAAC,KAAK,EAAE,EAAE,MAAM,CAAC,CAAC,CAAC,EAAE,GAAG,IAAI,CAAC,CAAC;QACjE,IAAI,cAAc,CAAC,SAAS,CAAC,GAAG,SAAS,IAAI
,IAAI,CAAC,MAAM,GAAG,CAAC;YAAE,MAAM;QACpE,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;IAC1B,CAAC;IACD,OAAO,CAAC,GAAG,IAAI,EAAE,GAAG,IAAI,EAAE,GAAG,IAAI,CAAC,CAAC;AACrC,CAAC;AAED;;;;;GAKG;AACH,SAAS,qBAAqB,CAAC,QAAuB;IACpD,IAAI,cAAc,GAAG,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,WAAW,CAAC,CAAC;IACvE,IAAI,cAAc,GAAG,CAAC;QAAE,cAAc,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,uCAAuC;IACjG,OAAO;QACL,MAAM,EAAE,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,cAAc,CAAC;QACzC,UAAU,EAAE,QAAQ,CAAC,KAAK,CAAC,cAAc,CAAC;KAC3C,CAAC;AACJ,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,uBAAuB,CACrC,YAAgC,EAChC,WAA6B;IAE7B,MAAM,OAAO,GAAG,IAAI,GAAG,CAAC,WAAW,CAAC,CAAC;IACrC,MAAM,OAAO,GAAG,IAAI,GAAG,EAAU,CAAC;IAClC,KAAK,MAAM,CAAC,IAAI,YAAY,EAAE,CAAC;QAC7B,IAAI,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,WAAW,CAAC;YAAE,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC;IAC7D,CAAC;IACD,IAAI,OAAO,CAAC,IAAI,GAAG,CAAC,EAAE,CAAC;QACrB,MAAM,MAAM,GAAG,CAAC,GAAG,OAAO,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACpD,MAAM,IAAI,KAAK,CACb,mCAAmC,OAAO,CAAC,IAAI,oDAAoD;YACjG,IAAI,MAAM,GAAG,OAAO,CAAC,IAAI,GAAG,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,mEAAmE;YAC9G,uFAAuF,CAC1F,CAAC;IACJ,CAAC;AACH,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,kBAAkB,CAChC,YAAgC,EAChC,OAAsB;IAEtB,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,kBAAkB,CAAC;IAC1D,MAAM,OAAO,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC;IAC7C,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,MAAM,MAAM,GAAiB;QAC3B,iBAAiB,EAAE,YAAY,CAAC,MAAM;QACtC,iBAAiB,EAAE,CAAC;QACpB,iBAAiB,EAAE,CAAC;QACpB,mBAAmB,EAAE,CAAC;QACtB,mBAAmB,EAAE,CAAC;QACtB,OAAO,EAAE,CAAC;QACV,OAAO,EAAE,CAAC;QACV,cAAc,EAAE,EAAE;QAClB,cAAc,EAAE,EAAE;QAClB,KAAK;KACN,CAAC;IAEF,6EAA6E;IAC7E,6EAA6E;IAC7E,MAAM,QAAQ,GAAuB,EAAE,CAAC;IACxC,KAAK,MAAM,CAAC,IAAI,YAAY,EAAE,CAAC;QAC7B,IAAI,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,WAAW,CAAC,EAAE,CAAC;YAC/B,MAAM,CAAC,iBAAiB,EAAE,CAAC;YAC3B,SAAS;QACX,CAAC;QACD,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACnB,CAAC;IACD,uBAAuB,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IAE3C,4EAA4E;IAC5E,+EAA+E;IAC/E,MAAM,KAAK,GAAuB,EAAE,CAAC;IACrC
,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;QACzB,MAAM,MAAM,GAAG,cAAc,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;QAC1C,IAAI,MAAM,IAAI,SAAS,EAAE,CAAC;YACxB,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YACd,SAAS;QACX,CAAC;QACD,IAAI,OAAO,CAAC,kBAAkB,EAAE,CAAC;YAC/B,MAAM,SAAS,GAAG,kBAAkB,CAAC,CAAC,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;YAC5D,MAAM,CAAC,mBAAmB,EAAE,CAAC;YAC7B,KAAK,CAAC,IAAI,CACR,aAAa,CAAC,CAAC,WAAW,KAAK,CAAC,CAAC,KAAK,KAAK,MAAM,KAAK,cAAc,CAAC,SAAS,CAAC,gBAAgB,SAAS,GAAG,CAC5G,CAAC;YACF,KAAK,CAAC,IAAI,CAAC,EAAE,GAAG,CAAC,EAAE,QAAQ,EAAE,SAAS,EAAE,CAAC,CAAC;QAC5C,CAAC;aAAM,CAAC;YACN,MAAM,CAAC,iBAAiB,EAAE,CAAC;YAC3B,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,WAAW,KAAK,CAAC,CAAC,KAAK,KAAK,MAAM,iBAAiB,SAAS,EAAE,CAAC,CAAC;QAC1F,CAAC;IACH,CAAC;IAED,2EAA2E;IAC3E,4EAA4E;IAC5E,4EAA4E;IAC5E,4EAA4E;IAC5E,2EAA2E;IAC3E,MAAM,QAAQ,GAAG,OAAO,CAAC,gBAAgB,IAAI,IAAI,CAAC;IAClD,MAAM,KAAK,GAAuB,EAAE,CAAC;IACrC,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,IAAI,CAAC,QAAQ,EAAE,CAAC;YACd,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YACd,SAAS;QACX,CAAC;QACD,MAAM,QAAQ,GAAG,gBAAgB,CAAC,CAAC,CAAC,CAAC;QACrC,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC1B,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YACd,SAAS;QACX,CAAC;QACD,MAAM,CAAC,mBAAmB,EAAE,CAAC;QAC7B,MAAM,KAAK,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAClE,MAAM,MAAM,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;QAClC,KAAK,CAAC,IAAI,CACR,oBAAoB,CAAC,CAAC,WAAW,KAAK,CAAC,CAAC,KAAK,MAAM,KAAK,aAAa,MAAM,SAAS,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CACvG,CAAC;IACJ,CAAC;IAED,+EAA+E;IAC/E,MAAM,GAAG,GAAa,EAAE,CAAC;IACzB,MAAM,MAAM,GAAG,IAAI,GAAG,EAAU,CAAC;IACjC,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,IAAI,CAAC,CAAC,CAAC,QAAQ;YAAE,SAAS;QAC1B,IAAI,CAAC,CAAC,QAAQ,CAAC,MAAM,KAAK,CAAC;YAAE,SAAS,CAAC,4CAA4C;QACnF,yEAAyE;QACzE,gEAAgE;QAChE,GAAG,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC;QACnC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC;IAC5B,CAAC;IAED,8EAA8E;IAC9E,yEAAyE;IACzE,iEAAiE;IACjE,MAAM,GAAG,GAAa,EAAE,CAAC;IACzB,M
AAM,MAAM,GAAG,IAAI,GAAG,EAAU,CAAC;IACjC,MAAM,WAAW,GAAG,IAAI,GAAG,EAA8B,CAAC;IAC1D,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,IAAI,CAAC,CAAC,IAAI,KAAK,OAAO;YAAE,SAAS,CAAC,6CAA6C;QAC/E,MAAM,GAAG,GAAG,GAAG,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,WAAW,EAAE,CAAC;QAC1C,MAAM,GAAG,GAAG,WAAW,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QACjC,IAAI,GAAG;YAAE,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;;YAChB,WAAW,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;IACjC,CAAC;IACD,KAAK,MAAM,KAAK,IAAI,WAAW,CAAC,MAAM,EAAE,EAAE,CAAC;QACzC,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,IAAI,CAAC,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QACxE,MAAM,QAAQ,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;QAClD,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;YAAE,SAAS;QAC3D,yEAAyE;QACzE,MAAM,QAAQ,GAAG,CAAC,CAAmB,EAAE,CAAmB,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,CAAC;QACjG,MAAM,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;QACxC,MAAM,CAAC,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;QAC1C,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,gBAAgB,EAAE,GAAG,qBAAqB,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;QACnF,MAAM,EAAE,UAAU,EAAE,kBAAkB,EAAE,GAAG,qBAAqB,CAC9D,CAAC,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,MAAM,CAC5C,CAAC;QACF,GAAG,CAAC,IAAI,CAAC;YACP,MAAM;YACN,MAAM,EAAE,gBAAgB;YACxB,QAAQ,EAAE,kBAAkB;SAC7B,CAAC,CAAC;QACH,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC;IAC5B,CAAC;IAED,MAAM,CAAC,OAAO,GAAG,GAAG,CAAC,MAAM,CAAC;IAC5B,MAAM,CAAC,OAAO,GAAG,GAAG,CAAC,MAAM,CAAC;IAC5B,MAAM,CAAC,cAAc,GAAG,CAAC,GAAG,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC;IAC3C,MAAM,CAAC,cAAc,GAAG,CAAC,GAAG,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC;IAE3C,OAAO,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,EAAE,CAAC;AAC9B,CAAC;AAED,sDAAsD;AACtD,MAAM,UAAU,UAAU,CAAC,IAAc;IACvC,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,
CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;AACnF,CAAC;AAED,sDAAsD;AACtD,MAAM,UAAU,UAAU,CAAC,IAAc;IACvC,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;AACnF,CAAC"}
|
package/dist/genome.d.ts
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* The value of the `weightAdapter` gene. `null` (or absent) === BASE: the cheap
|
|
3
|
+
* tier runs with no LoRA adapter — the pre-gene default, byte-identical key.
|
|
4
|
+
* A string is an adapter id (e.g. "sft", "sft-dpo" as listed in WEIGHT_ADAPTERS, or a model-qualified id such as "glm5.2-sft").
|
|
5
|
+
*/
|
|
6
|
+
export type WeightAdapterGene = string | null;
|
|
7
|
+
/** The canonical "no adapter" sentinel. Absent gene === BASE === this. */
|
|
8
|
+
export declare const BASE_ADAPTER: WeightAdapterGene;
|
|
9
|
+
/**
|
|
10
|
+
* The adapter variants Darwin evolution may choose among. BASE is always first
|
|
11
|
+
* (the control). The tuned variants reference adapters the runner produced:
|
|
12
|
+
* SFT-only and SFT+DPO are the two recipes; a ratio variant blends adapter
|
|
13
|
+
* strength. ALL are inert until an actual adapter is trained (GPU job) — the
|
|
14
|
+
* gene only NAMES an adapter; it does not create one.
|
|
15
|
+
*/
|
|
16
|
+
export declare const WEIGHT_ADAPTERS: WeightAdapterGene[];
|
|
17
|
+
/**
|
|
18
|
+
* Normalize a raw gene value. Absent / '' / 'base' / 'none' all coerce to BASE
|
|
19
|
+
* so an unset gene is indistinguishable from an explicit base choice — that's
|
|
20
|
+
* what keeps pre-gene genomes byte-identical.
|
|
21
|
+
*/
|
|
22
|
+
export declare function normalizeWeightAdapter(v: unknown): WeightAdapterGene;
|
|
23
|
+
/**
|
|
24
|
+
* Stable key fragment for a genome's adapter choice. Empty string for BASE so
|
|
25
|
+
* the gene contributes NOTHING to a genome key unless an adapter is selected —
|
|
26
|
+
* the backward-compatibility invariant (a base-adapter genome keys identically
|
|
27
|
+
* to a pre-gene genome).
|
|
28
|
+
*/
|
|
29
|
+
export declare function weightAdapterSuffix(gene: WeightAdapterGene): string;
|
|
30
|
+
/**
|
|
31
|
+
* Map an adapter gene to the CLI flag the cheap-tier solver forwards. BASE
|
|
32
|
+
* emits NO flag (runs the stock model). A tuned adapter emits `--lora-adapter
|
|
33
|
+
* <id>` (the solver loads the adapter onto the cheap base before solving).
|
|
34
|
+
*/
|
|
35
|
+
export declare function weightAdapterFlags(gene: WeightAdapterGene): string[];
|
|
36
|
+
/** True iff the gene selects a real adapter (not base). */
|
|
37
|
+
export declare function usesAdapter(gene: WeightAdapterGene): boolean;
|
|
38
|
+
//# sourceMappingURL=genome.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"genome.d.ts","sourceRoot":"","sources":["../src/genome.ts"],"names":[],"mappings":"AAuBA;;;;GAIG;AACH,MAAM,MAAM,iBAAiB,GAAG,MAAM,GAAG,IAAI,CAAC;AAE9C,0EAA0E;AAC1E,eAAO,MAAM,YAAY,EAAE,iBAAwB,CAAC;AAEpD;;;;;;GAMG;AACH,eAAO,MAAM,eAAe,EAAE,iBAAiB,EAI9C,CAAC;AAEF;;;;GAIG;AACH,wBAAgB,sBAAsB,CAAC,CAAC,EAAE,OAAO,GAAG,iBAAiB,CAMpE;AAED;;;;;GAKG;AACH,wBAAgB,mBAAmB,CAAC,IAAI,EAAE,iBAAiB,GAAG,MAAM,CAGnE;AAED;;;;GAIG;AACH,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,iBAAiB,GAAG,MAAM,EAAE,CAGpE;AAED,2DAA2D;AAC3D,wBAAgB,WAAW,CAAC,IAAI,EAAE,iBAAiB,GAAG,OAAO,CAE5D"}
|
package/dist/genome.js
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
// SPDX-License-Identifier: MIT
|
|
2
|
+
//
|
|
3
|
+
// genome.ts — the `weightAdapter` genome gene.
|
|
4
|
+
//
|
|
5
|
+
// This is the bridge that lets Darwin evolution SELECT among LoRA adapters
|
|
6
|
+
// (the evolutionary prune-the-overfitter safety net). The Darwin genome stays
|
|
7
|
+
// gradient-FREE policy evolution; this gene adds a single reference to a tuned
|
|
8
|
+
// weight adapter so an evolved config can run the cheap tier with-or-without a
|
|
9
|
+
// distilled adapter, and let selection decide.
|
|
10
|
+
//
|
|
11
|
+
// SAFETY-NET RATIONALE: a LoRA tune can overfit (memorize the SFT set, regress
|
|
12
|
+
// on held-out). Rather than trust the tune blindly, we make the adapter a GENE:
|
|
13
|
+
// base (no adapter) competes against SFT-only / SFT+DPO / ratio variants under
|
|
14
|
+
// the same conformant fitness, and evolution prunes an adapter that doesn't
|
|
15
|
+
// actually lift held-out resolve. The default is ALWAYS base/no-adapter, so a
|
|
16
|
+
// genome that never opts into an adapter is byte-identical to a pre-gene genome.
|
|
17
|
+
//
|
|
18
|
+
// The CONCRETE wiring into darwin-mode's evolve-config genome lives in
|
|
19
|
+
// packages/darwin-mode/bench/swebench/evolve-config.mjs (the WEIGHT_ADAPTERS /
|
|
20
|
+
// normalizeWeightAdapter / weightAdapterFlags helpers + the mutate/crossover/
|
|
21
|
+
// seed integration). This module is the typed SPEC + reference implementation
|
|
22
|
+
// of the gene's semantics, so the cross-package contract is testable from here.
|
|
23
|
+
/** Sentinel for "no adapter": an absent gene, BASE, and this value are all the same thing. */
export const BASE_ADAPTER = null;
/**
 * The adapter choices offered to Darwin evolution, in order:
 *   1. BASE_ADAPTER — the control, no adapter (the safety-net baseline);
 *   2. 'sft'        — SFT-distill only;
 *   3. 'sft-dpo'    — SFT followed by on-policy DPO.
 *
 * An entry only NAMES an adapter; nothing in this list creates or trains one.
 */
export const WEIGHT_ADAPTERS = [BASE_ADAPTER, 'sft', 'sft-dpo'];
|
|
37
|
+
/**
 * Canonicalize a raw gene value.
 *
 * Anything that is not a string (including null/undefined) maps to BASE. A
 * string is trimmed; if the trimmed text is empty, or equals 'base' or 'none'
 * ignoring case, it also maps to BASE. Any other string is returned trimmed
 * with its original casing.
 *
 * @param {unknown} v raw gene value
 * @returns {string | null} adapter id, or BASE_ADAPTER
 */
export function normalizeWeightAdapter(v) {
    // typeof also rejects null and undefined, so no separate nullish check is needed.
    if (typeof v !== 'string') {
        return BASE_ADAPTER;
    }
    const trimmed = v.trim();
    const folded = trimmed.toLowerCase();
    const meansBase = ['', 'base', 'none'].includes(folded);
    return meansBase ? BASE_ADAPTER : trimmed;
}
|
|
52
|
+
/**
 * Key fragment contributed by a genome's adapter choice.
 *
 * The gene is normalized first. BASE yields the empty string, so the gene
 * adds nothing to a genome key unless an adapter is selected; any other
 * adapter yields `+w:<id>`.
 *
 * @param {string | null} gene raw or normalized gene value
 * @returns {string} '' for BASE, otherwise `+w:<id>`
 */
export function weightAdapterSuffix(gene) {
    const adapter = normalizeWeightAdapter(gene);
    if (adapter == null) {
        return '';
    }
    return `+w:${adapter}`;
}
|
|
62
|
+
/**
 * Translate an adapter gene into CLI arguments.
 *
 * The gene is normalized first. BASE produces no arguments at all; any other
 * adapter produces the pair `['--lora-adapter', <id>]`.
 *
 * @param {string | null} gene raw or normalized gene value
 * @returns {string[]} argument list (possibly empty)
 */
export function weightAdapterFlags(gene) {
    const adapter = normalizeWeightAdapter(gene);
    if (adapter == null) {
        return [];
    }
    return ['--lora-adapter', adapter];
}
|
|
71
|
+
/**
 * Whether the gene, once normalized, names an adapter rather than BASE.
 *
 * @param {string | null} gene raw or normalized gene value
 * @returns {boolean} true when an adapter is selected
 */
export function usesAdapter(gene) {
    const adapter = normalizeWeightAdapter(gene);
    return !(adapter == null);
}
|
|
75
|
+
//# sourceMappingURL=genome.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"genome.js","sourceRoot":"","sources":["../src/genome.ts"],"names":[],"mappings":"AAAA,+BAA+B;AAC/B,EAAE;AACF,+CAA+C;AAC/C,EAAE;AACF,2EAA2E;AAC3E,8EAA8E;AAC9E,+EAA+E;AAC/E,+EAA+E;AAC/E,+CAA+C;AAC/C,EAAE;AACF,+EAA+E;AAC/E,gFAAgF;AAChF,+EAA+E;AAC/E,4EAA4E;AAC5E,8EAA8E;AAC9E,iFAAiF;AACjF,EAAE;AACF,uEAAuE;AACvE,+EAA+E;AAC/E,8EAA8E;AAC9E,8EAA8E;AAC9E,gFAAgF;AAShF,0EAA0E;AAC1E,MAAM,CAAC,MAAM,YAAY,GAAsB,IAAI,CAAC;AAEpD;;;;;;GAMG;AACH,MAAM,CAAC,MAAM,eAAe,GAAwB;IAClD,YAAY,EAAE,iDAAiD;IAC/D,KAAK,EAAE,mBAAmB;IAC1B,SAAS,EAAE,yBAAyB;CACrC,CAAC;AAEF;;;;GAIG;AACH,MAAM,UAAU,sBAAsB,CAAC,CAAU;IAC/C,IAAI,CAAC,IAAI,IAAI;QAAE,OAAO,YAAY,CAAC;IACnC,IAAI,OAAO,CAAC,KAAK,QAAQ;QAAE,OAAO,YAAY,CAAC;IAC/C,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IACjC,IAAI,CAAC,KAAK,EAAE,IAAI,CAAC,KAAK,MAAM,IAAI,CAAC,KAAK,MAAM;QAAE,OAAO,YAAY,CAAC;IAClE,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC;AAClB,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,mBAAmB,CAAC,IAAuB;IACzD,MAAM,IAAI,GAAG,sBAAsB,CAAC,IAAI,CAAC,CAAC;IAC1C,OAAO,IAAI,IAAI,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,IAAI,EAAE,CAAC;AAC1C,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,kBAAkB,CAAC,IAAuB;IACxD,MAAM,IAAI,GAAG,sBAAsB,CAAC,IAAI,CAAC,CAAC;IAC1C,OAAO,IAAI,IAAI,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,gBAAgB,EAAE,IAAI,CAAC,CAAC;AACtD,CAAC;AAED,2DAA2D;AAC3D,MAAM,UAAU,WAAW,CAAC,IAAuB;IACjD,OAAO,sBAAsB,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC;AAC9C,CAAC"}
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
export type { ChatMessage, ToolCall, PolicyTier, DarwinTrajectory, SftRow, DpoRow, ExportOptions, ExportResult, ExportReport, } from './types.js';
|
|
2
|
+
export { exportTrainingData, assertTrainEvalDisjoint, estimateTokens, sftToJsonl, dpoToJsonl, } from './export.js';
|
|
3
|
+
export type { BaseModelSpec, TrainStage, LoraConfig, TrainConfig, TrainRunOptions, TrainRunResult, TrainingPlan, } from './train.js';
|
|
4
|
+
export { DEFAULT_LORA, defaultDetectGpu, assertTunableSize, sftConfig, dpoConfig, buildCommand, buildPlan, runTraining, twoStagePlan, adaptSftForRunner, adaptDpoForRunner, } from './train.js';
|
|
5
|
+
export type { WeightAdapterGene } from './genome.js';
|
|
6
|
+
export { BASE_ADAPTER, WEIGHT_ADAPTERS, normalizeWeightAdapter, weightAdapterSuffix, weightAdapterFlags, usesAdapter, } from './genome.js';
|
|
7
|
+
export type { CascadeOutcome, CascadeSummary, CostParetoDelta, } from './eval.js';
|
|
8
|
+
export { summarizeCascade, costParetoDelta } from './eval.js';
|
|
9
|
+
export type { RewardHackKind, RewardHackFinding } from './reward-hack.js';
|
|
10
|
+
export { detectRewardHack, isRewardHacked } from './reward-hack.js';
|
|
11
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAQA,YAAY,EACV,WAAW,EACX,QAAQ,EACR,UAAU,EACV,gBAAgB,EAChB,MAAM,EACN,MAAM,EACN,aAAa,EACb,YAAY,EACZ,YAAY,GACb,MAAM,YAAY,CAAC;AAEpB,OAAO,EACL,kBAAkB,EAClB,uBAAuB,EACvB,cAAc,EACd,UAAU,EACV,UAAU,GACX,MAAM,aAAa,CAAC;AAErB,YAAY,EACV,aAAa,EACb,UAAU,EACV,UAAU,EACV,WAAW,EACX,eAAe,EACf,cAAc,EACd,YAAY,GACb,MAAM,YAAY,CAAC;AAEpB,OAAO,EACL,YAAY,EACZ,gBAAgB,EAChB,iBAAiB,EACjB,SAAS,EACT,SAAS,EACT,YAAY,EACZ,SAAS,EACT,WAAW,EACX,YAAY,EACZ,iBAAiB,EACjB,iBAAiB,GAClB,MAAM,YAAY,CAAC;AAEpB,YAAY,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAC;AAErD,OAAO,EACL,YAAY,EACZ,eAAe,EACf,sBAAsB,EACtB,mBAAmB,EACnB,kBAAkB,EAClB,WAAW,GACZ,MAAM,aAAa,CAAC;AAErB,YAAY,EACV,cAAc,EACd,cAAc,EACd,eAAe,GAChB,MAAM,WAAW,CAAC;AAEnB,OAAO,EAAE,gBAAgB,EAAE,eAAe,EAAE,MAAM,WAAW,CAAC;AAE9D,YAAY,EAAE,cAAc,EAAE,iBAAiB,EAAE,MAAM,kBAAkB,CAAC;AAE1E,OAAO,EAAE,gBAAgB,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC"}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
// SPDX-License-Identifier: MIT
|
|
2
|
+
//
|
|
3
|
+
// @metaharness/weight-eft — public API.
|
|
4
|
+
//
|
|
5
|
+
// Evolutionary fine-tuning: distill Darwin's archival success into the open
|
|
6
|
+
// cheap tier via LoRA so the cost-cascade escalates to a frontier model less
|
|
7
|
+
// often. Cost-Pareto axis, not the frontier ceiling. See ADR-198.
|
|
8
|
+
export { exportTrainingData, assertTrainEvalDisjoint, estimateTokens, sftToJsonl, dpoToJsonl, } from './export.js';
|
|
9
|
+
export { DEFAULT_LORA, defaultDetectGpu, assertTunableSize, sftConfig, dpoConfig, buildCommand, buildPlan, runTraining, twoStagePlan, adaptSftForRunner, adaptDpoForRunner, } from './train.js';
|
|
10
|
+
export { BASE_ADAPTER, WEIGHT_ADAPTERS, normalizeWeightAdapter, weightAdapterSuffix, weightAdapterFlags, usesAdapter, } from './genome.js';
|
|
11
|
+
export { summarizeCascade, costParetoDelta } from './eval.js';
|
|
12
|
+
export { detectRewardHack, isRewardHacked } from './reward-hack.js';
|
|
13
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,+BAA+B;AAC/B,EAAE;AACF,wCAAwC;AACxC,EAAE;AACF,4EAA4E;AAC5E,6EAA6E;AAC7E,kEAAkE;AAclE,OAAO,EACL,kBAAkB,EAClB,uBAAuB,EACvB,cAAc,EACd,UAAU,EACV,UAAU,GACX,MAAM,aAAa,CAAC;AAYrB,OAAO,EACL,YAAY,EACZ,gBAAgB,EAChB,iBAAiB,EACjB,SAAS,EACT,SAAS,EACT,YAAY,EACZ,SAAS,EACT,WAAW,EACX,YAAY,EACZ,iBAAiB,EACjB,iBAAiB,GAClB,MAAM,YAAY,CAAC;AAIpB,OAAO,EACL,YAAY,EACZ,eAAe,EACf,sBAAsB,EACtB,mBAAmB,EACnB,kBAAkB,EAClB,WAAW,GACZ,MAAM,aAAa,CAAC;AAQrB,OAAO,EAAE,gBAAgB,EAAE,eAAe,EAAE,MAAM,WAAW,CAAC;AAI9D,OAAO,EAAE,gBAAgB,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC"}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import type { DarwinTrajectory } from './types.js';
|
|
2
|
+
export type RewardHackKind = 'gold-read' | 'verification-tamper' | 'sandbox-escape';
|
|
3
|
+
export interface RewardHackFinding {
|
|
4
|
+
kind: RewardHackKind;
|
|
5
|
+
/** The offending path/argument fragment. */
|
|
6
|
+
detail: string;
|
|
7
|
+
/** The tool that performed the offending action. */
|
|
8
|
+
tool: string;
|
|
9
|
+
}
|
|
10
|
+
/**
|
|
11
|
+
* Run the deterministic reward-hacking monitor over a single trajectory.
|
|
12
|
+
* Returns all findings (empty ⇒ clean). Pure function of the trajectory.
|
|
13
|
+
*/
|
|
14
|
+
export declare function detectRewardHack(t: DarwinTrajectory): RewardHackFinding[];
|
|
15
|
+
/** True iff the trajectory shows ANY reward-hacking signal. */
|
|
16
|
+
export declare function isRewardHacked(t: DarwinTrajectory): boolean;
|
|
17
|
+
//# sourceMappingURL=reward-hack.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"reward-hack.d.ts","sourceRoot":"","sources":["../src/reward-hack.ts"],"names":[],"mappings":"AAoBA,OAAO,KAAK,EAAe,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAwChE,MAAM,MAAM,cAAc,GAAG,WAAW,GAAG,qBAAqB,GAAG,gBAAgB,CAAC;AAEpF,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,cAAc,CAAC;IACrB,4CAA4C;IAC5C,MAAM,EAAE,MAAM,CAAC;IACf,oDAAoD;IACpD,IAAI,EAAE,MAAM,CAAC;CACd;AAcD;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,CAAC,EAAE,gBAAgB,GAAG,iBAAiB,EAAE,CA8BzE;AAED,+DAA+D;AAC/D,wBAAgB,cAAc,CAAC,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAE3D"}
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
// SPDX-License-Identifier: MIT
|
|
2
|
+
//
|
|
3
|
+
// reward-hack.ts — deterministic reward-hacking monitor (Ornith-1.0 borrow).
|
|
4
|
+
//
|
|
5
|
+
// An archived "success" that secretly reward-hacked would teach the model to
|
|
6
|
+
// reward-hack. This is the TRAINING-DATA analog of the conformance firewall —
|
|
7
|
+
// SEPARATE from and IN ADDITION TO the train/eval-disjointness guard. We run a
|
|
8
|
+
// deterministic monitor over each gold-resolved trajectory and DROP any that:
|
|
9
|
+
// (a) read a withheld GOLD/TEST path (FAIL_TO_PASS / PASS_TO_PASS / gold_patch
|
|
10
|
+
// / the held-out test files),
|
|
11
|
+
// (b) modified the verification/test harness (the script that scores it),
|
|
12
|
+
// (c) touched a path OUTSIDE the sandboxed repo.
|
|
13
|
+
//
|
|
14
|
+
// Prior art: Ornith-1.0 (DeepReinforce) — self-scaffolding RL for agentic
|
|
15
|
+
// coding — uses a deterministic monitor that flags trajectories acting outside
|
|
16
|
+
// the sanctioned tool surface → zero reward + excluded. See ADR-198.
|
|
17
|
+
//
|
|
18
|
+
// The monitor reads ONLY the tool_calls on the trajectory's assistant turns — it
|
|
19
|
+
// is a pure function of the trajectory (no filesystem, $0, reproducible).
|
|
20
|
+
/**
 * Fragments of a tool call's argument text that point at the withheld oracle.
 * A match is reported as a `gold-read` finding.
 */
const GOLD_READ_PATTERNS = [
    /gold_patch/i,
    /\bFAIL_TO_PASS\b/,
    /\bPASS_TO_PASS\b/,
    /test_patch/i,
    /\bexpected[_-]?(output|patch|result)\b/i,
    /\.golden\b/i,
    // Reading the swebench evaluator itself.
    /run_evaluation/i,
];

/**
 * Fragments naming files that belong to the verification/test harness.
 * A match inside a modifying tool call is reported as `verification-tamper`.
 */
const VERIFICATION_WRITE_PATTERNS = [
    /conftest\.py/i,
    /run_tests?\.(sh|py)/i,
    /run_evaluation/i,
    /swebench[\/\\].*harness/i,
    // Re-routing the test target.
    /\bMakefile\b/,
    /tox\.ini/i,
    /pytest\.ini/i,
    // Can redefine test discovery.
    /setup\.cfg/i,
];

/**
 * Path prefixes that lie outside the sandboxed repo. A match is reported as
 * `sandbox-escape`. The absolute-path entries only fire at the start of the
 * text or right after a quote or whitespace character.
 */
const OUT_OF_SANDBOX_PATTERNS = [
    /(^|["'\s])\/etc\//,
    /(^|["'\s])\/root\//,
    /(^|["'\s])\/home\//,
    // /tmp is fine for the work tree; any other /tmp path is flagged.
    /(^|["'\s])\/tmp\/(?!repo|work)/,
    // Repeated parent traversal out of the repo.
    /\.\.\/\.\.\//,
    /(^|["'\s])~\//,
];

/** Names of tools that count as reading a path. */
const READ_TOOLS = new Set([
    'read_file',
    'cat',
    'open_file',
    'view',
    'grep',
    'search',
]);

/** Names of tools that count as writing or modifying a path. */
const WRITE_TOOLS = new Set([
    'write_file',
    'apply_patch',
    'edit_file',
    'create_file',
    'str_replace',
]);
|
|
54
|
+
/**
 * Turn a tool call's raw `arguments` value into text the pattern scan can use.
 *
 * Strings pass through untouched. A missing value becomes the empty string.
 * Anything else (for example arguments stored as an already-parsed object) is
 * JSON-serialized, so the paths it contains are still inspected.
 *
 * @param {unknown} raw - The `function.arguments` value of a tool call.
 * @returns {string} Text form of the arguments; never null or undefined.
 */
function toolArgsText(raw) {
    if (typeof raw === 'string') {
        return raw;
    }
    if (raw === undefined || raw === null) {
        return '';
    }
    const serialized = JSON.stringify(raw);
    // JSON.stringify yields undefined for functions/symbols; fall back to String().
    return typeof serialized === 'string' ? serialized : String(raw);
}

/**
 * Extract the (tool, argumentsText) pairs from a trajectory's assistant turns.
 *
 * Only messages with role `assistant` and a truthy `tool_calls` contribute;
 * every other message is skipped. Pairs are returned in message order, then
 * call order. `args` is always a string, so callers can apply string methods
 * without a type check.
 *
 * @param {Array<object>} messages - The trajectory's chat messages.
 * @returns {Array<{tool: string, args: string}>} One entry per tool call.
 */
function toolActions(messages) {
    const out = [];
    for (const m of messages) {
        if (m.role !== 'assistant' || !m.tool_calls) {
            continue;
        }
        for (const tc of m.tool_calls) {
            out.push({ tool: tc.function.name, args: toolArgsText(tc.function.arguments) });
        }
    }
    return out;
}
|
|
66
|
+
/**
 * Deterministic reward-hacking monitor for one trajectory.
 *
 * Every tool call extracted by `toolActions` is checked, in order, against
 * three pattern tables. Each matching pattern yields one finding:
 *   - `gold-read`: scanned when the tool is a known read/write tool or its
 *     name contains "file", "path" or "patch";
 *   - `verification-tamper`: scanned when the tool is a known write tool or
 *     its name contains "patch", "edit", "write" or "replace";
 *   - `sandbox-escape`: scanned for every tool call.
 *
 * @param t - The trajectory; only `t.messages` is read.
 * @returns All findings, in call order; an empty array means clean.
 */
export function detectRewardHack(t) {
    const findings = [];
    // Append one finding for each pattern in `patterns` that matches the call's args.
    const collect = (patterns, kind, action, trimDetail) => {
        for (const pattern of patterns) {
            const hit = action.args.match(pattern);
            if (!hit) {
                continue;
            }
            const detail = trimDetail ? hit[0].trim() : hit[0];
            findings.push({ kind, detail, tool: action.tool });
        }
    };
    for (const action of toolActions(t.messages)) {
        const { tool } = action;
        const knownRead = READ_TOOLS.has(tool);
        const knownWrite = WRITE_TOOLS.has(tool);
        // (a) a path-taking tool whose arguments name a withheld gold/test path.
        const takesPath = knownRead || knownWrite || /file|path|patch/i.test(tool);
        if (takesPath) {
            collect(GOLD_READ_PATTERNS, 'gold-read', action, false);
        }
        // (b) a modifying tool aimed at the verification/test harness.
        const modifies = knownWrite || /patch|edit|write|replace/i.test(tool);
        if (modifies) {
            collect(VERIFICATION_WRITE_PATTERNS, 'verification-tamper', action, false);
        }
        // (c) any tool touching a path outside the sandboxed repo; the match is
        //     trimmed because these patterns can capture a leading whitespace char.
        collect(OUT_OF_SANDBOX_PATTERNS, 'sandbox-escape', action, true);
    }
    return findings;
}
|
|
101
|
+
/**
 * Boolean shortcut over `detectRewardHack`.
 *
 * @param t - The trajectory to check.
 * @returns `true` when the monitor reports at least one finding, else `false`.
 */
export function isRewardHacked(t) {
    const findings = detectRewardHack(t);
    return findings.length !== 0;
}
|
|
105
|
+
//# sourceMappingURL=reward-hack.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"reward-hack.js","sourceRoot":"","sources":["../src/reward-hack.ts"],"names":[],"mappings":"AAAA,+BAA+B;AAC/B,EAAE;AACF,6EAA6E;AAC7E,EAAE;AACF,6EAA6E;AAC7E,8EAA8E;AAC9E,+EAA+E;AAC/E,8EAA8E;AAC9E,iFAAiF;AACjF,oCAAoC;AACpC,4EAA4E;AAC5E,mDAAmD;AACnD,EAAE;AACF,0EAA0E;AAC1E,+EAA+E;AAC/E,qEAAqE;AACrE,EAAE;AACF,8EAA8E;AAC9E,0EAA0E;AAI1E,8EAA8E;AAC9E,MAAM,kBAAkB,GAAa;IACnC,aAAa;IACb,kBAAkB;IAClB,kBAAkB;IAClB,aAAa;IACb,yCAAyC;IACzC,aAAa;IACb,iBAAiB,EAAE,wCAAwC;CAC5D,CAAC;AAEF,gFAAgF;AAChF,MAAM,2BAA2B,GAAa;IAC5C,eAAe;IACf,sBAAsB;IACtB,iBAAiB;IACjB,0BAA0B;IAC1B,cAAc,EAAE,6BAA6B;IAC7C,WAAW;IACX,cAAc;IACd,aAAa,EAAE,8BAA8B;CAC9C,CAAC;AAEF,0EAA0E;AAC1E,MAAM,uBAAuB,GAAa;IACxC,mBAAmB;IACnB,oBAAoB;IACpB,oBAAoB;IACpB,gCAAgC,EAAE,wDAAwD;IAC1F,cAAc,EAAE,4CAA4C;IAC5D,eAAe;CAChB,CAAC;AAEF,+CAA+C;AAC/C,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC,CAAC,WAAW,EAAE,KAAK,EAAE,WAAW,EAAE,MAAM,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC,CAAC;AACxF,uDAAuD;AACvD,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,CAAC,YAAY,EAAE,aAAa,EAAE,WAAW,EAAE,aAAa,EAAE,aAAa,CAAC,CAAC,CAAC;AAYtG,mFAAmF;AACnF,SAAS,WAAW,CAAC,QAAuB;IAC1C,MAAM,GAAG,GAA0C,EAAE,CAAC;IACtD,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;QACzB,IAAI,CAAC,CAAC,IAAI,KAAK,WAAW,IAAI,CAAC,CAAC,CAAC,UAAU;YAAE,SAAS;QACtD,KAAK,MAAM,EAAE,IAAI,CAAC,CAAC,UAAU,EAAE,CAAC;YAC9B,GAAG,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,EAAE,CAAC,QAAQ,CAAC,IAAI,EAAE,IAAI,EAAE,EAAE,CAAC,QAAQ,CAAC,SAAS,EAAE,CAAC,CAAC;QACpE,CAAC;IACH,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,gBAAgB,CAAC,CAAmB;IAClD,MAAM,QAAQ,GAAwB,EAAE,CAAC;IACzC,KAAK,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,WAAW,CAAC,CAAC,CAAC,QAAQ,CAAC,EAAE,CAAC;QACrD,MAAM,MAAM,GAAG,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACpC,MAAM,OAAO,GAAG,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QAEtC,2EAA2E;QAC3E,yEAAyE;QACzE,IAAI,MAAM,IAAI,OAAO,IAAI,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YACvD,KAAK,MAAM,CAAC,IAAI,kBAAkB,EAAE,CAAC;gBACnC,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;gBACxB,IAAI,CAAC;oBAAE,QAAQ,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,WAAW,EAAE,MAAM,EAAE,CAAC,CAAC,CA
AC,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC;YAClE,CAAC;QACH,CAAC;QAED,+CAA+C;QAC/C,IAAI,OAAO,IAAI,2BAA2B,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YACtD,KAAK,MAAM,CAAC,IAAI,2BAA2B,EAAE,CAAC;gBAC5C,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;gBACxB,IAAI,CAAC;oBAAE,QAAQ,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,qBAAqB,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC;YAC5E,CAAC;QACH,CAAC;QAED,kDAAkD;QAClD,KAAK,MAAM,CAAC,IAAI,uBAAuB,EAAE,CAAC;YACxC,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YACxB,IAAI,CAAC;gBAAE,QAAQ,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,gBAAgB,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC;QAC9E,CAAC;IACH,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,+DAA+D;AAC/D,MAAM,UAAU,cAAc,CAAC,CAAmB;IAChD,OAAO,gBAAgB,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;AACxC,CAAC"}
|