synergyspec-selfevolving 1.4.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +31 -18
- package/dist/commands/learn.d.ts +12 -1
- package/dist/commands/learn.js +151 -11
- package/dist/commands/self-evolution-episode.d.ts +177 -0
- package/dist/commands/self-evolution-episode.js +423 -0
- package/dist/commands/self-evolution.d.ts +12 -190
- package/dist/commands/self-evolution.js +114 -866
- package/dist/core/archive.d.ts +0 -1
- package/dist/core/archive.js +0 -58
- package/dist/core/artifact-graph/instruction-loader.d.ts +2 -4
- package/dist/core/artifact-graph/instruction-loader.js +3 -31
- package/dist/core/fitness/loss.d.ts +5 -5
- package/dist/core/fitness/loss.js +4 -4
- package/dist/core/project-config.d.ts +2 -0
- package/dist/core/project-config.js +28 -0
- package/dist/core/self-evolution/candidate-fitness.d.ts +23 -1
- package/dist/core/self-evolution/candidate-fitness.js +31 -5
- package/dist/core/self-evolution/candidates.d.ts +0 -9
- package/dist/core/self-evolution/critic-agent.d.ts +150 -0
- package/dist/core/self-evolution/critic-agent.js +487 -0
- package/dist/core/self-evolution/edits-contract.d.ts +53 -0
- package/dist/core/self-evolution/edits-contract.js +89 -0
- package/dist/core/self-evolution/episode-orchestrator.d.ts +197 -0
- package/dist/core/self-evolution/episode-orchestrator.js +534 -0
- package/dist/core/self-evolution/episode-store.d.ts +266 -0
- package/dist/core/self-evolution/episode-store.js +573 -0
- package/dist/core/self-evolution/evolution-switches.d.ts +1 -1
- package/dist/core/self-evolution/evolution-switches.js +5 -10
- package/dist/core/self-evolution/evolving-agent.d.ts +162 -0
- package/dist/core/self-evolution/evolving-agent.js +449 -0
- package/dist/core/self-evolution/host-harness.d.ts +1 -2
- package/dist/core/self-evolution/host-harness.js +1 -2
- package/dist/core/self-evolution/index.d.ts +9 -6
- package/dist/core/self-evolution/index.js +18 -6
- package/dist/core/self-evolution/line-diff.d.ts +60 -0
- package/dist/core/self-evolution/line-diff.js +130 -0
- package/dist/core/self-evolution/policy/fs-safe.d.ts +19 -0
- package/dist/core/self-evolution/policy/fs-safe.js +89 -0
- package/dist/core/self-evolution/policy/index.d.ts +13 -0
- package/dist/core/self-evolution/policy/index.js +13 -0
- package/dist/core/self-evolution/policy/policy-store.d.ts +217 -0
- package/dist/core/self-evolution/policy/policy-store.js +774 -0
- package/dist/core/self-evolution/policy/reject-buffer.d.ts +48 -0
- package/dist/core/self-evolution/policy/reject-buffer.js +168 -0
- package/dist/core/self-evolution/promote.d.ts +1 -1
- package/dist/core/self-evolution/promote.js +6 -33
- package/dist/core/self-evolution/promotion.js +1 -2
- package/dist/core/self-evolution/reward-agent.d.ts +234 -0
- package/dist/core/self-evolution/reward-agent.js +564 -0
- package/dist/core/self-evolution/scope-gate.d.ts +66 -0
- package/dist/core/self-evolution/scope-gate.js +107 -0
- package/dist/core/self-evolution/success-channel.js +2 -2
- package/dist/core/self-evolution/tool-evolution.js +2 -13
- package/dist/core/self-evolution/verdict.d.ts +8 -5
- package/dist/core/self-evolution/verdict.js +4 -7
- package/dist/core/templates/workflows/learn.d.ts +3 -2
- package/dist/core/templates/workflows/learn.js +18 -16
- package/dist/core/templates/workflows/self-evolving.d.ts +6 -4
- package/dist/core/templates/workflows/self-evolving.js +62 -172
- package/dist/dashboard/data.d.ts +25 -51
- package/dist/dashboard/data.js +68 -180
- package/dist/dashboard/react-client.js +458 -503
- package/dist/dashboard/react-styles.js +3 -3
- package/dist/dashboard/server.js +23 -17
- package/dist/ui/ascii-patterns.d.ts +7 -15
- package/dist/ui/ascii-patterns.js +123 -54
- package/dist/ui/welcome-screen.d.ts +0 -14
- package/dist/ui/welcome-screen.js +16 -35
- package/package.json +1 -1
|
@@ -0,0 +1,534 @@
|
|
|
1
|
+
import { toActionSkeleton } from '../trajectory/skeleton.js';
|
|
2
|
+
import { getTrajectoryForChange } from '../trajectory/registry.js';
|
|
3
|
+
import { acquireInFlight, releaseInFlight, currentPolicyVersion, readPolicyLedger, initPolicyLineage, rollbackPolicyVersion, } from './policy/policy-store.js';
|
|
4
|
+
import { appendRejectBufferEntry, readRejectBuffer, } from './policy/reject-buffer.js';
|
|
5
|
+
import { createEpisode, advanceEpisodeStage, writeArmCapture, readEpisode, episodeDir, } from './episode-store.js';
|
|
6
|
+
import { shouldRunCriticAgent, runCriticAgent, } from './critic-agent.js';
|
|
7
|
+
import { runRewardAgent } from './reward-agent.js';
|
|
8
|
+
import { runEvolvingAgent, DEFAULT_EVOLVING_AGENT_EDIT_BUDGET, } from './evolving-agent.js';
|
|
9
|
+
/**
 * Build the MAIN AGENT arm `{ transcript?, skeleton?, objective }` from an
 * already-computed learn report's {@link FitnessSample} plus the discovered
 * trajectory.
 *
 * The learn/fitness grading is reused as-is; nothing is re-graded here:
 *  - `objective.passRate` prefers the OBSERVED pass rate
 *    (`trajectoryFacts.observedPassRate`, only when `testRunObserved` is set),
 *    then the authored `testMetrics.passRate`, then `null` (never fabricated);
 *  - `objective.healthPenalty` is the raw `fitnessSample.healthSignal`
 *    (`null` when there is no signal), not a zero-defaulted penalty;
 *  - `objective.loss` is `fitnessSample.loss.loss`, or `null`;
 *  - `verified` / `observedStatus` come from `trajectoryFacts`.
 *
 * Skeleton resolution order: `trajectoryHandles.skeleton`, then
 * `report.observedRun`, then a skeleton derived from a discovered trajectory.
 * Discovery through an injected `trajectoryHandles.trajectorySource` is
 * best-effort: a rejected promise OR a synchronous throw from `getTrajectory`
 * results in "no skeleton" instead of failing the whole capture.
 *
 * @param {object} opts
 * @param {object} opts.report - Learn report; reads `fitnessSample` and `observedRun`.
 * @param {string} opts.repoRoot - Repository root used for default trajectory discovery.
 * @param {string} opts.changeName - Change whose trajectory is looked up.
 * @param {object} [opts.trajectoryHandles] - Optional `transcript`, `skeleton`,
 *   and/or `trajectorySource` (an object exposing `getTrajectory(changeName)`).
 * @returns {Promise<object>} `{ transcript?, skeleton?, objective }`; the optional
 *   keys are omitted (not set to `undefined`) when unavailable.
 */
export async function captureMainArm(opts) {
    const sample = opts.report.fitnessSample;
    const facts = sample?.trajectoryFacts;
    // Same precedence the loss computation uses: observed (a real runner ran)
    // before authored, and null when neither is available.
    const observedPassRate = facts?.testRunObserved && facts.observedPassRate !== null
        ? facts.observedPassRate
        : null;
    const passRate = observedPassRate ?? sample?.testMetrics?.passRate ?? null;
    const objective = {
        passRate,
        ...(sample?.testMetrics
            ? { testsTotal: sample.testMetrics.total, testsFailed: sample.testMetrics.failed }
            : {}),
        // Raw health reading: "no signal" stays null rather than becoming 0.
        healthPenalty: sample?.healthSignal ?? null,
        // Blended per-change loss; null when the sample carries no loss.
        loss: sample?.loss ? sample.loss.loss : null,
        verified: facts ? facts.verified : false,
        observedStatus: facts ? facts.observedStatus : null,
        measuredAt: new Date().toISOString(),
    };
    // The raw transcript is the caller's to pass; session files are never
    // re-read here.
    const transcript = opts.trajectoryHandles?.transcript;
    let skeleton = opts.trajectoryHandles?.skeleton ?? opts.report.observedRun ?? undefined;
    if (skeleton === undefined) {
        const source = opts.trajectoryHandles?.trajectorySource;
        let trajectory = null;
        if (source) {
            try {
                // try/catch (not just `.catch`) so a synchronous throw from a
                // non-async source is treated like a rejection.
                trajectory = await source.getTrajectory(opts.changeName);
            }
            catch {
                trajectory = null;
            }
        }
        else {
            trajectory = await getTrajectoryForChange(opts.repoRoot, opts.changeName);
        }
        if (trajectory) {
            const derived = toActionSkeleton(trajectory);
            if (derived)
                skeleton = derived;
        }
    }
    return {
        ...(transcript !== undefined ? { transcript } : {}),
        ...(skeleton !== undefined ? { skeleton } : {}),
        objective,
    };
}
|
|
73
|
+
/**
 * Compute the deterministic episode id seed
 * `<changeName>-<compact UTC yyyyMMddTHHmmss>`, lower-cased and sanitized to
 * `[a-z0-9-]` (runs of other characters become a single `-`, repeated dashes
 * collapse, leading/trailing dashes are trimmed).
 *
 * It is computed before the in-flight lock is taken so the lock can be
 * acquired without first creating an episode directory.
 *
 * @param {string} changeName - Change name used as the id prefix.
 * @param {Date} now - Timestamp whose UTC fields form the suffix.
 * @returns {string} The sanitized episode id seed.
 */
function deriveEpisodeId(changeName, now) {
    const twoDigits = (value) => String(value).padStart(2, '0');
    const datePart = [
        String(now.getUTCFullYear()).padStart(4, '0'),
        twoDigits(now.getUTCMonth() + 1),
        twoDigits(now.getUTCDate()),
    ].join('');
    const timePart = [
        twoDigits(now.getUTCHours()),
        twoDigits(now.getUTCMinutes()),
        twoDigits(now.getUTCSeconds()),
    ].join('');
    const seed = `${changeName}-${datePart}T${timePart}`.toLowerCase();
    const dashed = seed.replace(/[^a-z0-9-]+/g, '-');
    const collapsed = dashed.replace(/-{2,}/g, '-');
    return collapsed.replace(/^-+|-+$/g, '');
}
|
|
90
|
+
/**
 * Run ONE episode end to end.
 *
 * Order of operations:
 *  1. make sure the target's policy lineage exists (initialize it when
 *     `currentPolicyVersion` reports `null`) and read the head version;
 *  2. derive the episode id up front and acquire the in-flight slot — any
 *     failure to acquire is reported as a `{ episodeId: null, busy: true }`
 *     result, before an episode directory is created;
 *  3. create the episode; if `createEpisode` returned a different id than the
 *     one requested, move the lock over to the real id;
 *  4. hand off to {@link runEpisodeAfterCreate} for the remaining steps.
 *
 * The in-flight slot is released in a `finally` guard, so a throw mid-episode
 * does not leave the lock held by this call.
 *
 * @param {object} opts - Episode options (`repoRoot`, `targetId`, `changeName`,
 *   `changeDirPath`, `mainArm`, `spawn`, plus optional `now`,
 *   `advantageRollbackThreshold`, `editBudget`, `resolveFiles`).
 * @returns {Promise<object>} The busy result, or whatever
 *   {@link runEpisodeAfterCreate} returns.
 */
export async function runEpisode(opts) {
    const { repoRoot, targetId } = opts;
    const now = opts.now ?? new Date();
    const threshold = opts.advantageRollbackThreshold ?? 0;
    const editBudget = opts.editBudget ?? DEFAULT_EVOLVING_AGENT_EDIT_BUDGET;
    // The lineage must exist before the lock is taken (per the original note,
    // the lock entry records the lineage head).
    const existingHead = await currentPolicyVersion(repoRoot, targetId);
    if (existingHead === null) {
        const lineageInit = { repoRoot, targetId };
        if (opts.resolveFiles) {
            lineageInit.resolveFiles = opts.resolveFiles;
        }
        await initPolicyLineage(lineageInit);
    }
    const policyVersionMain = await currentPolicyVersion(repoRoot, targetId);
    // Fixed up front so the lock entry and the episode dir agree on the id.
    const episodeId = deriveEpisodeId(opts.changeName, now);
    try {
        await acquireInFlight({ repoRoot, targetId, episodeId, now });
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        return { episodeId: null, busy: true, reason };
    }
    // The lock is held from here on. `heldBy` is the id that currently owns it,
    // so the release in `finally` always names the actual holder.
    let heldBy = episodeId;
    try {
        const { episode } = await createEpisode({
            repoRoot,
            changeName: opts.changeName,
            changeDirPath: opts.changeDirPath,
            targetId,
            policyVersionMain,
            episodeId,
            now,
        });
        const actualId = episode.episodeId;
        if (actualId !== episodeId) {
            // createEpisode picked a different id: re-key the lock to match.
            await releaseInFlight({ repoRoot, targetId, episodeId }).catch(() => { });
            await acquireInFlight({ repoRoot, targetId, episodeId: actualId, now });
            heldBy = actualId;
        }
        const outcome = await runEpisodeAfterCreate({
            ...opts,
            episodeId: actualId,
            now,
            threshold,
            editBudget,
        });
        return outcome;
    }
    finally {
        // Always give the slot back, even on error; a failed release is ignored.
        await releaseInFlight({ repoRoot, targetId, episodeId: heldBy }).catch(() => { });
    }
}
|
|
166
|
+
/**
 * The part of {@link runEpisode} that runs once the episode exists and the
 * in-flight lock is held. Split out so the caller's try/finally owns the lock.
 *
 * Steps, each awaited before the next starts:
 *  c. write the main-arm capture and advance to 'main-arm-captured';
 *  d. run the critic (baseline) agent, or record 'baseline-skipped' with the
 *     reason when it should not run / has no baseline version;
 *  e. run the reward agent and read its diagnosis;
 *  f. decide: 'abstained' (diagnosis abstained or named no gaps), otherwise
 *     'rolled-back' (advantage below `threshold` and an earlier version
 *     exists: roll back, THEN append the reject-buffer entry, THEN advance the
 *     stage) or 'kept';
 *  g. for 'rolled-back'/'kept' only, run the evolving agent afterwards;
 *  h. close the episode best-effort and report the resulting policy head.
 *
 * @param {object} opts - {@link runEpisode}'s options plus the resolved
 *   `episodeId`, `threshold`, and `editBudget`.
 * @returns {Promise<object>} `{ episodeId, baselineSkipped, advantage,
 *   decision, evolution, newPolicyVersion }`.
 */
async function runEpisodeAfterCreate(opts) {
    const { repoRoot, targetId, episodeId, threshold, editBudget } = opts;
    const advanceTo = (stage, patch) => advanceEpisodeStage(patch === undefined
        ? { repoRoot, episodeId, stage }
        : { repoRoot, episodeId, stage, patch });
    // ── c: record the main-agent arm ────────────────────────────────────────
    const mainArm = opts.mainArm;
    const capture = { repoRoot, episodeId, arm: 'main-arm' };
    if (mainArm.transcript !== undefined) {
        capture.transcript = { fileName: 'transcript.jsonl', content: mainArm.transcript };
    }
    if (mainArm.skeleton !== undefined) {
        capture.skeleton = mainArm.skeleton;
    }
    capture.objective = mainArm.objective;
    await writeArmCapture(capture);
    await advanceTo('main-arm-captured');
    // ── d: critic (baseline) agent, or skip ─────────────────────────────────
    const critic = await shouldRunCriticAgent({ repoRoot, targetId });
    const baselineSkipped = !(critic.run && critic.baselineVersion !== null);
    if (baselineSkipped) {
        await advanceTo('baseline-skipped', { baselineSkippedReason: critic.reason });
    }
    else {
        // Per the original note, runCriticAgent itself advances the episode to
        // 'baseline-arm-captured'.
        await runCriticAgent({
            repoRoot,
            targetId,
            changeName: opts.changeName,
            episodeId,
            baselineVersion: critic.baselineVersion,
            spawn: opts.spawn,
        });
    }
    // ── e: reward agent ─────────────────────────────────────────────────────
    const { diagnosis } = await runRewardAgent({ repoRoot, episodeId, spawn: opts.spawn });
    const advantage = diagnosis.advantage;
    // ── f: decision ─────────────────────────────────────────────────────────
    let decision;
    let evolution = null;
    if (diagnosis.abstained || diagnosis.gaps.length === 0) {
        // Abstained / nothing nameable: no rollback decision, no evolution.
        decision = 'abstained';
        await advanceTo('abstained');
    }
    else {
        const episode = await readEpisode(repoRoot, episodeId);
        const headBefore = await currentPolicyVersion(repoRoot, targetId);
        // Prefer the baseline version recorded on the episode when it is a
        // valid earlier version, else the version just before the head.
        const restoreTo = resolveRollbackTarget(episode.policyVersionBaseline, headBefore);
        const advantageIsBad = advantage !== null && advantage < threshold;
        if (advantageIsBad && restoreTo !== null && headBefore !== null) {
            // (i) roll back first ...
            await rollbackPolicyVersion({
                repoRoot,
                targetId,
                episodeId,
                toVersion: restoreTo,
                advantage: advantage ?? undefined,
            });
            // (ii) ... then append the reject-buffer entry, before the
            // evolving agent is called, so this episode's entry is already
            // written when that agent runs.
            await appendRejectBufferEntry(repoRoot, {
                schemaVersion: 1,
                at: new Date().toISOString(),
                episodeId,
                targetId,
                // fromVersion: the version rolled back TO;
                // toVersion: the head the rejected edit had reached.
                fromVersion: restoreTo,
                toVersion: headBefore,
                advantage,
                rewardMain: diagnosis.rewardMain,
                rewardBaseline: diagnosis.rewardBaseline,
                textualGradientTried: diagnosis.textualGradient ?? '',
                editSummary: buildRejectEditSummary(diagnosis),
                reason: 'bad-advantage',
            });
            decision = 'rolled-back';
            await advanceTo('rolled-back');
        }
        else {
            // Good advantage, or no earlier version to restore: keep the head.
            decision = 'kept';
            await advanceTo('kept');
        }
        // ── g: only after (f) has been recorded; never run alongside it ─────
        evolution = await runEvolvingAgent({
            repoRoot,
            episodeId,
            targetId,
            editBudget,
            spawn: opts.spawn,
        });
    }
    // ── h: close (best-effort) and report the resulting head ────────────────
    await closeEpisodeBestEffort(repoRoot, episodeId);
    const newPolicyVersion = await currentPolicyVersion(repoRoot, targetId);
    return { episodeId, baselineSkipped, advantage, decision, evolution, newPolicyVersion };
}
|
|
285
|
+
/**
 * Advance the episode to 'closed', but only when its recorded stage is one of
 * 'evolved', 'evolution-refused', or 'abstained'. Any other stage (for example
 * 'kept' or 'rolled-back') is left untouched instead of attempting the
 * advance, so closing never overrides the stage the episode actually reached.
 *
 * @param {string} repoRoot - Repository root.
 * @param {string} episodeId - Episode to close.
 * @returns {Promise<void>}
 */
async function closeEpisodeBestEffort(repoRoot, episodeId) {
    const { stage } = await readEpisode(repoRoot, episodeId);
    const canClose = stage === 'evolved' ||
        stage === 'evolution-refused' ||
        stage === 'abstained';
    if (!canClose)
        return;
    await advanceEpisodeStage({ repoRoot, episodeId, stage: 'closed' });
}
|
|
303
|
+
/**
 * Pick the version a bad-advantage rollback should restore.
 *
 * @param {number|null} baselineVersion - Baseline version recorded on the
 *   episode; used only when it is a non-negative integer strictly below `head`.
 * @param {number|null} head - Current policy head.
 * @returns {number|null} `baselineVersion` when usable, otherwise `head - 1`;
 *   `null` when `head` is `null` or below 1 (nothing earlier to restore).
 */
function resolveRollbackTarget(baselineVersion, head) {
    if (head === null) {
        return null;
    }
    if (head < 1) {
        return null;
    }
    const baselineIsUsable = baselineVersion !== null &&
        Number.isInteger(baselineVersion) &&
        baselineVersion >= 0 &&
        baselineVersion < head;
    return baselineIsUsable ? baselineVersion : head - 1;
}
|
|
322
|
+
/**
 * Derive the reject-buffer `editSummary` from a diagnosis.
 *
 * The summary lists the distinct gap files named by the diagnosis and an
 * excerpt of the textual gradient as the rationale. Line counts are always
 * recorded as 0.
 *
 * The excerpt is capped at 280 UTF-16 code units and is cut on a code-point
 * boundary: when the cap would fall between the two halves of a surrogate
 * pair, the dangling high surrogate is dropped so the stored text never ends
 * in half a character.
 *
 * @param {object} diagnosis - Reads `gaps[].file` and `textualGradient`.
 * @returns {{files: string[], linesAdded: number, linesRemoved: number,
 *   rationaleExcerpt: string}} The edit summary.
 */
function buildRejectEditSummary(diagnosis) {
    const maxExcerptUnits = 280;
    const files = uniqueStrings(diagnosis.gaps.map((g) => g.file));
    const gradient = diagnosis.textualGradient ?? '';
    let rationaleExcerpt = gradient.slice(0, maxExcerptUnits);
    if (gradient.length > maxExcerptUnits) {
        // 0xD800–0xDBFF is the high-surrogate range; its low half was cut off.
        const lastUnit = rationaleExcerpt.charCodeAt(maxExcerptUnits - 1);
        if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
            rationaleExcerpt = rationaleExcerpt.slice(0, maxExcerptUnits - 1);
        }
    }
    return {
        files,
        linesAdded: 0,
        linesRemoved: 0,
        rationaleExcerpt,
    };
}
|
|
337
|
+
/**
 * Return the distinct, non-empty strings of `values`, in first-seen order.
 *
 * Entries that are not strings (for example `null`/`undefined` from a gap that
 * carries no `file`) are skipped rather than dereferenced, so a malformed
 * entry cannot throw here.
 *
 * @param {Iterable<unknown>} values - Candidate values.
 * @returns {string[]} De-duplicated non-empty strings.
 */
function uniqueStrings(values) {
    // A Set iterates in insertion order, which preserves first-seen order.
    const seen = new Set();
    for (const value of values) {
        if (typeof value === 'string' && value.length > 0) {
            seen.add(value);
        }
    }
    return [...seen];
}
|
|
348
|
+
/**
 * Append a 'bad-advantage' reject-buffer entry for an episode unless the
 * target's buffer already holds an entry with the same `episodeId`.
 *
 * The existence check lets a crash-resume re-enter the decision step without
 * writing a second rejection record for the same episode. The entry's
 * rewards, textual gradient, and edit summary are all taken from the supplied
 * diagnosis.
 *
 * @param {string} repoRoot - Repository root.
 * @param {object} opts - `episodeId`, `targetId`, `fromVersion`, `toVersion`,
 *   `advantage`, and the `diagnosis` to record.
 * @returns {Promise<void>}
 */
async function ensureRejectBufferEntry(repoRoot, opts) {
    const { episodeId, targetId, diagnosis } = opts;
    const buffered = await readRejectBuffer(repoRoot, targetId);
    const notYetRecorded = buffered.every((entry) => entry.episodeId !== episodeId);
    if (!notYetRecorded) {
        return;
    }
    await appendRejectBufferEntry(repoRoot, {
        schemaVersion: 1,
        at: new Date().toISOString(),
        episodeId,
        targetId,
        fromVersion: opts.fromVersion,
        toVersion: opts.toVersion,
        advantage: opts.advantage,
        rewardMain: diagnosis.rewardMain,
        rewardBaseline: diagnosis.rewardBaseline,
        textualGradientTried: diagnosis.textualGradient ?? '',
        editSummary: buildRejectEditSummary(diagnosis),
        reason: 'bad-advantage',
    });
}
|
|
377
|
+
/**
 * Re-enter a partially-run episode at its recorded stage and run only the
 * steps that remain:
 *
 *  - 'scored'                    → make the decision, then run the evolving
 *                                  agent (unless abstaining), then close;
 *  - 'rolled-back' / 'kept'      → run the evolving agent, then close;
 *  - 'evolved' / 'evolution-refused' / 'abstained' → close;
 *  - any other stage             → nothing is run; the stage is reported as-is.
 *
 * In the 'scored' case a ledger entry with action 'rollback' for this episode
 * means the rollback was already applied before the interruption; it is not
 * applied again, and only the reject-buffer entry and the 'rolled-back' stage
 * advance are completed. A missing or unreadable diagnosis is treated as an
 * abstention.
 *
 * This function does not acquire the in-flight lock.
 *
 * @param {object} opts - `repoRoot`, `episodeId`, `spawn`, and optional
 *   `editBudget` / `advantageRollbackThreshold`.
 * @returns {Promise<object>} `{ episodeId, resumedFrom, stage, evolution }`
 *   where `stage` is re-read from the episode after the remaining steps ran.
 */
export async function resumeEpisode(opts) {
    const { repoRoot, episodeId } = opts;
    const editBudget = opts.editBudget ?? DEFAULT_EVOLVING_AGENT_EDIT_BUDGET;
    const threshold = opts.advantageRollbackThreshold ?? 0;
    const episode = await readEpisode(repoRoot, episodeId);
    const resumedFrom = episode.stage;
    const targetId = episode.targetId;
    const advanceTo = (stage) => advanceEpisodeStage({ repoRoot, episodeId, stage });
    const evolve = () => runEvolvingAgent({
        repoRoot,
        episodeId,
        targetId,
        editBudget,
        spawn: opts.spawn,
    });
    let evolution = null;
    switch (resumedFrom) {
        case 'scored': {
            // Redo the decision from the on-disk diagnosis, then evolve.
            const diagnosis = await readDiagnosisForResume(repoRoot, episodeId);
            const abstain = diagnosis === null || diagnosis.abstained || diagnosis.gaps.length === 0;
            if (abstain) {
                await advanceTo('abstained');
            }
            else {
                const advantage = diagnosis.advantage;
                const ledger = await readPolicyLedger(repoRoot, targetId);
                const priorRollback = ledger.find((entry) => entry.action === 'rollback' && entry.episodeId === episodeId);
                if (priorRollback) {
                    // The rollback already happened. Rebuild the version axis
                    // from its ledger entry instead of rolling back again:
                    //   toVersion   = head before the rollback
                    //               = priorRollback.version - 1
                    //   fromVersion = the version that rollback restored
                    const toVersion = priorRollback.version - 1;
                    const restored = resolveRollbackTarget(episode.policyVersionBaseline, toVersion);
                    await ensureRejectBufferEntry(repoRoot, {
                        episodeId,
                        targetId,
                        fromVersion: restored ?? toVersion,
                        toVersion,
                        advantage,
                        diagnosis,
                    });
                    await advanceTo('rolled-back');
                }
                else {
                    const headBefore = await currentPolicyVersion(repoRoot, targetId);
                    const restoreTo = resolveRollbackTarget(episode.policyVersionBaseline, headBefore);
                    const advantageIsBad = advantage !== null && advantage < threshold;
                    if (advantageIsBad && restoreTo !== null && headBefore !== null) {
                        await rollbackPolicyVersion({
                            repoRoot,
                            targetId,
                            episodeId,
                            toVersion: restoreTo,
                            advantage: advantage ?? undefined,
                        });
                        await ensureRejectBufferEntry(repoRoot, {
                            episodeId,
                            targetId,
                            fromVersion: restoreTo,
                            toVersion: headBefore,
                            advantage,
                            diagnosis,
                        });
                        await advanceTo('rolled-back');
                    }
                    else {
                        await advanceTo('kept');
                    }
                }
                evolution = await evolve();
            }
            await closeEpisodeBestEffort(repoRoot, episodeId);
            break;
        }
        case 'rolled-back':
        case 'kept':
            evolution = await evolve();
            await closeEpisodeBestEffort(repoRoot, episodeId);
            break;
        case 'evolved':
        case 'evolution-refused':
        case 'abstained':
            await closeEpisodeBestEffort(repoRoot, episodeId);
            break;
        default:
            // Earlier stages are not resumed here; report them unchanged.
            break;
    }
    const after = await readEpisode(repoRoot, episodeId);
    return { episodeId, resumedFrom, stage: after.stage, evolution };
}
|
|
491
|
+
/**
 * Read the episode's `diagnosis.json` for the resume decision step and
 * normalize it to the minimal slice that step needs.
 *
 * Returns `null` — which the caller treats as an abstention — when the file
 * cannot be read, is not valid JSON, or does not hold a JSON object (for
 * example the literal `null`, a number, a string, or an array).
 *
 * Normalization:
 *  - `gaps` keeps only entries whose `file`, `section`, and `description` are
 *    all strings, reduced to exactly those three fields;
 *  - `abstained` is true only for the boolean `true`;
 *  - `advantage`, `rewardBaseline`, and `textualGradient` become `null` when
 *    not of the expected type; `rewardMain` becomes `0`.
 *
 * @param {string} repoRoot - Repository root.
 * @param {string} episodeId - Episode whose diagnosis is read.
 * @returns {Promise<object|null>} The normalized diagnosis, or `null`.
 */
async function readDiagnosisForResume(repoRoot, episodeId) {
    const { promises: fsPromises } = await import('node:fs');
    const nodePath = await import('node:path');
    const file = nodePath.join(episodeDir(repoRoot, episodeId), 'diagnosis.json');
    let parsed;
    try {
        parsed = JSON.parse(await fsPromises.readFile(file, 'utf8'));
    }
    catch {
        // Missing file, unreadable file, or malformed JSON: all unusable.
        return null;
    }
    // JSON.parse also yields null, primitives, and arrays; none of those is a
    // diagnosis, and reading `.gaps` off null would throw.
    if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
        return null;
    }
    const gaps = Array.isArray(parsed.gaps)
        ? parsed.gaps.flatMap((g) => {
            const o = (g && typeof g === 'object' ? g : {});
            return typeof o.file === 'string' &&
                typeof o.section === 'string' &&
                typeof o.description === 'string'
                ? [{ file: o.file, section: o.section, description: o.description }]
                : [];
        })
        : [];
    return {
        abstained: parsed.abstained === true,
        gaps,
        advantage: typeof parsed.advantage === 'number' ? parsed.advantage : null,
        rewardMain: typeof parsed.rewardMain === 'number' ? parsed.rewardMain : 0,
        rewardBaseline: typeof parsed.rewardBaseline === 'number' ? parsed.rewardBaseline : null,
        textualGradient: typeof parsed.textualGradient === 'string' ? parsed.textualGradient : null,
    };
}
|
|
534
|
+
//# sourceMappingURL=episode-orchestrator.js.map
|