synergyspec-selfevolving 1.4.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69):
  1. package/README.md +31 -18
  2. package/dist/commands/learn.d.ts +12 -1
  3. package/dist/commands/learn.js +151 -11
  4. package/dist/commands/self-evolution-episode.d.ts +177 -0
  5. package/dist/commands/self-evolution-episode.js +423 -0
  6. package/dist/commands/self-evolution.d.ts +12 -190
  7. package/dist/commands/self-evolution.js +114 -866
  8. package/dist/core/archive.d.ts +0 -1
  9. package/dist/core/archive.js +0 -58
  10. package/dist/core/artifact-graph/instruction-loader.d.ts +2 -4
  11. package/dist/core/artifact-graph/instruction-loader.js +3 -31
  12. package/dist/core/fitness/loss.d.ts +5 -5
  13. package/dist/core/fitness/loss.js +4 -4
  14. package/dist/core/project-config.d.ts +2 -0
  15. package/dist/core/project-config.js +28 -0
  16. package/dist/core/self-evolution/candidate-fitness.d.ts +23 -1
  17. package/dist/core/self-evolution/candidate-fitness.js +31 -5
  18. package/dist/core/self-evolution/candidates.d.ts +0 -9
  19. package/dist/core/self-evolution/critic-agent.d.ts +150 -0
  20. package/dist/core/self-evolution/critic-agent.js +487 -0
  21. package/dist/core/self-evolution/edits-contract.d.ts +53 -0
  22. package/dist/core/self-evolution/edits-contract.js +89 -0
  23. package/dist/core/self-evolution/episode-orchestrator.d.ts +197 -0
  24. package/dist/core/self-evolution/episode-orchestrator.js +534 -0
  25. package/dist/core/self-evolution/episode-store.d.ts +266 -0
  26. package/dist/core/self-evolution/episode-store.js +573 -0
  27. package/dist/core/self-evolution/evolution-switches.d.ts +1 -1
  28. package/dist/core/self-evolution/evolution-switches.js +5 -10
  29. package/dist/core/self-evolution/evolving-agent.d.ts +162 -0
  30. package/dist/core/self-evolution/evolving-agent.js +449 -0
  31. package/dist/core/self-evolution/host-harness.d.ts +1 -2
  32. package/dist/core/self-evolution/host-harness.js +1 -2
  33. package/dist/core/self-evolution/index.d.ts +9 -6
  34. package/dist/core/self-evolution/index.js +18 -6
  35. package/dist/core/self-evolution/line-diff.d.ts +60 -0
  36. package/dist/core/self-evolution/line-diff.js +130 -0
  37. package/dist/core/self-evolution/policy/fs-safe.d.ts +19 -0
  38. package/dist/core/self-evolution/policy/fs-safe.js +89 -0
  39. package/dist/core/self-evolution/policy/index.d.ts +13 -0
  40. package/dist/core/self-evolution/policy/index.js +13 -0
  41. package/dist/core/self-evolution/policy/policy-store.d.ts +217 -0
  42. package/dist/core/self-evolution/policy/policy-store.js +774 -0
  43. package/dist/core/self-evolution/policy/reject-buffer.d.ts +48 -0
  44. package/dist/core/self-evolution/policy/reject-buffer.js +168 -0
  45. package/dist/core/self-evolution/promote.d.ts +1 -1
  46. package/dist/core/self-evolution/promote.js +6 -33
  47. package/dist/core/self-evolution/promotion.js +1 -2
  48. package/dist/core/self-evolution/reward-agent.d.ts +234 -0
  49. package/dist/core/self-evolution/reward-agent.js +564 -0
  50. package/dist/core/self-evolution/scope-gate.d.ts +66 -0
  51. package/dist/core/self-evolution/scope-gate.js +107 -0
  52. package/dist/core/self-evolution/success-channel.js +2 -2
  53. package/dist/core/self-evolution/tool-evolution.js +2 -13
  54. package/dist/core/self-evolution/verdict.d.ts +8 -5
  55. package/dist/core/self-evolution/verdict.js +4 -7
  56. package/dist/core/templates/workflows/learn.d.ts +3 -2
  57. package/dist/core/templates/workflows/learn.js +18 -16
  58. package/dist/core/templates/workflows/self-evolving.d.ts +6 -4
  59. package/dist/core/templates/workflows/self-evolving.js +62 -172
  60. package/dist/dashboard/data.d.ts +25 -51
  61. package/dist/dashboard/data.js +68 -180
  62. package/dist/dashboard/react-client.js +458 -503
  63. package/dist/dashboard/react-styles.js +3 -3
  64. package/dist/dashboard/server.js +23 -17
  65. package/dist/ui/ascii-patterns.d.ts +7 -15
  66. package/dist/ui/ascii-patterns.js +123 -54
  67. package/dist/ui/welcome-screen.d.ts +0 -14
  68. package/dist/ui/welcome-screen.js +16 -35
  69. package/package.json +1 -1
@@ -0,0 +1,534 @@
1
+ import { toActionSkeleton } from '../trajectory/skeleton.js';
2
+ import { getTrajectoryForChange } from '../trajectory/registry.js';
3
+ import { acquireInFlight, releaseInFlight, currentPolicyVersion, readPolicyLedger, initPolicyLineage, rollbackPolicyVersion, } from './policy/policy-store.js';
4
+ import { appendRejectBufferEntry, readRejectBuffer, } from './policy/reject-buffer.js';
5
+ import { createEpisode, advanceEpisodeStage, writeArmCapture, readEpisode, episodeDir, } from './episode-store.js';
6
+ import { shouldRunCriticAgent, runCriticAgent, } from './critic-agent.js';
7
+ import { runRewardAgent } from './reward-agent.js';
8
+ import { runEvolvingAgent, DEFAULT_EVOLVING_AGENT_EDIT_BUDGET, } from './evolving-agent.js';
9
/**
 * Assemble the MAIN AGENT arm capture `{transcript?, skeleton?, objective}` from
 * an already-computed learn report and whatever trajectory handles the caller
 * supplied. Nothing is re-graded here; every objective field is copied from
 * `opts.report.fitnessSample`:
 *
 *  - `passRate`: the observed pass rate when `trajectoryFacts.testRunObserved`
 *    is truthy and an observed rate is present, otherwise
 *    `testMetrics.passRate`, otherwise `null`.
 *  - `testsTotal` / `testsFailed`: only present when `testMetrics` exists.
 *  - `healthPenalty`: the raw `healthSignal` (`null` when absent).
 *  - `loss`: `loss.loss` when a loss object exists, else `null`.
 *  - `verified` / `observedStatus`: taken from `trajectoryFacts`
 *    (`false` / `null` when there are no facts).
 *  - `measuredAt`: the current time as an ISO-8601 string.
 *
 * @param opts Reads `report`, `trajectoryHandles?`, `changeName`, `repoRoot`.
 * @returns The arm object; `transcript` and `skeleton` keys are omitted when
 *   their value is `undefined`.
 */
export async function captureMainArm(opts) {
    const { report, trajectoryHandles: handles } = opts;
    const sample = report.fitnessSample;
    const facts = sample?.trajectoryFacts;
    const metrics = sample?.testMetrics;

    // Observed pass rate wins; fall back to the authored summary, then to null.
    let passRate = null;
    if (facts?.testRunObserved && facts.observedPassRate !== null) {
        passRate = facts.observedPassRate;
    }
    passRate = passRate ?? metrics?.passRate ?? null;

    // Keys are added in a fixed order so the serialized objective is stable.
    const objective = { passRate };
    if (metrics) {
        objective.testsTotal = metrics.total;
        objective.testsFailed = metrics.failed;
    }
    objective.healthPenalty = sample?.healthSignal ?? null;
    objective.loss = sample?.loss ? sample.loss.loss : null;
    objective.verified = facts ? facts.verified : false;
    objective.observedStatus = facts ? facts.observedStatus : null;
    objective.measuredAt = new Date().toISOString();

    // The transcript is only ever what the caller handed in; no session files
    // are read here.
    const transcript = handles?.transcript;

    // Skeleton precedence: explicit handle, then the report's observed run,
    // then a skeleton derived from a freshly looked-up trajectory.
    let skeleton = handles?.skeleton ?? report.observedRun ?? undefined;
    if (skeleton === undefined) {
        const source = handles?.trajectorySource;
        const trajectory = source
            ? await source.getTrajectory(opts.changeName).catch(() => null)
            : await getTrajectoryForChange(opts.repoRoot, opts.changeName);
        const derived = trajectory ? toActionSkeleton(trajectory) : null;
        if (derived) {
            skeleton = derived;
        }
    }

    const arm = {};
    if (transcript !== undefined) {
        arm.transcript = transcript;
    }
    if (skeleton !== undefined) {
        arm.skeleton = skeleton;
    }
    arm.objective = objective;
    return arm;
}
73
/**
 * Compute the episode id seed before any lock or directory exists:
 * `<changeName>-<UTC yyyyMMdd>T<HHmmss>`, lowercased and sanitized to
 * `[a-z0-9-]` (so the `T` separator ends up as `t`). Runs of other characters
 * become a single `-`, repeated dashes collapse, and leading/trailing dashes
 * are trimmed.
 *
 * The original note states this mirrors the default recipe of `createEpisode`;
 * that function is not visible here, so treat the match as an assumption.
 *
 * @param changeName Name of the change the episode belongs to.
 * @param now Date whose UTC fields form the timestamp part.
 * @returns The sanitized id string.
 */
function deriveEpisodeId(changeName, now) {
    const twoDigits = (value) => String(value).padStart(2, '0');
    const datePart = [
        String(now.getUTCFullYear()).padStart(4, '0'),
        twoDigits(now.getUTCMonth() + 1),
        twoDigits(now.getUTCDate()),
    ].join('');
    const timePart = [
        twoDigits(now.getUTCHours()),
        twoDigits(now.getUTCMinutes()),
        twoDigits(now.getUTCSeconds()),
    ].join('');
    const rawSeed = `${changeName}-${datePart}T${timePart}`;
    const lowered = rawSeed.toLowerCase();
    const dashed = lowered.replace(/[^a-z0-9-]+/g, '-');
    const collapsed = dashed.replace(/-{2,}/g, '-');
    return collapsed.replace(/^-+|-+$/g, '');
}
90
/**
 * Run ONE episode: make sure a policy lineage exists, take the per-target
 * in-flight lock, create the episode, run the remaining steps, and always
 * release the lock afterwards.
 *
 * Order of operations, as visible in this function:
 *  1. If `currentPolicyVersion` is `null`, call `initPolicyLineage` (the
 *     original note says the lock records the lineage head, so the lineage has
 *     to exist first).
 *  2. Derive the episode id up front and call `acquireInFlight`. ANY error
 *     thrown by the acquire is reported as `{ episodeId: null, busy: true,
 *     reason }`, and no episode is created.
 *  3. Create the episode. If `createEpisode` returned a different id than the
 *     seed, release the seed lock and re-acquire under the real id.
 *  4. Delegate to `runEpisodeAfterCreate`.
 *  5. In `finally`, release whichever id currently owns the lock; release
 *     failures are deliberately ignored so they never mask the real outcome.
 *
 * @param opts Reads `repoRoot`, `targetId`, `changeName`, `changeDirPath`,
 *   `now?`, `advantageRollbackThreshold?` (default 0), `editBudget?`,
 *   `resolveFiles?`; the whole object is forwarded to `runEpisodeAfterCreate`.
 * @returns The busy result, or whatever `runEpisodeAfterCreate` resolves to.
 */
export async function runEpisode(opts) {
    const { repoRoot, targetId } = opts;
    const now = opts.now ?? new Date();
    const threshold = opts.advantageRollbackThreshold ?? 0;
    const editBudget = opts.editBudget ?? DEFAULT_EVOLVING_AGENT_EDIT_BUDGET;

    // Lineage bootstrap happens before the lock is taken.
    const existingHead = await currentPolicyVersion(repoRoot, targetId);
    if (existingHead === null) {
        const lineageArgs = { repoRoot, targetId };
        if (opts.resolveFiles) {
            lineageArgs.resolveFiles = opts.resolveFiles;
        }
        await initPolicyLineage(lineageArgs);
    }
    const policyVersionMain = await currentPolicyVersion(repoRoot, targetId);

    // Fixing the id before locking means the busy path creates no episode dir.
    const seedEpisodeId = deriveEpisodeId(opts.changeName, now);
    try {
        await acquireInFlight({ repoRoot, targetId, episodeId: seedEpisodeId, now });
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        return { episodeId: null, busy: true, reason };
    }

    // The lock is held from here on. `heldLockId` names the id that owns it so
    // the release in `finally` always targets the current holder.
    let heldLockId = seedEpisodeId;
    try {
        const { episode } = await createEpisode({
            repoRoot,
            changeName: opts.changeName,
            changeDirPath: opts.changeDirPath,
            targetId,
            policyVersionMain,
            episodeId: seedEpisodeId,
            now,
        });
        const realEpisodeId = episode.episodeId;
        const idWasSuffixed = realEpisodeId !== seedEpisodeId;
        if (idWasSuffixed) {
            // Re-key the lock so the lock entry and the episode dir agree.
            await releaseInFlight({ repoRoot, targetId, episodeId: seedEpisodeId }).catch(() => { });
            await acquireInFlight({ repoRoot, targetId, episodeId: realEpisodeId, now });
            heldLockId = realEpisodeId;
        }
        return await runEpisodeAfterCreate({
            ...opts,
            episodeId: realEpisodeId,
            now,
            threshold,
            editBudget,
        });
    }
    finally {
        await releaseInFlight({ repoRoot, targetId, episodeId: heldLockId }).catch(() => { });
    }
}
166
/**
 * Body of `runEpisode` once the episode exists and the in-flight lock is held.
 * Each step is awaited before the next one starts:
 *
 *  c. write the MAIN AGENT arm capture and advance to 'main-arm-captured';
 *  d. run the CRITIC (baseline) agent, or advance to 'baseline-skipped' with
 *     the skip reason when it should not run / has no baseline version;
 *  e. run the REWARD agent and take its diagnosis;
 *  f. decide: 'abstained' (diagnosis abstained or named no gaps),
 *     'rolled-back' (advantage below the threshold AND a rollback target
 *     exists), or 'kept';
 *  g. unless abstained, run the EVOLVING agent, strictly after (f) has been
 *     persisted, so a reject-buffer entry appended in (f) is already on disk;
 *  h. best-effort close, then report the resulting policy head.
 *
 * @param opts The `runEpisode` options plus resolved `episodeId`, `threshold`
 *   and `editBudget`; also reads `mainArm`, `changeName` and `spawn`.
 * @returns `{ episodeId, baselineSkipped, advantage, decision, evolution,
 *   newPolicyVersion }`.
 */
async function runEpisodeAfterCreate(opts) {
    const { repoRoot, targetId, episodeId, threshold, editBudget } = opts;
    const setStage = (stage) => advanceEpisodeStage({ repoRoot, episodeId, stage });

    // c: persist the main-agent arm.
    const { mainArm } = opts;
    const capture = { repoRoot, episodeId, arm: 'main-arm' };
    if (mainArm.transcript !== undefined) {
        capture.transcript = { fileName: 'transcript.jsonl', content: mainArm.transcript };
    }
    if (mainArm.skeleton !== undefined) {
        capture.skeleton = mainArm.skeleton;
    }
    capture.objective = mainArm.objective;
    await writeArmCapture(capture);
    await setStage('main-arm-captured');

    // d: critic (baseline) agent, or record why it was skipped.
    const critic = await shouldRunCriticAgent({ repoRoot, targetId });
    const baselineSkipped = !(critic.run && critic.baselineVersion !== null);
    if (baselineSkipped) {
        await advanceEpisodeStage({
            repoRoot,
            episodeId,
            stage: 'baseline-skipped',
            patch: { baselineSkippedReason: critic.reason },
        });
    }
    else {
        // Per the original note, runCriticAgent itself advances the episode to
        // 'baseline-arm-captured' (not verifiable from this file).
        await runCriticAgent({
            repoRoot,
            targetId,
            changeName: opts.changeName,
            episodeId,
            baselineVersion: critic.baselineVersion,
            spawn: opts.spawn,
        });
    }

    // e: reward agent produces the diagnosis.
    const { diagnosis } = await runRewardAgent({ repoRoot, episodeId, spawn: opts.spawn });
    const advantage = diagnosis.advantage;

    // f: decision.
    let decision;
    let evolution = null;
    if (diagnosis.abstained || diagnosis.gaps.length === 0) {
        // No nameable gap: no rollback decision, and evolution is skipped.
        decision = 'abstained';
        await setStage('abstained');
    }
    else {
        const episode = await readEpisode(repoRoot, episodeId);
        const headBeforeRollback = await currentPolicyVersion(repoRoot, targetId);
        const restoreTo = resolveRollbackTarget(episode.policyVersionBaseline, headBeforeRollback);
        const mustRollBack = advantage !== null &&
            advantage < threshold &&
            restoreTo !== null &&
            headBeforeRollback !== null;
        if (mustRollBack) {
            // (i) roll back first.
            await rollbackPolicyVersion({
                repoRoot,
                targetId,
                episodeId,
                toVersion: restoreTo,
                advantage: advantage ?? undefined,
            });
            // (ii) then record the rejection BEFORE the evolving agent runs.
            // fromVersion = the version rolled back TO; toVersion = the head
            // the rejected edit had reached.
            await appendRejectBufferEntry(repoRoot, {
                schemaVersion: 1,
                at: new Date().toISOString(),
                episodeId,
                targetId,
                fromVersion: restoreTo,
                toVersion: headBeforeRollback,
                advantage,
                rewardMain: diagnosis.rewardMain,
                rewardBaseline: diagnosis.rewardBaseline,
                textualGradientTried: diagnosis.textualGradient ?? '',
                editSummary: buildRejectEditSummary(diagnosis),
                reason: 'bad-advantage',
            });
            decision = 'rolled-back';
            await setStage('rolled-back');
        }
        else {
            // Acceptable advantage, or nothing earlier to roll back to.
            decision = 'kept';
            await setStage('kept');
        }
        // g: only after (f) is persisted; never run in parallel with it.
        evolution = await runEvolvingAgent({
            repoRoot,
            episodeId,
            targetId,
            editBudget,
            spawn: opts.spawn,
        });
    }

    // h: best-effort close, then report the resulting head.
    await closeEpisodeBestEffort(repoRoot, episodeId);
    const newPolicyVersion = await currentPolicyVersion(repoRoot, targetId);
    return { episodeId, baselineSkipped, advantage, decision, evolution, newPolicyVersion };
}
285
/**
 * Advance the episode to 'closed', but only when its current stage is one of
 * 'evolved', 'evolution-refused' or 'abstained'. Any other stage (for example
 * an episode still at 'kept' / 'rolled-back') is left untouched rather than
 * attempting the advance, so closing never hides where the episode actually
 * stopped.
 *
 * @param repoRoot Repository root passed through to the episode store.
 * @param episodeId Episode to close.
 */
async function closeEpisodeBestEffort(repoRoot, episodeId) {
    const { stage } = await readEpisode(repoRoot, episodeId);
    const canClose = stage === 'evolved' ||
        stage === 'evolution-refused' ||
        stage === 'abstained';
    if (!canClose) {
        return;
    }
    await advanceEpisodeStage({ repoRoot, episodeId, stage: 'closed' });
}
303
/**
 * Pick the policy version a bad-advantage rollback should restore.
 *
 * Preference order:
 *  1. `baselineVersion` (the version the CRITIC agent reran), when it is a
 *     non-negative integer strictly below `head`;
 *  2. otherwise `head - 1`, the version immediately before the head.
 *
 * Returns `null` when there is nothing earlier to restore: the head is `null`,
 * is below 1, or is not an integer at all. The integer check matters because a
 * malformed head (`NaN`, `undefined`, a fraction — e.g. computed from a ledger
 * entry with a missing `version`) would otherwise slip past the `< 1` guard and
 * yield a `NaN` or fractional "version" that callers would then persist.
 *
 * @param baselineVersion Candidate baseline version, or `null` when none.
 * @param head Current lineage head, or `null` when unknown.
 * @returns The integer version to roll back to, or `null`.
 */
function resolveRollbackTarget(baselineVersion, head) {
    if (head === null || !Number.isInteger(head) || head < 1) {
        return null;
    }
    // Number.isInteger already rejects null / undefined / NaN / fractions.
    const baselineIsEarlier = Number.isInteger(baselineVersion) &&
        baselineVersion >= 0 &&
        baselineVersion < head;
    return baselineIsEarlier ? baselineVersion : head - 1;
}
322
/**
 * Build the reject-buffer `editSummary` from a diagnosis.
 *
 * The summary lists the de-duplicated `file` of every diagnosed gap and carries
 * the first 280 characters of the textual gradient as the rationale excerpt.
 * Line counts are always recorded as 0. The original note explains that the
 * rejected edit's files have already been rolled back by the time this is
 * written, which is why gap files stand in for the edit's surface.
 *
 * @param diagnosis Reads `gaps[].file` and `textualGradient` (may be nullish).
 * @returns `{ files, linesAdded, linesRemoved, rationaleExcerpt }`.
 */
function buildRejectEditSummary(diagnosis) {
    const gapFiles = diagnosis.gaps.map((gap) => gap.file);
    const gradientText = diagnosis.textualGradient ?? '';
    const summary = {
        files: uniqueStrings(gapFiles),
        linesAdded: 0,
        linesRemoved: 0,
        rationaleExcerpt: gradientText.slice(0, 280),
    };
    return summary;
}
337
/**
 * Return the distinct, non-empty strings of `values`, keeping the order in
 * which each string first appears.
 *
 * Entries that are not strings (`null`, `undefined`, numbers, ...) are skipped
 * instead of being dereferenced, so one malformed entry — e.g. a diagnosed gap
 * with no `file` — cannot abort the whole reject-buffer write with a TypeError.
 *
 * @param values Iterable of candidate strings.
 * @returns A new array of unique non-empty strings.
 */
function uniqueStrings(values) {
    // A Set iterates in insertion order, which gives first-seen ordering.
    const seen = new Set();
    for (const value of values) {
        if (typeof value === 'string' && value.length > 0) {
            seen.add(value);
        }
    }
    return [...seen];
}
348
/**
 * Append a 'bad-advantage' reject-buffer entry for an episode unless the
 * buffer read for `opts.targetId` already holds an entry with the same
 * `episodeId`. The check makes the write idempotent, so a crash-resume that
 * re-enters the decision step does not record the same rejection twice.
 *
 * The entry's rewards, textual gradient and edit summary all come from
 * `opts.diagnosis`.
 *
 * @param repoRoot Repository root passed to the reject-buffer store.
 * @param opts `{ episodeId, targetId, fromVersion, toVersion, advantage,
 *   diagnosis }`.
 */
async function ensureRejectBufferEntry(repoRoot, opts) {
    const { episodeId, targetId, fromVersion, toVersion, advantage, diagnosis } = opts;
    const recorded = await readRejectBuffer(repoRoot, targetId);
    for (const prior of recorded) {
        if (prior.episodeId === episodeId) {
            // Already on disk for this episode: nothing to do.
            return;
        }
    }
    await appendRejectBufferEntry(repoRoot, {
        schemaVersion: 1,
        at: new Date().toISOString(),
        episodeId,
        targetId,
        fromVersion,
        toVersion,
        advantage,
        rewardMain: diagnosis.rewardMain,
        rewardBaseline: diagnosis.rewardBaseline,
        textualGradientTried: diagnosis.textualGradient ?? '',
        editSummary: buildRejectEditSummary(diagnosis),
        reason: 'bad-advantage',
    });
}
377
/**
 * Re-enter a partially-run episode at its recorded stage and run only the
 * steps that remain. Dispatch by the stage read from disk:
 *
 *  - 'scored': redo the decision from the on-disk diagnosis, then run the
 *    EVOLVING agent (unless the decision is 'abstained'), then close.
 *  - 'rolled-back' / 'kept': run the EVOLVING agent, then close.
 *  - 'evolved' / 'evolution-refused' / 'abstained': close.
 *  - any other stage: nothing is done; the episode is reported as-is.
 *
 * Crash-resume de-duplication in the 'scored' path: if the policy ledger
 * already has a 'rollback' entry for this episode, the rollback is NOT applied
 * again; only the reject-buffer entry and the 'rolled-back' stage advance are
 * completed, with the version axis reconstructed from that ledger entry
 * (`toVersion = entry.version - 1`).
 *
 * This function never touches the in-flight lock; the original note describes
 * resume as an operator-driven recovery rather than a concurrent run.
 *
 * @param opts Reads `repoRoot`, `episodeId`, `editBudget?`,
 *   `advantageRollbackThreshold?` (default 0) and `spawn`.
 * @returns `{ episodeId, resumedFrom, stage, evolution }`, where `stage` is
 *   re-read from disk after the resume.
 */
export async function resumeEpisode(opts) {
    const { repoRoot, episodeId } = opts;
    const editBudget = opts.editBudget ?? DEFAULT_EVOLVING_AGENT_EDIT_BUDGET;
    const threshold = opts.advantageRollbackThreshold ?? 0;
    const ep = await readEpisode(repoRoot, episodeId);
    const resumedFrom = ep.stage;
    const targetId = ep.targetId;

    const setStage = (stage) => advanceEpisodeStage({ repoRoot, episodeId, stage });
    const evolve = () => runEvolvingAgent({
        repoRoot,
        episodeId,
        targetId,
        editBudget,
        spawn: opts.spawn,
    });

    let evolution = null;
    switch (resumedFrom) {
        case 'scored': {
            const diagnosis = await readDiagnosisForResume(repoRoot, episodeId);
            const nothingToActOn = diagnosis === null ||
                diagnosis.abstained ||
                diagnosis.gaps.length === 0;
            if (nothingToActOn) {
                // Missing/corrupt diagnosis is treated the same as an abstain.
                await setStage('abstained');
            }
            else {
                const { advantage } = diagnosis;
                const ledger = await readPolicyLedger(repoRoot, targetId);
                const priorRollback = ledger.find((entry) => entry.action === 'rollback' && entry.episodeId === episodeId);
                if (priorRollback) {
                    // The rollback was already applied before the interruption.
                    // toVersion   = head before that rollback (entry.version - 1)
                    // fromVersion = the policy it restored (falls back to
                    //               toVersion when no earlier version resolves)
                    const toVersion = priorRollback.version - 1;
                    const restored = resolveRollbackTarget(ep.policyVersionBaseline, toVersion);
                    await ensureRejectBufferEntry(repoRoot, {
                        episodeId,
                        targetId,
                        fromVersion: restored ?? toVersion,
                        toVersion,
                        advantage,
                        diagnosis,
                    });
                    await setStage('rolled-back');
                }
                else {
                    const headBeforeRollback = await currentPolicyVersion(repoRoot, targetId);
                    const restoreTo = resolveRollbackTarget(ep.policyVersionBaseline, headBeforeRollback);
                    const mustRollBack = advantage !== null &&
                        advantage < threshold &&
                        restoreTo !== null &&
                        headBeforeRollback !== null;
                    if (mustRollBack) {
                        await rollbackPolicyVersion({
                            repoRoot,
                            targetId,
                            episodeId,
                            toVersion: restoreTo,
                            advantage: advantage ?? undefined,
                        });
                        await ensureRejectBufferEntry(repoRoot, {
                            episodeId,
                            targetId,
                            fromVersion: restoreTo,
                            toVersion: headBeforeRollback,
                            advantage,
                            diagnosis,
                        });
                        await setStage('rolled-back');
                    }
                    else {
                        await setStage('kept');
                    }
                }
                evolution = await evolve();
            }
            await closeEpisodeBestEffort(repoRoot, episodeId);
            break;
        }
        case 'rolled-back':
        case 'kept':
            evolution = await evolve();
            await closeEpisodeBestEffort(repoRoot, episodeId);
            break;
        case 'evolved':
        case 'evolution-refused':
        case 'abstained':
            await closeEpisodeBestEffort(repoRoot, episodeId);
            break;
        default:
            // Earlier stages are not auto-resumable here.
            break;
    }

    const after = await readEpisode(repoRoot, episodeId);
    return { episodeId, resumedFrom, stage: after.stage, evolution };
}
491
/**
 * Read `<episodeDir>/diagnosis.json` and normalize it to the minimal slice the
 * resume decision needs.
 *
 * Returns `null` whenever the diagnosis cannot be trusted, which the caller
 * treats as an abstain:
 *  - the file cannot be read;
 *  - the content is not valid JSON;
 *  - the JSON parses to something that is not a plain object (`null`, a
 *    primitive, or an array). Without this check a file containing the literal
 *    `null` would throw a TypeError on `parsed.gaps` instead of failing safe.
 *
 * Normalization rules for a readable object:
 *  - `abstained` is `true` only for a literal `true`;
 *  - `gaps` keeps only entries whose `file`, `section` and `description` are
 *    all strings (other entries are dropped);
 *  - `advantage` / `rewardBaseline` / `textualGradient` become `null` when
 *    they have the wrong type; `rewardMain` defaults to `0`.
 *
 * @param repoRoot Repository root used to locate the episode directory.
 * @param episodeId Episode whose diagnosis is read.
 * @returns The normalized diagnosis slice, or `null` when unreadable.
 */
async function readDiagnosisForResume(repoRoot, episodeId) {
    // Loaded lazily, as in the original; this file has no static fs/path import.
    const { promises: fs } = await import('node:fs');
    const pathMod = await import('node:path');
    const file = pathMod.join(episodeDir(repoRoot, episodeId), 'diagnosis.json');

    let parsed;
    try {
        parsed = JSON.parse(await fs.readFile(file, 'utf8'));
    }
    catch {
        // Missing file or malformed JSON: fail safe.
        return null;
    }
    // JSON.parse legitimately returns null / primitives / arrays; none of
    // those is a diagnosis record.
    if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
        return null;
    }

    const isGap = (candidate) => candidate !== null &&
        typeof candidate === 'object' &&
        typeof candidate.file === 'string' &&
        typeof candidate.section === 'string' &&
        typeof candidate.description === 'string';
    const gaps = Array.isArray(parsed.gaps)
        ? parsed.gaps
            .filter(isGap)
            .map(({ file: gapFile, section, description }) => ({ file: gapFile, section, description }))
        : [];

    return {
        abstained: parsed.abstained === true,
        gaps,
        advantage: typeof parsed.advantage === 'number' ? parsed.advantage : null,
        rewardMain: typeof parsed.rewardMain === 'number' ? parsed.rewardMain : 0,
        rewardBaseline: typeof parsed.rewardBaseline === 'number' ? parsed.rewardBaseline : null,
        textualGradient: typeof parsed.textualGradient === 'string' ? parsed.textualGradient : null,
    };
}
534
+ //# sourceMappingURL=episode-orchestrator.js.map