synergyspec-selfevolving 2.1.2 → 2.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/dist/commands/learn.js +13 -3
  2. package/dist/commands/self-evolution-episode.d.ts +6 -1
  3. package/dist/commands/self-evolution-episode.js +8 -1
  4. package/dist/commands/self-evolution.d.ts +2 -2
  5. package/dist/commands/self-evolution.js +10 -10
  6. package/dist/commands/workflow/status.js +5 -0
  7. package/dist/core/change-readiness.d.ts +1 -1
  8. package/dist/core/change-readiness.js +13 -5
  9. package/dist/core/fitness/test-metrics.d.ts +33 -0
  10. package/dist/core/fitness/test-metrics.js +67 -0
  11. package/dist/core/learn.js +11 -2
  12. package/dist/core/project-config.d.ts +3 -0
  13. package/dist/core/project-config.js +7 -1
  14. package/dist/core/self-evolution/critic-agent.js +13 -5
  15. package/dist/core/self-evolution/edits-contract.d.ts +15 -5
  16. package/dist/core/self-evolution/edits-contract.js +26 -16
  17. package/dist/core/self-evolution/episode-orchestrator.d.ts +11 -6
  18. package/dist/core/self-evolution/episode-orchestrator.js +88 -24
  19. package/dist/core/self-evolution/episode-store.d.ts +34 -11
  20. package/dist/core/self-evolution/episode-store.js +45 -10
  21. package/dist/core/self-evolution/evolving-agent.d.ts +4 -4
  22. package/dist/core/self-evolution/evolving-agent.js +26 -26
  23. package/dist/core/self-evolution/host-harness.d.ts +68 -2
  24. package/dist/core/self-evolution/host-harness.js +208 -21
  25. package/dist/core/self-evolution/policy/policy-store.d.ts +8 -6
  26. package/dist/core/self-evolution/policy/policy-store.js +124 -24
  27. package/dist/core/self-evolution/proposer-slice.d.ts +4 -3
  28. package/dist/core/self-evolution/reward-agent.d.ts +11 -1
  29. package/dist/core/self-evolution/reward-agent.js +53 -20
  30. package/dist/core/self-evolution/reward-aggregator.d.ts +18 -0
  31. package/dist/core/self-evolution/reward-aggregator.js +53 -3
  32. package/dist/core/self-evolution/reward-deepread.d.ts +64 -0
  33. package/dist/core/self-evolution/reward-deepread.js +112 -0
  34. package/dist/core/templates/workflows/learn.js +2 -1
  35. package/dist/core/templates/workflows/self-evolving.js +5 -2
  36. package/dist/core/trajectory/facts.d.ts +69 -2
  37. package/dist/core/trajectory/facts.js +179 -10
  38. package/dist/core/trajectory/skeleton.d.ts +10 -0
  39. package/dist/core/trajectory/skeleton.js +24 -3
  40. package/package.json +1 -1
@@ -431,11 +431,21 @@ async function runEpisodeAfterCreate(opts) {
431
431
  // runEvolvingAgent reads the reject-buffer FRESH from disk (the entry just
432
432
  // written THIS episode is in its prompt). Never parallelized with (f).
433
433
  // 步长: after a rollback, shrink the edit budget (smaller step after a step
434
- // that lost ground). 预测校准: pass the proposer's recent prediction record.
434
+ // that lost ground). 预测校准: pass the 演进智能体 EVOLVING AGENT's recent
435
+ // prediction record.
435
436
  const scheduledBudget = decision === 'rolled-back'
436
437
  ? scheduledEditBudget(await readPolicyLedger(repoRoot, targetId), editBudget)
437
438
  : editBudget;
438
439
  const calibrationNote = await summarizeCalibration(repoRoot, targetId);
440
+ // Advance 'evolving' (with a heartbeat) BEFORE the spawn so a concurrent
441
+ // sibling reading the store sees a LIVE-but-slow holder, not a stale lock at
442
+ // 'kept'/'rolled-back'. runEvolvingAgent advances the terminal outcome.
443
+ await advanceEpisodeStage({
444
+ repoRoot,
445
+ episodeId,
446
+ stage: 'evolving',
447
+ patch: { evolvingHeartbeatAt: new Date().toISOString() },
448
+ });
439
449
  evolution = await runEvolvingAgent({
440
450
  repoRoot,
441
451
  episodeId,
@@ -477,16 +487,19 @@ async function runEpisodeAfterCreate(opts) {
477
487
  * Closable stages:
478
488
  * - evolved | evolution-refused | abstained — the 演进智能体 EVOLVING AGENT
479
489
  * reached a definite outcome (or the judge 弃权 abstained), the normal close.
480
- * - kept | rolled-back — the 演进智能体 returned not-spawned (its diagnosis
490
+ * - evolving — the 演进智能体 returned not-spawned (its diagnosis
481
491
  * abstained-after-gap-check, no gaps, or the target resolved to no editable
482
- * local files), so the episode never advanced past the decision. By the time
483
- * this runs (AFTER runEvolvingAgent returned), a stage still at 'kept'/
484
- * 'rolled-back' can ONLY mean not-spawned — a success advances 'evolved', a
492
+ * local files), so the episode never advanced past the 'evolving' marker.
493
+ * By the time this runs (AFTER runEvolvingAgent returned), a stage still at
494
+ * 'evolving' can ONLY mean not-spawned — a success advances 'evolved', a
485
495
  * refusal advances 'evolution-refused', and a throw is caught upstream and
486
496
  * records 'errored' + rethrows so this close is never reached. So a leftover
487
- * kept/rolled-back at close time IS the finished-nothing-to-evolve case and
488
- * must close, not rest forever at a non-terminal stage (the exact ambiguity
489
- * the 'errored' stage was meant to remove).
497
+ * 'evolving' at close time IS the finished-nothing-to-evolve case and must
498
+ * close, not rest forever at a non-terminal stage (the exact ambiguity the
499
+ * 'errored' stage was meant to remove).
500
+ * - kept | rolled-back — retained for back-compat: an OLD episode record (or a
501
+ * code path that did not advance the 'evolving' marker) that returned
502
+ * not-spawned never advances past the decision; close it the same way.
490
503
  *
491
504
  * Any other (genuinely non-closable) stage is left as-is rather than throwing, so
492
505
  * the close never masks the real episode outcome.
@@ -497,7 +510,10 @@ async function closeEpisodeBestEffort(repoRoot, episodeId) {
497
510
  'evolved',
498
511
  'evolution-refused',
499
512
  'abstained',
500
- // not-spawned 演进智能体 leaves the episode here close the finished episode.
513
+ // not-spawned 演进智能体 leaves the episode at the 'evolving' marker — close
514
+ // the finished episode. 'kept'/'rolled-back' retained for back-compat with
515
+ // an old record / a path that never advanced the marker.
516
+ 'evolving',
501
517
  'kept',
502
518
  'rolled-back',
503
519
  ]);
@@ -586,14 +602,16 @@ async function ensureRejectBufferEntry(repoRoot, opts) {
586
602
  * done step rather than re-advancing a stage already entered:
587
603
  *
588
604
  * - 'scored' → run the decision (f) then the 演进智能体 (g).
589
- * - 'rolled-back' / 'kept' → run the 演进智能体 EVOLVING AGENT (g) then close.
605
+ * - 'rolled-back' / 'kept' / 'evolving' → run the 演进智能体 EVOLVING AGENT (g)
606
+ * then close. ('evolving' means a crash AFTER the
607
+ * marker but before the agent settled an outcome.)
590
608
  * - 'evolved'/'evolution-refused'/'abstained' → close.
591
609
  * - 'errored' → RE-DRIVE from the last GOOD pre-error stage
592
610
  * (an episode may have errored on a TRANSIENT
593
611
  * cause — a one-off git/analyzer/agent timeout).
594
612
  * The pre-error stage is the last `stageHistory`
595
613
  * entry that is NOT 'errored'; when it is one of
596
- * {'scored','rolled-back','kept'} (the
614
+ * {'scored','rolled-back','kept','evolving'} (the
597
615
  * resume-entry stages) we advance errored → that
598
616
  * stage and fall through to the normal dispatch.
599
617
  * Otherwise the pre-error stage is not
@@ -617,7 +635,7 @@ export async function resumeEpisode(opts) {
617
635
  // for an 'errored' episode we attempt to RE-DRIVE from the last good pre-error
618
636
  // stage (a transient git/analyzer/agent failure should be retryable via an
619
637
  // operator resume). 'errored' stays terminal for every OTHER caller — only this
620
- // resume path may re-drive it, via the errored → {scored,rolled-back,kept}
638
+ // resume path may re-drive it, via the errored → {scored,rolled-back,kept,evolving}
621
639
  // transitions the stage machine allows ONLY for operator recovery.
622
640
  let stage = ep.stage;
623
641
  if (ep.stage === 'errored') {
@@ -626,7 +644,8 @@ export async function resumeEpisode(opts) {
626
644
  .find((h) => h.stage !== 'errored')?.stage;
627
645
  if (preError === 'scored' ||
628
646
  preError === 'rolled-back' ||
629
- preError === 'kept') {
647
+ preError === 'kept' ||
648
+ preError === 'evolving') {
630
649
  // Re-open the errored episode at its last auto-resumable stage, then fall
631
650
  // through to the normal dispatch for that stage.
632
651
  await advanceEpisodeStage({ repoRoot, episodeId, stage: preError });
@@ -636,7 +655,7 @@ export async function resumeEpisode(opts) {
636
655
  // 'baseline-skipped'); leave the episode at 'errored' and report it as-is.
637
656
  }
638
657
  // The decision (f) + 演进智能体 EVOLVING AGENT (g) re-runs below can THROW — a
639
- // wedged/crashed host CLI (CanonicalProposerInvocationError), a timeout, or an
658
+ // wedged/crashed host CLI (EvolvingAgentInvocationError), a timeout, or an
640
659
  // observed-GREEN gate throw. UNCAUGHT, that leaves the episode DURABLY stuck at
641
660
  // a non-terminal stage ('scored'/'rolled-back'/'kept'/'evolving' — the orphan state fix ❷
642
661
  // eliminates for runEpisode). Record the SAME terminal 'errored' stage here
@@ -644,7 +663,31 @@ export async function resumeEpisode(opts) {
644
663
  // re-throw. Resume holds NO in-flight lock, so this is a durable-stage fix, not
645
664
  // a leak fix. Best-effort write: a failed record must not mask the original throw.
646
665
  try {
647
- if (stage === 'scored') {
666
+ // TOCTOU guard: resume read the stage at entry (`ep.stage` above), but it holds NO in-flight
667
+ // lock, so a CONCURRENT runEpisode for the same target can advance THIS episode to
668
+ // a TERMINAL stage between that read and the transitions below. Re-read the episode
669
+ // immediately before dispatching; if it is already finished, the transitions would
670
+ // throw an illegal-transition error (which the catch below would then mis-record as
671
+ // a fresh 'errored'). Short-circuit instead: report the already-finished episode
672
+ // via the normal completion return. (The errored→pre-error re-drive above already
673
+ // turned a re-drivable 'errored' into a non-terminal stage, so a stage that is
674
+ // STILL terminal here is genuinely finished, not auto-resumable.)
675
+ const TERMINAL_STAGES = new Set([
676
+ 'closed',
677
+ 'errored',
678
+ 'evolution-refused',
679
+ 'evolved',
680
+ 'abstained',
681
+ ]);
682
+ const fresh = await readEpisode(repoRoot, episodeId);
683
+ stage = fresh.stage;
684
+ if (TERMINAL_STAGES.has(stage)) {
685
+ // 'evolved'/'evolution-refused'/'abstained' still want their best-effort close;
686
+ // 'closed'/'errored' are no-ops for closeEpisodeBestEffort. No transition is
687
+ // attempted, so the race cannot surface as an illegal-transition throw.
688
+ await closeEpisodeBestEffort(repoRoot, episodeId);
689
+ }
690
+ else if (stage === 'scored') {
648
691
  // Re-run the decision (f) from the on-disk diagnosis, then (g).
649
692
  const diagnosis = await readDiagnosisForResume(repoRoot, episodeId);
650
693
  if (shouldSkipEvolution(diagnosis)) {
@@ -719,6 +762,15 @@ export async function resumeEpisode(opts) {
719
762
  ? scheduledEditBudget(await readPolicyLedger(repoRoot, targetId), editBudget)
720
763
  : editBudget;
721
764
  const calibrationNote = await summarizeCalibration(repoRoot, targetId);
765
+ // Advance the 'evolving' marker (heartbeat) before the spawn, mirroring
766
+ // runEpisode's (g). Idempotent across a crash-resume: 'evolving' is reached
767
+ // from both 'rolled-back' and 'kept'.
768
+ await advanceEpisodeStage({
769
+ repoRoot,
770
+ episodeId,
771
+ stage: 'evolving',
772
+ patch: { evolvingHeartbeatAt: new Date().toISOString() },
773
+ });
722
774
  evolution = await runEvolvingAgent({
723
775
  repoRoot,
724
776
  episodeId,
@@ -732,7 +784,7 @@ export async function resumeEpisode(opts) {
732
784
  }
733
785
  await closeEpisodeBestEffort(repoRoot, episodeId);
734
786
  }
735
- else if (stage === 'rolled-back' || stage === 'kept') {
787
+ else if (stage === 'rolled-back' || stage === 'kept' || stage === 'evolving') {
736
788
  // The decision already ran (and the original episode settled the prediction);
737
789
  // re-settle idempotently for the crash window, then schedule + calibrate.
738
790
  try {
@@ -741,10 +793,26 @@ export async function resumeEpisode(opts) {
741
793
  catch {
742
794
  // best-effort: advisory only
743
795
  }
744
- const scheduledBudget = stage === 'rolled-back'
796
+ // Resuming from 'evolving' means the decision is in history (not the resume
797
+ // stage); read it from stageHistory so the 步长 schedule still shrinks after a
798
+ // rollback. Resuming from 'rolled-back'/'kept' uses the resume stage directly.
799
+ const wasRolledBack = stage === 'rolled-back' ||
800
+ (stage === 'evolving' && ep.stageHistory.some((h) => h.stage === 'rolled-back'));
801
+ const scheduledBudget = wasRolledBack
745
802
  ? scheduledEditBudget(await readPolicyLedger(repoRoot, targetId), editBudget)
746
803
  : editBudget;
747
804
  const calibrationNote = await summarizeCalibration(repoRoot, targetId);
805
+ // Advance the 'evolving' marker before the spawn when resuming from the
806
+ // decision stage. When already at 'evolving' (a crash mid-spawn re-drive),
807
+ // the marker is present — skip the (now illegal) self-transition.
808
+ if (stage !== 'evolving') {
809
+ await advanceEpisodeStage({
810
+ repoRoot,
811
+ episodeId,
812
+ stage: 'evolving',
813
+ patch: { evolvingHeartbeatAt: new Date().toISOString() },
814
+ });
815
+ }
748
816
  evolution = await runEvolvingAgent({
749
817
  repoRoot,
750
818
  episodeId,
@@ -757,13 +825,9 @@ export async function resumeEpisode(opts) {
757
825
  });
758
826
  await closeEpisodeBestEffort(repoRoot, episodeId);
759
827
  }
760
- else if (stage === 'evolved' ||
761
- stage === 'evolution-refused' ||
762
- stage === 'abstained') {
763
- await closeEpisodeBestEffort(repoRoot, episodeId);
764
- }
765
- // earlier stages (and a non-auto-resumable 'errored'): not auto-resumable here
766
- // — reported as-is.
828
+ // Terminal stages (incl. a non-auto-resumable 'errored') are handled by the
829
+ // TOCTOU guard above; earlier stages are not auto-resumable here — reported
830
+ // as-is via the completion return below.
767
831
  }
768
832
  catch (err) {
769
833
  // A thrown decision/evolving step records a DURABLE terminal 'errored' stage so
@@ -57,10 +57,11 @@
57
57
  * -> (baseline-arm-captured | baseline-skipped) // CRITIC AGENT(基线智能体 baseline agent)arm
58
58
  * -> scored // 奖励智能体 REWARD AGENT wrote diagnosis.json
59
59
  * -> (rolled-back | kept) // rollback decision on the main arm's edits
60
+ * -> evolving // 演进智能体 EVOLVING AGENT holds the in-flight lock
60
61
  * -> (evolved | evolution-refused | abstained) // 演进智能体 EVOLVING AGENT outcome
61
62
  * -> closed // terminal
62
63
  *
63
- * (rolled-back | kept)
64
+ * (rolled-back | kept | evolving)
64
65
  * -> closed // terminal — see below
65
66
  *
66
67
  * (any non-terminal stage)
@@ -73,12 +74,23 @@
73
74
  * 弃权 abstains when no nameable gap → no rollback decision needed → the
74
75
  * 演进智能体 EVOLVING AGENT is never spawned.
75
76
  *
76
- * `rolled-back`/`kept` may also reach `closed` DIRECTLY: when the 演进智能体
77
- * EVOLVING AGENT was NOT spawned (its diagnosis abstained-after-gap-check, named
78
- * no gaps, or the target resolved to no editable local files) the episode never
79
- * advances past the decision, so the orchestrator's best-effort close terminates
80
- * the finished-nothing-to-evolve episode rather than leaving it resting forever
81
- * at a non-terminal stage.
77
+ * `evolving` is advanced by the orchestrator BEFORE it spawns the 演进智能体
78
+ * EVOLVING AGENT, while that agent holds the in-flight lock. It exists so a
79
+ * concurrent sibling reading the store distinguishes a LIVE-but-slow holder
80
+ * (stage `evolving`) from an episode that merely reached the decision (`kept`/
81
+ * `rolled-back`) — without it the stage stays `kept` for the whole evolving
82
+ * spawn, and a sibling can misread a running holder as stale. The
83
+ * `evolvingHeartbeatAt` field records when the stage was entered. Old episode
84
+ * records that predate this stage never carry it; they resume exactly as before
85
+ * (the `rolled-back`/`kept` → outcome transitions are retained for them).
86
+ *
87
+ * `rolled-back`/`kept`/`evolving` may also reach `closed` DIRECTLY: when the
88
+ * 演进智能体 EVOLVING AGENT was NOT spawned (its diagnosis abstained-after-gap-
89
+ * check, named no gaps, or the target resolved to no editable local files) the
90
+ * episode never advances past the decision (it stays `kept`/`rolled-back`, or —
91
+ * for the not-spawned-after-evolving-marker case — `evolving`), so the
92
+ * orchestrator's best-effort close terminates the finished-nothing-to-evolve
93
+ * episode rather than leaving it resting forever at a non-terminal stage.
82
94
  *
83
95
  * `errored` is a SECOND terminal stage reachable from EVERY non-terminal stage.
84
96
  * A thrown step — an agent spawn that crashes or times out (主智能体 MAIN AGENT /
@@ -92,10 +104,10 @@
92
104
  * `errored` is terminal for every target EXCEPT an operator-driven resume: a
93
105
  * transient cause (a one-off git/analyzer/agent timeout) is retryable, so an
94
106
  * `episode resume` may RE-DRIVE an errored episode back to its last good
95
- * pre-error stage — `errored -> {scored, rolled-back, kept}` (the resume-entry
96
- * stages). No other caller may leave `errored`.
107
+ * pre-error stage — `errored -> {scored, rolled-back, kept, evolving}` (the
108
+ * resume-entry stages). No other caller may leave `errored`.
97
109
  */
98
- export type EpisodeStage = 'created' | 'main-arm-captured' | 'baseline-arm-captured' | 'baseline-skipped' | 'scored' | 'rolled-back' | 'kept' | 'evolved' | 'evolution-refused' | 'abstained' | 'closed' | 'errored';
110
+ export type EpisodeStage = 'created' | 'main-arm-captured' | 'baseline-arm-captured' | 'baseline-skipped' | 'scored' | 'rolled-back' | 'kept' | 'evolving' | 'evolved' | 'evolution-refused' | 'abstained' | 'closed' | 'errored';
99
111
  /**
100
112
  * Iterable list of every legal {@link EpisodeStage} value. Order follows the
101
113
  * documented state machine for readability, not behavior.
@@ -143,6 +155,14 @@ export interface EpisodeRecord {
143
155
  stageHistory: EpisodeStageHistoryEntry[];
144
156
  /** Why the baseline arm was skipped (set with stage `baseline-skipped`). */
145
157
  baselineSkippedReason?: string;
158
+ /**
159
+ * ISO 8601 UTC timestamp the episode entered the `evolving` stage (the moment
160
+ * the 演进智能体 EVOLVING AGENT spawn began holding the in-flight lock). A
161
+ * heartbeat for liveness reads — a concurrent sibling can tell a recently-
162
+ * entered `evolving` holder apart from one that genuinely wedged. Absent on
163
+ * old records (and on every stage before `evolving`).
164
+ */
165
+ evolvingHeartbeatAt?: string;
146
166
  /** advantage = reward(主臂) − reward(基线臂); null when the 奖励智能体 REWARD AGENT 弃权 abstained. */
147
167
  advantage?: number | null;
148
168
  /**
@@ -163,6 +183,8 @@ export interface EpisodeStagePatch {
163
183
  advantage?: number | null;
164
184
  /** Cause note merged alongside the terminal `errored` stage. */
165
185
  terminalError?: string;
186
+ /** Heartbeat timestamp merged alongside the `evolving` stage. */
187
+ evolvingHeartbeatAt?: string;
166
188
  }
167
189
  /**
168
190
  * True iff `(from -> to)` is a legal transition in the episode stage machine.
@@ -234,7 +256,8 @@ export interface AdvanceEpisodeStageOptions {
234
256
  * advancing to a stage not reachable from the current one throws.
235
257
  * - Appends `{stage, at}` to `stageHistory`.
236
258
  * - Merges the allowlisted `patch` fields (`policyVersionBaseline`,
237
- * `baselineSkippedReason`, `advantage`, `terminalError`) in the same write.
259
+ * `baselineSkippedReason`, `advantage`, `terminalError`, `evolvingHeartbeatAt`)
260
+ * in the same write.
238
261
  * - Bumps `updatedAt`.
239
262
  */
240
263
  export declare function advanceEpisodeStage(opts: AdvanceEpisodeStageOptions): Promise<EpisodeRecord>;
@@ -61,6 +61,7 @@ export const EPISODE_STAGES = [
61
61
  'scored',
62
62
  'rolled-back',
63
63
  'kept',
64
+ 'evolving',
64
65
  'evolved',
65
66
  'evolution-refused',
66
67
  'abstained',
@@ -77,8 +78,9 @@ const EPISODE_ID_PATTERN = /^[a-z0-9][a-z0-9-]*$/;
77
78
  // step: agent spawn crash/timeout or un-repairable gate), so a failed episode
78
79
  // is never orphaned mid-flight. `closed` and `errored` are the two terminals;
79
80
  // `errored` is terminal EXCEPT for an operator resume re-drive back to its last
80
- // good pre-error stage (scored/rolled-back/kept). `rolled-back`/`kept` may also
81
- // close directly (the not-spawned 演进智能体 finished-nothing-to-evolve case).
81
+ // good pre-error stage (scored/rolled-back/kept/evolving). `rolled-back`/`kept`/
82
+ // `evolving` may also close directly (the not-spawned 演进智能体
83
+ // finished-nothing-to-evolve case).
82
84
  const LEGAL_STAGE_TRANSITIONS = new Map([
83
85
  ['created', new Set(['main-arm-captured', 'errored'])],
84
86
  [
@@ -91,15 +93,38 @@ const LEGAL_STAGE_TRANSITIONS = new Map([
91
93
  // abstained, so no rollback decision is needed and the 演进智能体
92
94
  // EVOLVING AGENT is never spawned.
93
95
  ['scored', new Set(['rolled-back', 'kept', 'abstained', 'errored'])],
94
- // 'rolled-back'/'kept' may also reach 'closed' DIRECTLY when the 演进智能体
95
- // EVOLVING AGENT was not-spawned (the finished-nothing-to-evolve case), so the
96
- // episode never rests forever at a non-terminal stage.
96
+ // 'rolled-back'/'kept' advance to 'evolving' BEFORE the 演进智能体 EVOLVING
97
+ // AGENT spawn (so a sibling can tell a live holder from a stale lock). They
98
+ // also retain the DIRECT transitions to the evolving outcomes + 'closed' so
99
+ // (a) an OLD episode record resumed from 'rolled-back'/'kept' (no 'evolving'
100
+ // stage) behaves exactly as before, and (b) the not-spawned
101
+ // finished-nothing-to-evolve case can still close directly.
97
102
  [
98
103
  'rolled-back',
99
- new Set(['evolved', 'evolution-refused', 'abstained', 'closed', 'errored']),
104
+ new Set([
105
+ 'evolving',
106
+ 'evolved',
107
+ 'evolution-refused',
108
+ 'abstained',
109
+ 'closed',
110
+ 'errored',
111
+ ]),
100
112
  ],
101
113
  [
102
114
  'kept',
115
+ new Set([
116
+ 'evolving',
117
+ 'evolved',
118
+ 'evolution-refused',
119
+ 'abstained',
120
+ 'closed',
121
+ 'errored',
122
+ ]),
123
+ ],
124
+ // The 演进智能体 EVOLVING AGENT outcome (or a not-spawned close), or 'errored'
125
+ // on a thrown spawn/gate.
126
+ [
127
+ 'evolving',
103
128
  new Set(['evolved', 'evolution-refused', 'abstained', 'closed', 'errored']),
104
129
  ],
105
130
  ['evolved', new Set(['closed'])],
@@ -107,8 +132,9 @@ const LEGAL_STAGE_TRANSITIONS = new Map([
107
132
  ['abstained', new Set(['closed'])],
108
133
  ['closed', new Set()],
109
134
  // 'errored' is terminal EXCEPT for an operator resume re-drive back to the
110
- // last good pre-error stage (scored/rolled-back/kept); no other caller leaves it.
111
- ['errored', new Set(['scored', 'rolled-back', 'kept'])],
135
+ // last good pre-error stage (scored/rolled-back/kept/evolving); no other
136
+ // caller leaves it.
137
+ ['errored', new Set(['scored', 'rolled-back', 'kept', 'evolving'])],
112
138
  ]);
113
139
  /**
114
140
  * True iff `(from -> to)` is a legal transition in the episode stage machine.
@@ -387,6 +413,7 @@ const ALLOWED_PATCH_KEYS = new Set([
387
413
  'baselineSkippedReason',
388
414
  'advantage',
389
415
  'terminalError',
416
+ 'evolvingHeartbeatAt',
390
417
  ]);
391
418
  /** Validate an {@link EpisodeStagePatch} fail-closed; returns the merge slice. */
392
419
  function validateStagePatch(patch, episodeId) {
@@ -394,7 +421,7 @@ function validateStagePatch(patch, episodeId) {
394
421
  for (const key of Object.keys(patch)) {
395
422
  if (!ALLOWED_PATCH_KEYS.has(key)) {
396
423
  throw new Error(`Illegal episode patch field for ${episodeId}: "${key}" ` +
397
- `(allowed: policyVersionBaseline, baselineSkippedReason, advantage, terminalError)`);
424
+ `(allowed: policyVersionBaseline, baselineSkippedReason, advantage, terminalError, evolvingHeartbeatAt)`);
398
425
  }
399
426
  }
400
427
  if ('policyVersionBaseline' in patch) {
@@ -425,6 +452,13 @@ function validateStagePatch(patch, episodeId) {
425
452
  }
426
453
  merge.terminalError = v;
427
454
  }
455
+ if ('evolvingHeartbeatAt' in patch) {
456
+ const v = patch.evolvingHeartbeatAt;
457
+ if (typeof v !== 'string' || v.length === 0) {
458
+ throw new Error(`Invalid patch for ${episodeId}: evolvingHeartbeatAt must be a non-empty string`);
459
+ }
460
+ merge.evolvingHeartbeatAt = v;
461
+ }
428
462
  return merge;
429
463
  }
430
464
  /**
@@ -436,7 +470,8 @@ function validateStagePatch(patch, episodeId) {
436
470
  * advancing to a stage not reachable from the current one throws.
437
471
  * - Appends `{stage, at}` to `stageHistory`.
438
472
  * - Merges the allowlisted `patch` fields (`policyVersionBaseline`,
439
- * `baselineSkippedReason`, `advantage`, `terminalError`) in the same write.
473
+ * `baselineSkippedReason`, `advantage`, `terminalError`, `evolvingHeartbeatAt`)
474
+ * in the same write.
440
475
  * - Bumps `updatedAt`.
441
476
  */
442
477
  export async function advanceEpisodeStage(opts) {
@@ -104,7 +104,7 @@ export interface AssembleEvolvingAgentPromptInput {
104
104
  /** Pre-rendered DO-NOT-PRUNE block (成功保护). Omitted when empty. */
105
105
  doNotPrune?: string;
106
106
  /**
107
- * One-line 预测校准 prediction-calibration note: the proposer's recent
107
+ * One-line 预测校准 prediction-calibration note: the evolving agent's recent
108
108
  * checkable predictions' hit/miss record, settled by later measurements.
109
109
  * Read-only context (it never scores); omitted when there is no settled
110
110
  * prediction history, so prompts on early episodes stay byte-identical.
@@ -136,9 +136,9 @@ export type ParsedEvolvingAgentResponse = EvolvingAgentRefusal | EvolvingAgentEd
136
136
  /**
137
137
  * Parse the model's single `json:patch` block. Accepts EITHER the refusal shape
138
138
  * (`{edits: [], refusal: string}`) OR a concrete edit (`{rationale, prediction,
139
- * edits[]}`). Throws {@link CanonicalProposerOutputInvalid} on a malformed
139
+ * edits[]}`). Throws {@link EvolvingAgentOutputInvalid} on a malformed
140
140
  * block, the wrong block count, a missing/invalid prediction, or
141
- * {@link CanonicalProposerNoOp} on empty edits WITHOUT a refusal reason.
141
+ * {@link EvolvingAgentNoOp} on empty edits WITHOUT a refusal reason.
142
142
  *
143
143
  * Edits are NOT yet scope-validated here (the caller runs the static gate over
144
144
  * them); this only enforces the SHAPE of the contract.
@@ -181,7 +181,7 @@ export interface RunEvolvingAgentOptions {
181
181
  */
182
182
  exemplarPaths?: string[];
183
183
  /**
184
- * One-line 预测校准 prediction-calibration note surfaced to the proposer
184
+ * One-line 预测校准 prediction-calibration note surfaced to the evolving agent
185
185
  * (read-only, advisory). Defaults to absent; the orchestrator computes it from
186
186
  * the prediction-reconcile ledger via `summarizeCalibration`.
187
187
  */
@@ -1,8 +1,8 @@
1
1
  import { promises as fs } from 'node:fs';
2
2
  import * as path from 'node:path';
3
- import { runHeadlessAgent, DEFAULT_AGENT_TIMEOUT_MS, } from './host-harness.js';
3
+ import { runHeadlessAgent, resolveAgentTimeoutMs, } from './host-harness.js';
4
4
  import { evaluateToolEvolutionCandidate, } from './tool-evolution.js';
5
- import { validateCandidateEdits, CanonicalProposerNoOp, CanonicalProposerOutputInvalid, CanonicalProposerInvocationError, renderUnifiedDiff, } from './edits-contract.js';
5
+ import { validateCandidateEdits, EvolvingAgentNoOp, EvolvingAgentOutputInvalid, EvolvingAgentInvocationError, renderUnifiedDiff, } from './edits-contract.js';
6
6
  import { requireCanonicalTarget } from './canonical-targets.js';
7
7
  import { resolveTargetLocalFiles } from './local-targets.js';
8
8
  import { renderDoNotPruneBlock, readProtections, listExemplarFiles, } from './success-channel.js';
@@ -203,9 +203,9 @@ const PREDICTION_METRICS = new Set(['loss', 'passRate', 'healthPenalty']);
203
203
  /**
204
204
  * Parse the model's single `json:patch` block. Accepts EITHER the refusal shape
205
205
  * (`{edits: [], refusal: string}`) OR a concrete edit (`{rationale, prediction,
206
- * edits[]}`). Throws {@link CanonicalProposerOutputInvalid} on a malformed
206
+ * edits[]}`). Throws {@link EvolvingAgentOutputInvalid} on a malformed
207
207
  * block, the wrong block count, a missing/invalid prediction, or
208
- * {@link CanonicalProposerNoOp} on empty edits WITHOUT a refusal reason.
208
+ * {@link EvolvingAgentNoOp} on empty edits WITHOUT a refusal reason.
209
209
  *
210
210
  * Edits are NOT yet scope-validated here (the caller runs the static gate over
211
211
  * them); this only enforces the SHAPE of the contract.
@@ -217,25 +217,25 @@ export function parseEvolvingAgentResponse(text) {
217
217
  while ((m = fenceRe.exec(text)) !== null)
218
218
  matches.push(m[1]);
219
219
  if (matches.length === 0) {
220
- throw new CanonicalProposerOutputInvalid('no `json:patch` fenced block found in response');
220
+ throw new EvolvingAgentOutputInvalid('no `json:patch` fenced block found in response');
221
221
  }
222
222
  if (matches.length > 1) {
223
- throw new CanonicalProposerOutputInvalid(`expected exactly 1 \`json:patch\` block, found ${matches.length}`);
223
+ throw new EvolvingAgentOutputInvalid(`expected exactly 1 \`json:patch\` block, found ${matches.length}`);
224
224
  }
225
225
  let parsed;
226
226
  try {
227
227
  parsed = JSON.parse(matches[0].trim());
228
228
  }
229
229
  catch (err) {
230
- throw new CanonicalProposerOutputInvalid(`failed to parse JSON inside patch block: ${err instanceof Error ? err.message : String(err)}`);
230
+ throw new EvolvingAgentOutputInvalid(`failed to parse JSON inside patch block: ${err instanceof Error ? err.message : String(err)}`);
231
231
  }
232
232
  if (!parsed || typeof parsed !== 'object') {
233
- throw new CanonicalProposerOutputInvalid('patch block must be a JSON object');
233
+ throw new EvolvingAgentOutputInvalid('patch block must be a JSON object');
234
234
  }
235
235
  const o = parsed;
236
236
  const rawEdits = o.edits;
237
237
  if (!Array.isArray(rawEdits)) {
238
- throw new CanonicalProposerOutputInvalid('patch block must contain an `edits` array');
238
+ throw new EvolvingAgentOutputInvalid('patch block must contain an `edits` array');
239
239
  }
240
240
  // Refusal shape: empty edits + a refusal string.
241
241
  const refusal = o.refusal;
@@ -244,7 +244,7 @@ export function parseEvolvingAgentResponse(text) {
244
244
  return { kind: 'refusal', reason: refusal.trim() };
245
245
  }
246
246
  // Empty edits with no refusal reason is a malformed no-op, not a refusal.
247
- throw new CanonicalProposerNoOp();
247
+ throw new EvolvingAgentNoOp();
248
248
  }
249
249
  // Concrete-edit shape: validate prediction + edit shapes.
250
250
  const prediction = parsePrediction(o.prediction);
@@ -253,7 +253,7 @@ export function parseEvolvingAgentResponse(text) {
253
253
  const relPath = e?.relPath;
254
254
  const content = e?.content;
255
255
  if (typeof relPath !== 'string' || typeof content !== 'string') {
256
- throw new CanonicalProposerOutputInvalid('edit must have string relPath and string content');
256
+ throw new EvolvingAgentOutputInvalid('edit must have string relPath and string content');
257
257
  }
258
258
  edits.push({ relPath: relPath.replace(/\\/g, '/'), content });
259
259
  }
@@ -262,17 +262,17 @@ export function parseEvolvingAgentResponse(text) {
262
262
  }
263
263
  function parsePrediction(raw) {
264
264
  if (!raw || typeof raw !== 'object') {
265
- throw new CanonicalProposerOutputInvalid('a concrete edit requires a `prediction` object {metric, direction, checkBy}');
265
+ throw new EvolvingAgentOutputInvalid('a concrete edit requires a `prediction` object {metric, direction, checkBy}');
266
266
  }
267
267
  const p = raw;
268
268
  if (typeof p.metric !== 'string' || !PREDICTION_METRICS.has(p.metric)) {
269
- throw new CanonicalProposerOutputInvalid("prediction.metric must be 'loss' | 'passRate' | 'healthPenalty'");
269
+ throw new EvolvingAgentOutputInvalid("prediction.metric must be 'loss' | 'passRate' | 'healthPenalty'");
270
270
  }
271
271
  if (p.direction !== 'down' && p.direction !== 'up') {
272
- throw new CanonicalProposerOutputInvalid("prediction.direction must be 'down' | 'up'");
272
+ throw new EvolvingAgentOutputInvalid("prediction.direction must be 'down' | 'up'");
273
273
  }
274
274
  if (typeof p.checkBy !== 'string' || p.checkBy.trim().length === 0) {
275
- throw new CanonicalProposerOutputInvalid('prediction.checkBy must be a non-empty string');
275
+ throw new EvolvingAgentOutputInvalid('prediction.checkBy must be a non-empty string');
276
276
  }
277
277
  return {
278
278
  metric: p.metric,
@@ -403,7 +403,7 @@ export async function runEvolvingAgent(opts) {
403
403
  const { episodeId, targetId } = opts;
404
404
  const editBudget = opts.editBudget ?? DEFAULT_EVOLVING_AGENT_EDIT_BUDGET;
405
405
  const maxRepairAttempts = Math.max(0, opts.maxRepairAttempts ?? 2);
406
- const timeoutMs = opts.timeoutMs ?? DEFAULT_AGENT_TIMEOUT_MS;
406
+ const timeoutMs = opts.timeoutMs ?? resolveAgentTimeoutMs(opts.harness);
407
407
  // Fail closed: the episode must exist (and tells us nothing else we need yet).
408
408
  const episode = await readEpisode(repoRoot, episodeId);
409
409
  void episode;
@@ -473,8 +473,8 @@ export async function runEvolvingAgent(opts) {
473
473
  harness: opts.harness,
474
474
  });
475
475
  if (run.exitCode !== 0 || run.stdout.length === 0) {
476
- // Agent crash is NOT repaired (mirrors the proposer's invocation contract).
477
- throw new CanonicalProposerInvocationError(run.stderr);
476
+ // Agent crash is NOT repaired (the evolving agent's invocation contract).
477
+ throw new EvolvingAgentInvocationError(run.stderr);
478
478
  }
479
479
  try {
480
480
  const candidate = parseEvolvingAgentResponse(run.stdout);
@@ -483,12 +483,12 @@ export async function runEvolvingAgent(opts) {
483
483
  break;
484
484
  }
485
485
  // Static-shape edit: validate scope-to-target + frozen freeze here so a
486
- // bad path is a REPAIRABLE failure (same class as the proposer).
486
+ // bad path is a REPAIRABLE failure (the evolving agent's repair contract).
487
487
  validateCandidateEdits(candidate.edits, allowedFiles);
488
488
  // ≤ L budget (repairable).
489
489
  const changed = countChangedLines(candidate.edits, currentFiles);
490
490
  if (changed > editBudget) {
491
- throw new CanonicalProposerOutputInvalid(`edit changes ${changed} lines, over the ${editBudget}-line budget (L) — make a smaller, more targeted edit`);
491
+ throw new EvolvingAgentOutputInvalid(`edit changes ${changed} lines, over the ${editBudget}-line budget (L) — make a smaller, more targeted edit`);
492
492
  }
493
493
  // 范围⊆诊断 (gate-3, repairable).
494
494
  const scope = checkScopeWithinDiagnosis({
@@ -500,7 +500,7 @@ export async function runEvolvingAgent(opts) {
500
500
  const where = scope.violations
501
501
  .map((v) => `${v.file} §"${v.section}"`)
502
502
  .join(', ');
503
- throw new CanonicalProposerOutputInvalid(`edit touches sections outside the diagnosis (范围⊆诊断 violated): ${where} — only edit the diagnosed sections`);
503
+ throw new EvolvingAgentOutputInvalid(`edit touches sections outside the diagnosis (范围⊆诊断 violated): ${where} — only edit the diagnosed sections`);
504
504
  }
505
505
  // static guard (tool-evolution) — RUN INSIDE the repair loop so a
506
506
  // content-driven failure (missing rationale / validation evidence / diff)
@@ -517,14 +517,14 @@ export async function runEvolvingAgent(opts) {
517
517
  const errs = findings
518
518
  .filter((f) => f.severity === 'error')
519
519
  .map((f) => `${f.code}: ${f.message}`);
520
- throw new CanonicalProposerOutputInvalid(`static gate failed (score ${toolReport.score.toFixed(2)}): ${errs.join('; ') || 'score below threshold'}`);
520
+ throw new EvolvingAgentOutputInvalid(`static gate failed (score ${toolReport.score.toFixed(2)}): ${errs.join('; ') || 'score below threshold'}`);
521
521
  }
522
522
  parsed = candidate;
523
523
  scopeResult = scope;
524
524
  break;
525
525
  }
526
526
  catch (err) {
527
- if (err instanceof CanonicalProposerOutputInvalid && attempt < maxRepairAttempts) {
527
+ if (err instanceof EvolvingAgentOutputInvalid && attempt < maxRepairAttempts) {
528
528
  feedback = gateFeedback(err.message);
529
529
  continue;
530
530
  }
@@ -546,7 +546,7 @@ export async function runEvolvingAgent(opts) {
546
546
  // scopeResult was set alongside the accepted parse; reasserted defensively.
547
547
  if (!scopeResult || !scopeResult.pass) {
548
548
  // Unreachable on the accept path; fail closed rather than evolve out of scope.
549
- throw new CanonicalProposerOutputInvalid('范围⊆诊断 scope gate did not pass');
549
+ throw new EvolvingAgentOutputInvalid('范围⊆诊断 scope gate did not pass');
550
550
  }
551
551
  // ── 3. POST-LOOP GATE: observed-GREEN ───────────────────────────────────────
552
552
  // static / 范围⊆诊断 / budget / valid-prediction were all enforced inside the
@@ -556,11 +556,11 @@ export async function runEvolvingAgent(opts) {
556
556
  // would be a category error.
557
557
  const objective = await readMainArmObjective(repoRoot, episodeId);
558
558
  if (!objective) {
559
- throw new CanonicalProposerOutputInvalid('observed-GREEN gate: main-arm/objective.json is missing or unreadable — cannot confirm a verified green run');
559
+ throw new EvolvingAgentOutputInvalid('observed-GREEN gate: main-arm/objective.json is missing or unreadable — cannot confirm a verified green run');
560
560
  }
561
561
  const evidence = isArmObjectiveGreen(objective);
562
562
  if (!evidence.ok) {
563
- throw new CanonicalProposerOutputInvalid(`observed-GREEN gate failed: ${evidence.reason}`);
563
+ throw new EvolvingAgentOutputInvalid(`observed-GREEN gate failed: ${evidence.reason}`);
564
564
  }
565
565
  // ── 4. Write back the next policy version. NO candidate dir / sidecar / verdict. ─
566
566
  const ledgerEntry = await advancePolicyVersion({