npm - synergyspec-selfevolving - Versions diffs - 2.1.2 → 2.1.3 - Mend

synergyspec-selfevolving 2.1.2 → 2.1.3

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (40)

package/dist/commands/learn.js +13 -3
package/dist/commands/self-evolution-episode.d.ts +6 -1
package/dist/commands/self-evolution-episode.js +8 -1
package/dist/commands/self-evolution.d.ts +2 -2
package/dist/commands/self-evolution.js +10 -10
package/dist/commands/workflow/status.js +5 -0
package/dist/core/change-readiness.d.ts +1 -1
package/dist/core/change-readiness.js +13 -5
package/dist/core/fitness/test-metrics.d.ts +33 -0
package/dist/core/fitness/test-metrics.js +67 -0
package/dist/core/learn.js +11 -2
package/dist/core/project-config.d.ts +3 -0
package/dist/core/project-config.js +7 -1
package/dist/core/self-evolution/critic-agent.js +13 -5
package/dist/core/self-evolution/edits-contract.d.ts +15 -5
package/dist/core/self-evolution/edits-contract.js +26 -16
package/dist/core/self-evolution/episode-orchestrator.d.ts +11 -6
package/dist/core/self-evolution/episode-orchestrator.js +88 -24
package/dist/core/self-evolution/episode-store.d.ts +34 -11
package/dist/core/self-evolution/episode-store.js +45 -10
package/dist/core/self-evolution/evolving-agent.d.ts +4 -4
package/dist/core/self-evolution/evolving-agent.js +26 -26
package/dist/core/self-evolution/host-harness.d.ts +68 -2
package/dist/core/self-evolution/host-harness.js +208 -21
package/dist/core/self-evolution/policy/policy-store.d.ts +8 -6
package/dist/core/self-evolution/policy/policy-store.js +124 -24
package/dist/core/self-evolution/proposer-slice.d.ts +4 -3
package/dist/core/self-evolution/reward-agent.d.ts +11 -1
package/dist/core/self-evolution/reward-agent.js +53 -20
package/dist/core/self-evolution/reward-aggregator.d.ts +18 -0
package/dist/core/self-evolution/reward-aggregator.js +53 -3
package/dist/core/self-evolution/reward-deepread.d.ts +64 -0
package/dist/core/self-evolution/reward-deepread.js +112 -0
package/dist/core/templates/workflows/learn.js +2 -1
package/dist/core/templates/workflows/self-evolving.js +5 -2
package/dist/core/trajectory/facts.d.ts +69 -2
package/dist/core/trajectory/facts.js +179 -10
package/dist/core/trajectory/skeleton.d.ts +10 -0
package/dist/core/trajectory/skeleton.js +24 -3
package/package.json +1 -1

package/dist/core/self-evolution/episode-orchestrator.js CHANGED Viewed

@@ -431,11 +431,21 @@ async function runEpisodeAfterCreate(opts) {
             // runEvolvingAgent reads the reject-buffer FRESH from disk (the entry just
             // written THIS episode is in its prompt). Never parallelized with (f).
             // 步长: after a rollback, shrink the edit budget (smaller step after a step
-            // that lost ground). 预测校准: pass the proposer's recent prediction record.
+            // that lost ground). 预测校准: pass the 演进智能体 EVOLVING AGENT's recent
+            // prediction record.
             const scheduledBudget = decision === 'rolled-back'
                 ? scheduledEditBudget(await readPolicyLedger(repoRoot, targetId), editBudget)
                 : editBudget;
             const calibrationNote = await summarizeCalibration(repoRoot, targetId);
+            // Advance 'evolving' (with a heartbeat) BEFORE the spawn so a concurrent
+            // sibling reading the store sees a LIVE-but-slow holder, not a stale lock at
+            // 'kept'/'rolled-back'. runEvolvingAgent advances the terminal outcome.
+            await advanceEpisodeStage({
+                repoRoot,
+                episodeId,
+                stage: 'evolving',
+                patch: { evolvingHeartbeatAt: new Date().toISOString() },
+            });
             evolution = await runEvolvingAgent({
                 repoRoot,
                 episodeId,
@@ -477,16 +487,19 @@ async function runEpisodeAfterCreate(opts) {
  * Closable stages:
  *   - evolved | evolution-refused | abstained — the 演进智能体 EVOLVING AGENT
  *     reached a definite outcome (or the judge 弃权 abstained), the normal close.
- *   - kept | rolled-back — the 演进智能体 returned not-spawned (its diagnosis
+ *   - evolving — the 演进智能体 returned not-spawned (its diagnosis
  *     abstained-after-gap-check, no gaps, or the target resolved to no editable
- *     local files), so the episode never advanced past the decision. By the time
- *     this runs (AFTER runEvolvingAgent returned), a stage still at 'kept'/
- *     'rolled-back' can ONLY mean not-spawned — a success advances 'evolved', a
+ *     local files), so the episode never advanced past the 'evolving' marker.
+ *     By the time this runs (AFTER runEvolvingAgent returned), a stage still at
+ *     'evolving' can ONLY mean not-spawned — a success advances 'evolved', a
  *     refusal advances 'evolution-refused', and a throw is caught upstream and
  *     records 'errored' + rethrows so this close is never reached. So a leftover
- *     kept/rolled-back at close time IS the finished-nothing-to-evolve case and
- *     must close, not rest forever at a non-terminal stage (the exact ambiguity
- *     the 'errored' stage was meant to remove).
+ *     'evolving' at close time IS the finished-nothing-to-evolve case and must
+ *     close, not rest forever at a non-terminal stage (the exact ambiguity the
+ *     'errored' stage was meant to remove).
+ *   - kept | rolled-back — retained for back-compat: an OLD episode record (or a
+ *     code path that did not advance the 'evolving' marker) that returned
+ *     not-spawned never advances past the decision; close it the same way.
  *
  * Any other (genuinely non-closable) stage is left as-is rather than throwing, so
  * the close never masks the real episode outcome.
@@ -497,7 +510,10 @@ async function closeEpisodeBestEffort(repoRoot, episodeId) {
         'evolved',
         'evolution-refused',
         'abstained',
-        // not-spawned 演进智能体 leaves the episode here — close the finished episode.
+        // not-spawned 演进智能体 leaves the episode at the 'evolving' marker — close
+        // the finished episode. 'kept'/'rolled-back' retained for back-compat with
+        // an old record / a path that never advanced the marker.
+        'evolving',
         'kept',
         'rolled-back',
     ]);
@@ -586,14 +602,16 @@ async function ensureRejectBufferEntry(repoRoot, opts) {
  * done step rather than re-advancing a stage already entered:
  *
  *   - 'scored'                    → run the decision (f) then the 演进智能体 (g).
- *   - 'rolled-back' / 'kept'      → run the 演进智能体 EVOLVING AGENT (g) then close.
+ *   - 'rolled-back' / 'kept' / 'evolving' → run the 演进智能体 EVOLVING AGENT (g)
+ *                                   then close. ('evolving' means a crash AFTER the
+ *                                   marker but before the agent settled an outcome.)
  *   - 'evolved'/'evolution-refused'/'abstained' → close.
  *   - 'errored'                   → RE-DRIVE from the last GOOD pre-error stage
  *                                   (an episode may have errored on a TRANSIENT
  *                                   cause — a one-off git/analyzer/agent timeout).
  *                                   The pre-error stage is the last `stageHistory`
  *                                   entry that is NOT 'errored'; when it is one of
- *                                   {'scored','rolled-back','kept'} (the
+ *                                   {'scored','rolled-back','kept','evolving'} (the
  *                                   resume-entry stages) we advance errored → that
  *                                   stage and fall through to the normal dispatch.
  *                                   Otherwise the pre-error stage is not
@@ -617,7 +635,7 @@ export async function resumeEpisode(opts) {
     // for an 'errored' episode we attempt to RE-DRIVE from the last good pre-error
     // stage (a transient git/analyzer/agent failure should be retryable via an
     // operator resume). 'errored' stays terminal for every OTHER caller — only this
-    // resume path may re-drive it, via the errored → {scored,rolled-back,kept}
+    // resume path may re-drive it, via the errored → {scored,rolled-back,kept,evolving}
     // transitions the stage machine allows ONLY for operator recovery.
     let stage = ep.stage;
     if (ep.stage === 'errored') {
@@ -626,7 +644,8 @@ export async function resumeEpisode(opts) {
             .find((h) => h.stage !== 'errored')?.stage;
         if (preError === 'scored' ||
             preError === 'rolled-back' ||
-            preError === 'kept') {
+            preError === 'kept' ||
+            preError === 'evolving') {
             // Re-open the errored episode at its last auto-resumable stage, then fall
             // through to the normal dispatch for that stage.
             await advanceEpisodeStage({ repoRoot, episodeId, stage: preError });
@@ -636,7 +655,7 @@ export async function resumeEpisode(opts) {
         // 'baseline-skipped'); leave the episode at 'errored' and report it as-is.
     }
     // The decision (f) + 演进智能体 EVOLVING AGENT (g) re-runs below can THROW — a
-    // wedged/crashed host CLI (CanonicalProposerInvocationError), a timeout, or an
+    // wedged/crashed host CLI (EvolvingAgentInvocationError), a timeout, or an
     // observed-GREEN gate throw. UNCAUGHT, that leaves the episode DURABLY stuck at
     // a non-terminal stage ('scored'/'rolled-back'/'kept' — the orphan state fix ❷
     // eliminates for runEpisode). Record the SAME terminal 'errored' stage here
@@ -644,7 +663,31 @@ export async function resumeEpisode(opts) {
     // re-throw. Resume holds NO in-flight lock, so this is a durable-stage fix, not
     // a leak fix. Best-effort write: a failed record must not mask the original throw.
     try {
-        if (stage === 'scored') {
+        // TOCTOU guard: resume read the stage at entry (~L945), but it holds NO in-flight
+        // lock, so a CONCURRENT runEpisode for the same target can advance THIS episode to
+        // a TERMINAL stage between that read and the transitions below. Re-read the episode
+        // immediately before dispatching; if it is already finished, the transitions would
+        // throw an illegal-transition error (which the catch below would then mis-record as
+        // a fresh 'errored'). Short-circuit instead: report the already-finished episode
+        // via the normal completion return. (The errored→pre-error re-drive above already
+        // turned a re-drivable 'errored' into a non-terminal stage, so a stage that is
+        // STILL terminal here is genuinely finished, not auto-resumable.)
+        const TERMINAL_STAGES = new Set([
+            'closed',
+            'errored',
+            'evolution-refused',
+            'evolved',
+            'abstained',
+        ]);
+        const fresh = await readEpisode(repoRoot, episodeId);
+        stage = fresh.stage;
+        if (TERMINAL_STAGES.has(stage)) {
+            // 'evolved'/'evolution-refused'/'abstained' still want their best-effort close;
+            // 'closed'/'errored' are no-ops for closeEpisodeBestEffort. No transition is
+            // attempted, so the race cannot surface as an illegal-transition throw.
+            await closeEpisodeBestEffort(repoRoot, episodeId);
+        }
+        else if (stage === 'scored') {
             // Re-run the decision (f) from the on-disk diagnosis, then (g).
             const diagnosis = await readDiagnosisForResume(repoRoot, episodeId);
             if (shouldSkipEvolution(diagnosis)) {
@@ -719,6 +762,15 @@ export async function resumeEpisode(opts) {
                     ? scheduledEditBudget(await readPolicyLedger(repoRoot, targetId), editBudget)
                     : editBudget;
                 const calibrationNote = await summarizeCalibration(repoRoot, targetId);
+                // Advance the 'evolving' marker (heartbeat) before the spawn, mirroring
+                // runEpisode's (g). Idempotent across a crash-resume: 'evolving' is reached
+                // from both 'rolled-back' and 'kept'.
+                await advanceEpisodeStage({
+                    repoRoot,
+                    episodeId,
+                    stage: 'evolving',
+                    patch: { evolvingHeartbeatAt: new Date().toISOString() },
+                });
                 evolution = await runEvolvingAgent({
                     repoRoot,
                     episodeId,
@@ -732,7 +784,7 @@ export async function resumeEpisode(opts) {
             }
             await closeEpisodeBestEffort(repoRoot, episodeId);
         }
-        else if (stage === 'rolled-back' || stage === 'kept') {
+        else if (stage === 'rolled-back' || stage === 'kept' || stage === 'evolving') {
             // The decision already ran (and the original episode settled the prediction);
             // re-settle idempotently for the crash window, then schedule + calibrate.
             try {
@@ -741,10 +793,26 @@ export async function resumeEpisode(opts) {
             catch {
                 // best-effort: advisory only
             }
-            const scheduledBudget = stage === 'rolled-back'
+            // Resuming from 'evolving' means the decision is in history (not the resume
+            // stage); read it from stageHistory so the 步长 schedule still shrinks after a
+            // rollback. Resuming from 'rolled-back'/'kept' uses the resume stage directly.
+            const wasRolledBack = stage === 'rolled-back' ||
+                (stage === 'evolving' && ep.stageHistory.some((h) => h.stage === 'rolled-back'));
+            const scheduledBudget = wasRolledBack
                 ? scheduledEditBudget(await readPolicyLedger(repoRoot, targetId), editBudget)
                 : editBudget;
             const calibrationNote = await summarizeCalibration(repoRoot, targetId);
+            // Advance the 'evolving' marker before the spawn when resuming from the
+            // decision stage. When already at 'evolving' (a crash mid-spawn re-drive),
+            // the marker is present — skip the (now illegal) self-transition.
+            if (stage !== 'evolving') {
+                await advanceEpisodeStage({
+                    repoRoot,
+                    episodeId,
+                    stage: 'evolving',
+                    patch: { evolvingHeartbeatAt: new Date().toISOString() },
+                });
+            }
             evolution = await runEvolvingAgent({
                 repoRoot,
                 episodeId,
@@ -757,13 +825,9 @@ export async function resumeEpisode(opts) {
             });
             await closeEpisodeBestEffort(repoRoot, episodeId);
         }
-        else if (stage === 'evolved' ||
-            stage === 'evolution-refused' ||
-            stage === 'abstained') {
-            await closeEpisodeBestEffort(repoRoot, episodeId);
-        }
-        // earlier stages (and a non-auto-resumable 'errored'): not auto-resumable here
-        // — reported as-is.
+        // Terminal stages (incl. a non-auto-resumable 'errored') are handled by the
+        // TOCTOU guard above; earlier stages are not auto-resumable here — reported
+        // as-is via the completion return below.
     }
     catch (err) {
         // A thrown decision/evolving step records a DURABLE terminal 'errored' stage so

package/dist/core/self-evolution/episode-store.d.ts CHANGED Viewed

@@ -57,10 +57,11 @@
  *     -> (baseline-arm-captured | baseline-skipped)   // CRITIC AGENT（基线智能体 baseline agent）arm
  *     -> scored                                       // 奖励智能体 REWARD AGENT wrote diagnosis.json
  *     -> (rolled-back | kept)                         // rollback decision on the main arm's edits
+ *     -> evolving                                     // 演进智能体 EVOLVING AGENT holds the in-flight lock
  *     -> (evolved | evolution-refused | abstained)    // 演进智能体 EVOLVING AGENT outcome
  *     -> closed                                       // terminal
  *
- *   (rolled-back | kept)
+ *   (rolled-back | kept | evolving)
  *     -> closed                                       // terminal — see below
  *
  *   (any non-terminal stage)
@@ -73,12 +74,23 @@
  * 弃权 abstains when no nameable gap → no rollback decision needed → the
  * 演进智能体 EVOLVING AGENT is never spawned.
  *
- * `rolled-back`/`kept` may also reach `closed` DIRECTLY: when the 演进智能体
- * EVOLVING AGENT was NOT spawned (its diagnosis abstained-after-gap-check, named
- * no gaps, or the target resolved to no editable local files) the episode never
- * advances past the decision, so the orchestrator's best-effort close terminates
- * the finished-nothing-to-evolve episode rather than leaving it resting forever
- * at a non-terminal stage.
+ * `evolving` is advanced by the orchestrator BEFORE it spawns the 演进智能体
+ * EVOLVING AGENT, while that agent holds the in-flight lock. It exists so a
+ * concurrent sibling reading the store distinguishes a LIVE-but-slow holder
+ * (stage `evolving`) from an episode that merely reached the decision (`kept`/
+ * `rolled-back`) — without it the stage stays `kept` for the whole evolving
+ * spawn, and a sibling can misread a running holder as stale. The
+ * `evolvingHeartbeatAt` field records when the stage was entered. Old episode
+ * records that predate this stage never carry it; they resume exactly as before
+ * (the `rolled-back`/`kept` → outcome transitions are retained for them).
+ *
+ * `rolled-back`/`kept`/`evolving` may also reach `closed` DIRECTLY: when the
+ * 演进智能体 EVOLVING AGENT was NOT spawned (its diagnosis abstained-after-gap-
+ * check, named no gaps, or the target resolved to no editable local files) the
+ * episode never advances past the decision (it stays `kept`/`rolled-back`, or —
+ * for the not-spawned-after-evolving-marker case — `evolving`), so the
+ * orchestrator's best-effort close terminates the finished-nothing-to-evolve
+ * episode rather than leaving it resting forever at a non-terminal stage.
  *
  * `errored` is a SECOND terminal stage reachable from EVERY non-terminal stage.
  * A thrown step — an agent spawn that crashes or times out (主智能体 MAIN AGENT /
@@ -92,10 +104,10 @@
  * `errored` is terminal for every target EXCEPT an operator-driven resume: a
  * transient cause (a one-off git/analyzer/agent timeout) is retryable, so an
  * `episode resume` may RE-DRIVE an errored episode back to its last good
- * pre-error stage — `errored -> {scored, rolled-back, kept}` (the resume-entry
- * stages). No other caller may leave `errored`.
+ * pre-error stage — `errored -> {scored, rolled-back, kept, evolving}` (the
+ * resume-entry stages). No other caller may leave `errored`.
  */
-export type EpisodeStage = 'created' | 'main-arm-captured' | 'baseline-arm-captured' | 'baseline-skipped' | 'scored' | 'rolled-back' | 'kept' | 'evolved' | 'evolution-refused' | 'abstained' | 'closed' | 'errored';
+export type EpisodeStage = 'created' | 'main-arm-captured' | 'baseline-arm-captured' | 'baseline-skipped' | 'scored' | 'rolled-back' | 'kept' | 'evolving' | 'evolved' | 'evolution-refused' | 'abstained' | 'closed' | 'errored';
 /**
  * Iterable list of every legal {@link EpisodeStage} value. Order follows the
  * documented state machine for readability, not behavior.
@@ -143,6 +155,14 @@ export interface EpisodeRecord {
     stageHistory: EpisodeStageHistoryEntry[];
     /** Why the baseline arm was skipped (set with stage `baseline-skipped`). */
     baselineSkippedReason?: string;
+    /**
+     * ISO 8601 UTC timestamp the episode entered the `evolving` stage (the moment
+     * the 演进智能体 EVOLVING AGENT spawn began holding the in-flight lock). A
+     * heartbeat for liveness reads — a concurrent sibling can tell a recently-
+     * entered `evolving` holder apart from one that genuinely wedged. Absent on
+     * old records (and on every stage before `evolving`).
+     */
+    evolvingHeartbeatAt?: string;
     /** advantage ＝ reward(主臂) − reward(基线臂); null when the 奖励智能体 REWARD AGENT 弃权 abstained. */
     advantage?: number | null;
     /**
@@ -163,6 +183,8 @@ export interface EpisodeStagePatch {
     advantage?: number | null;
     /** Cause note merged alongside the terminal `errored` stage. */
     terminalError?: string;
+    /** Heartbeat timestamp merged alongside the `evolving` stage. */
+    evolvingHeartbeatAt?: string;
 }
 /**
  * True iff `(from -> to)` is a legal transition in the episode stage machine.
@@ -234,7 +256,8 @@ export interface AdvanceEpisodeStageOptions {
  *   advancing to a stage not reachable from the current one throws.
  * - Appends `{stage, at}` to `stageHistory`.
  * - Merges the allowlisted `patch` fields (`policyVersionBaseline`,
- *   `baselineSkippedReason`, `advantage`, `terminalError`) in the same write.
+ *   `baselineSkippedReason`, `advantage`, `terminalError`, `evolvingHeartbeatAt`)
+ *   in the same write.
  * - Bumps `updatedAt`.
  */
 export declare function advanceEpisodeStage(opts: AdvanceEpisodeStageOptions): Promise<EpisodeRecord>;

package/dist/core/self-evolution/episode-store.js CHANGED Viewed

@@ -61,6 +61,7 @@ export const EPISODE_STAGES = [
     'scored',
     'rolled-back',
     'kept',
+    'evolving',
     'evolved',
     'evolution-refused',
     'abstained',
@@ -77,8 +78,9 @@ const EPISODE_ID_PATTERN = /^[a-z0-9][a-z0-9-]*$/;
 // step: agent spawn crash/timeout or un-repairable gate), so a failed episode
 // is never orphaned mid-flight. `closed` and `errored` are the two terminals;
 // `errored` is terminal EXCEPT for an operator resume re-drive back to its last
-// good pre-error stage (scored/rolled-back/kept). `rolled-back`/`kept` may also
-// close directly (the not-spawned 演进智能体 finished-nothing-to-evolve case).
+// good pre-error stage (scored/rolled-back/kept/evolving). `rolled-back`/`kept`/
+// `evolving` may also close directly (the not-spawned 演进智能体
+// finished-nothing-to-evolve case).
 const LEGAL_STAGE_TRANSITIONS = new Map([
     ['created', new Set(['main-arm-captured', 'errored'])],
     [
@@ -91,15 +93,38 @@ const LEGAL_STAGE_TRANSITIONS = new Map([
     // abstained, so no rollback decision is needed and the 演进智能体
     // EVOLVING AGENT is never spawned.
     ['scored', new Set(['rolled-back', 'kept', 'abstained', 'errored'])],
-    // 'rolled-back'/'kept' may also reach 'closed' DIRECTLY when the 演进智能体
-    // EVOLVING AGENT was not-spawned (the finished-nothing-to-evolve case), so the
-    // episode never rests forever at a non-terminal stage.
+    // 'rolled-back'/'kept' advance to 'evolving' BEFORE the 演进智能体 EVOLVING
+    // AGENT spawn (so a sibling can tell a live holder from a stale lock). They
+    // also retain the DIRECT transitions to the evolving outcomes + 'closed' so
+    // (a) an OLD episode record resumed from 'rolled-back'/'kept' (no 'evolving'
+    // stage) behaves exactly as before, and (b) the not-spawned
+    // finished-nothing-to-evolve case can still close directly.
     [
         'rolled-back',
-        new Set(['evolved', 'evolution-refused', 'abstained', 'closed', 'errored']),
+        new Set([
+            'evolving',
+            'evolved',
+            'evolution-refused',
+            'abstained',
+            'closed',
+            'errored',
+        ]),
     ],
     [
         'kept',
+        new Set([
+            'evolving',
+            'evolved',
+            'evolution-refused',
+            'abstained',
+            'closed',
+            'errored',
+        ]),
+    ],
+    // The 演进智能体 EVOLVING AGENT outcome (or a not-spawned close), or 'errored'
+    // on a thrown spawn/gate.
+    [
+        'evolving',
         new Set(['evolved', 'evolution-refused', 'abstained', 'closed', 'errored']),
     ],
     ['evolved', new Set(['closed'])],
@@ -107,8 +132,9 @@ const LEGAL_STAGE_TRANSITIONS = new Map([
     ['abstained', new Set(['closed'])],
     ['closed', new Set()],
     // 'errored' is terminal EXCEPT for an operator resume re-drive back to the
-    // last good pre-error stage (scored/rolled-back/kept); no other caller leaves it.
-    ['errored', new Set(['scored', 'rolled-back', 'kept'])],
+    // last good pre-error stage (scored/rolled-back/kept/evolving); no other
+    // caller leaves it.
+    ['errored', new Set(['scored', 'rolled-back', 'kept', 'evolving'])],
 ]);
 /**
  * True iff `(from -> to)` is a legal transition in the episode stage machine.
@@ -387,6 +413,7 @@ const ALLOWED_PATCH_KEYS = new Set([
     'baselineSkippedReason',
     'advantage',
     'terminalError',
+    'evolvingHeartbeatAt',
 ]);
 /** Validate an {@link EpisodeStagePatch} fail-closed; returns the merge slice. */
 function validateStagePatch(patch, episodeId) {
@@ -394,7 +421,7 @@ function validateStagePatch(patch, episodeId) {
     for (const key of Object.keys(patch)) {
         if (!ALLOWED_PATCH_KEYS.has(key)) {
             throw new Error(`Illegal episode patch field for ${episodeId}: "${key}" ` +
-                `(allowed: policyVersionBaseline, baselineSkippedReason, advantage, terminalError)`);
+                `(allowed: policyVersionBaseline, baselineSkippedReason, advantage, terminalError, evolvingHeartbeatAt)`);
         }
     }
     if ('policyVersionBaseline' in patch) {
@@ -425,6 +452,13 @@ function validateStagePatch(patch, episodeId) {
         }
         merge.terminalError = v;
     }
+    if ('evolvingHeartbeatAt' in patch) {
+        const v = patch.evolvingHeartbeatAt;
+        if (typeof v !== 'string' || v.length === 0) {
+            throw new Error(`Invalid patch for ${episodeId}: evolvingHeartbeatAt must be a non-empty string`);
+        }
+        merge.evolvingHeartbeatAt = v;
+    }
     return merge;
 }
 /**
@@ -436,7 +470,8 @@ function validateStagePatch(patch, episodeId) {
  *   advancing to a stage not reachable from the current one throws.
  * - Appends `{stage, at}` to `stageHistory`.
  * - Merges the allowlisted `patch` fields (`policyVersionBaseline`,
- *   `baselineSkippedReason`, `advantage`, `terminalError`) in the same write.
+ *   `baselineSkippedReason`, `advantage`, `terminalError`, `evolvingHeartbeatAt`)
+ *   in the same write.
  * - Bumps `updatedAt`.
  */
 export async function advanceEpisodeStage(opts) {

package/dist/core/self-evolution/evolving-agent.d.ts CHANGED Viewed

@@ -104,7 +104,7 @@ export interface AssembleEvolvingAgentPromptInput {
     /** Pre-rendered DO-NOT-PRUNE block (成功保护). Omitted when empty. */
     doNotPrune?: string;
     /**
-     * One-line 预测校准 prediction-calibration note: the proposer's recent
+     * One-line 预测校准 prediction-calibration note: the evolving agent's recent
      * checkable predictions' hit/miss record, settled by later measurements.
      * Read-only context (it never scores); omitted when there is no settled
      * prediction history, so prompts on early episodes stay byte-identical.
@@ -136,9 +136,9 @@ export type ParsedEvolvingAgentResponse = EvolvingAgentRefusal | EvolvingAgentEd
 /**
  * Parse the model's single `json:patch` block. Accepts EITHER the refusal shape
  * (`{edits: [], refusal: string}`) OR a concrete edit (`{rationale, prediction,
- * edits[]}`). Throws {@link CanonicalProposerOutputInvalid} on a malformed
+ * edits[]}`). Throws {@link EvolvingAgentOutputInvalid} on a malformed
  * block, the wrong block count, a missing/invalid prediction, or
- * {@link CanonicalProposerNoOp} on empty edits WITHOUT a refusal reason.
+ * {@link EvolvingAgentNoOp} on empty edits WITHOUT a refusal reason.
  *
  * Edits are NOT yet scope-validated here (the caller runs the static gate over
  * them); this only enforces the SHAPE of the contract.
@@ -181,7 +181,7 @@ export interface RunEvolvingAgentOptions {
      */
     exemplarPaths?: string[];
     /**
-     * One-line 预测校准 prediction-calibration note surfaced to the proposer
+     * One-line 预测校准 prediction-calibration note surfaced to the evolving agent
      * (read-only, advisory). Defaults to absent; the orchestrator computes it from
      * the prediction-reconcile ledger via `summarizeCalibration`.
      */

package/dist/core/self-evolution/evolving-agent.js CHANGED Viewed

@@ -1,8 +1,8 @@
 import { promises as fs } from 'node:fs';
 import * as path from 'node:path';
-import { runHeadlessAgent, DEFAULT_AGENT_TIMEOUT_MS, } from './host-harness.js';
+import { runHeadlessAgent, resolveAgentTimeoutMs, } from './host-harness.js';
 import { evaluateToolEvolutionCandidate, } from './tool-evolution.js';
-import { validateCandidateEdits, CanonicalProposerNoOp, CanonicalProposerOutputInvalid, CanonicalProposerInvocationError, renderUnifiedDiff, } from './edits-contract.js';
+import { validateCandidateEdits, EvolvingAgentNoOp, EvolvingAgentOutputInvalid, EvolvingAgentInvocationError, renderUnifiedDiff, } from './edits-contract.js';
 import { requireCanonicalTarget } from './canonical-targets.js';
 import { resolveTargetLocalFiles } from './local-targets.js';
 import { renderDoNotPruneBlock, readProtections, listExemplarFiles, } from './success-channel.js';
@@ -203,9 +203,9 @@ const PREDICTION_METRICS = new Set(['loss', 'passRate', 'healthPenalty']);
 /**
  * Parse the model's single `json:patch` block. Accepts EITHER the refusal shape
  * (`{edits: [], refusal: string}`) OR a concrete edit (`{rationale, prediction,
- * edits[]}`). Throws {@link CanonicalProposerOutputInvalid} on a malformed
+ * edits[]}`). Throws {@link EvolvingAgentOutputInvalid} on a malformed
  * block, the wrong block count, a missing/invalid prediction, or
- * {@link CanonicalProposerNoOp} on empty edits WITHOUT a refusal reason.
+ * {@link EvolvingAgentNoOp} on empty edits WITHOUT a refusal reason.
  *
  * Edits are NOT yet scope-validated here (the caller runs the static gate over
  * them); this only enforces the SHAPE of the contract.
@@ -217,25 +217,25 @@ export function parseEvolvingAgentResponse(text) {
     while ((m = fenceRe.exec(text)) !== null)
         matches.push(m[1]);
     if (matches.length === 0) {
-        throw new CanonicalProposerOutputInvalid('no `json:patch` fenced block found in response');
+        throw new EvolvingAgentOutputInvalid('no `json:patch` fenced block found in response');
     }
     if (matches.length > 1) {
-        throw new CanonicalProposerOutputInvalid(`expected exactly 1 \`json:patch\` block, found ${matches.length}`);
+        throw new EvolvingAgentOutputInvalid(`expected exactly 1 \`json:patch\` block, found ${matches.length}`);
     }
     let parsed;
     try {
         parsed = JSON.parse(matches[0].trim());
     }
     catch (err) {
-        throw new CanonicalProposerOutputInvalid(`failed to parse JSON inside patch block: ${err instanceof Error ? err.message : String(err)}`);
+        throw new EvolvingAgentOutputInvalid(`failed to parse JSON inside patch block: ${err instanceof Error ? err.message : String(err)}`);
     }
     if (!parsed || typeof parsed !== 'object') {
-        throw new CanonicalProposerOutputInvalid('patch block must be a JSON object');
+        throw new EvolvingAgentOutputInvalid('patch block must be a JSON object');
     }
     const o = parsed;
     const rawEdits = o.edits;
     if (!Array.isArray(rawEdits)) {
-        throw new CanonicalProposerOutputInvalid('patch block must contain an `edits` array');
+        throw new EvolvingAgentOutputInvalid('patch block must contain an `edits` array');
     }
     // Refusal shape: empty edits + a refusal string.
     const refusal = o.refusal;
@@ -244,7 +244,7 @@ export function parseEvolvingAgentResponse(text) {
             return { kind: 'refusal', reason: refusal.trim() };
         }
         // Empty edits with no refusal reason is a malformed no-op, not a refusal.
-        throw new CanonicalProposerNoOp();
+        throw new EvolvingAgentNoOp();
     }
     // Concrete-edit shape: validate prediction + edit shapes.
     const prediction = parsePrediction(o.prediction);
@@ -253,7 +253,7 @@ export function parseEvolvingAgentResponse(text) {
         const relPath = e?.relPath;
         const content = e?.content;
         if (typeof relPath !== 'string' || typeof content !== 'string') {
-            throw new CanonicalProposerOutputInvalid('edit must have string relPath and string content');
+            throw new EvolvingAgentOutputInvalid('edit must have string relPath and string content');
         }
         edits.push({ relPath: relPath.replace(/\\/g, '/'), content });
     }
@@ -262,17 +262,17 @@ export function parseEvolvingAgentResponse(text) {
 }
 function parsePrediction(raw) {
     if (!raw || typeof raw !== 'object') {
-        throw new CanonicalProposerOutputInvalid('a concrete edit requires a `prediction` object {metric, direction, checkBy}');
+        throw new EvolvingAgentOutputInvalid('a concrete edit requires a `prediction` object {metric, direction, checkBy}');
     }
     const p = raw;
     if (typeof p.metric !== 'string' || !PREDICTION_METRICS.has(p.metric)) {
-        throw new CanonicalProposerOutputInvalid("prediction.metric must be 'loss' | 'passRate' | 'healthPenalty'");
+        throw new EvolvingAgentOutputInvalid("prediction.metric must be 'loss' | 'passRate' | 'healthPenalty'");
     }
     if (p.direction !== 'down' && p.direction !== 'up') {
-        throw new CanonicalProposerOutputInvalid("prediction.direction must be 'down' | 'up'");
+        throw new EvolvingAgentOutputInvalid("prediction.direction must be 'down' | 'up'");
     }
     if (typeof p.checkBy !== 'string' || p.checkBy.trim().length === 0) {
-        throw new CanonicalProposerOutputInvalid('prediction.checkBy must be a non-empty string');
+        throw new EvolvingAgentOutputInvalid('prediction.checkBy must be a non-empty string');
     }
     return {
         metric: p.metric,
@@ -403,7 +403,7 @@ export async function runEvolvingAgent(opts) {
     const { episodeId, targetId } = opts;
     const editBudget = opts.editBudget ?? DEFAULT_EVOLVING_AGENT_EDIT_BUDGET;
     const maxRepairAttempts = Math.max(0, opts.maxRepairAttempts ?? 2);
-    const timeoutMs = opts.timeoutMs ?? DEFAULT_AGENT_TIMEOUT_MS;
+    const timeoutMs = opts.timeoutMs ?? resolveAgentTimeoutMs(opts.harness);
     // Fail closed: the episode must exist (and tells us nothing else we need yet).
     const episode = await readEpisode(repoRoot, episodeId);
     void episode;
@@ -473,8 +473,8 @@ export async function runEvolvingAgent(opts) {
             harness: opts.harness,
         });
         if (run.exitCode !== 0 || run.stdout.length === 0) {
-            // Agent crash is NOT repaired (mirrors the proposer's invocation contract).
-            throw new CanonicalProposerInvocationError(run.stderr);
+            // Agent crash is NOT repaired (the evolving agent's invocation contract).
+            throw new EvolvingAgentInvocationError(run.stderr);
         }
         try {
             const candidate = parseEvolvingAgentResponse(run.stdout);
@@ -483,12 +483,12 @@ export async function runEvolvingAgent(opts) {
                 break;
             }
             // Static-shape edit: validate scope-to-target + frozen freeze here so a
-            // bad path is a REPAIRABLE failure (same class as the proposer).
+            // bad path is a REPAIRABLE failure (the evolving agent's repair contract).
             validateCandidateEdits(candidate.edits, allowedFiles);
             // ≤ L budget (repairable).
             const changed = countChangedLines(candidate.edits, currentFiles);
             if (changed > editBudget) {
-                throw new CanonicalProposerOutputInvalid(`edit changes ${changed} lines, over the ${editBudget}-line budget (L) — make a smaller, more targeted edit`);
+                throw new EvolvingAgentOutputInvalid(`edit changes ${changed} lines, over the ${editBudget}-line budget (L) — make a smaller, more targeted edit`);
             }
             // 范围⊆诊断 (gate-3, repairable).
             const scope = checkScopeWithinDiagnosis({
@@ -500,7 +500,7 @@ export async function runEvolvingAgent(opts) {
                 const where = scope.violations
                     .map((v) => `${v.file} §"${v.section}"`)
                     .join(', ');
-                throw new CanonicalProposerOutputInvalid(`edit touches sections outside the diagnosis (范围⊆诊断 violated): ${where} — only edit the diagnosed sections`);
+                throw new EvolvingAgentOutputInvalid(`edit touches sections outside the diagnosis (范围⊆诊断 violated): ${where} — only edit the diagnosed sections`);
             }
             // static guard (tool-evolution) — RUN INSIDE the repair loop so a
             // content-driven failure (missing rationale / validation evidence / diff)
@@ -517,14 +517,14 @@ export async function runEvolvingAgent(opts) {
                 const errs = findings
                     .filter((f) => f.severity === 'error')
                     .map((f) => `${f.code}: ${f.message}`);
-                throw new CanonicalProposerOutputInvalid(`static gate failed (score ${toolReport.score.toFixed(2)}): ${errs.join('; ') || 'score below threshold'}`);
+                throw new EvolvingAgentOutputInvalid(`static gate failed (score ${toolReport.score.toFixed(2)}): ${errs.join('; ') || 'score below threshold'}`);
             }
             parsed = candidate;
             scopeResult = scope;
             break;
         }
         catch (err) {
-            if (err instanceof CanonicalProposerOutputInvalid && attempt < maxRepairAttempts) {
+            if (err instanceof EvolvingAgentOutputInvalid && attempt < maxRepairAttempts) {
                 feedback = gateFeedback(err.message);
                 continue;
             }
@@ -546,7 +546,7 @@ export async function runEvolvingAgent(opts) {
     // scopeResult was set alongside the accepted parse; reasserted defensively.
     if (!scopeResult || !scopeResult.pass) {
         // Unreachable on the accept path; fail closed rather than evolve out of scope.
-        throw new CanonicalProposerOutputInvalid('范围⊆诊断 scope gate did not pass');
+        throw new EvolvingAgentOutputInvalid('范围⊆诊断 scope gate did not pass');
     }
     // ── 3. POST-LOOP GATE: observed-GREEN ───────────────────────────────────────
     // static / 范围⊆诊断 / budget / valid-prediction were all enforced inside the
@@ -556,11 +556,11 @@ export async function runEvolvingAgent(opts) {
     // would be a category error.
     const objective = await readMainArmObjective(repoRoot, episodeId);
     if (!objective) {
-        throw new CanonicalProposerOutputInvalid('observed-GREEN gate: main-arm/objective.json is missing or unreadable — cannot confirm a verified green run');
+        throw new EvolvingAgentOutputInvalid('observed-GREEN gate: main-arm/objective.json is missing or unreadable — cannot confirm a verified green run');
     }
     const evidence = isArmObjectiveGreen(objective);
     if (!evidence.ok) {
-        throw new CanonicalProposerOutputInvalid(`observed-GREEN gate failed: ${evidence.reason}`);
+        throw new EvolvingAgentOutputInvalid(`observed-GREEN gate failed: ${evidence.reason}`);
     }
     // ── 4. Write back the next policy version. NO candidate dir / sidecar / verdict. ─
     const ledgerEntry = await advancePolicyVersion({