npm - synergyspec-selfevolving - Versions diffs - 2.1.2 → 2.1.4 - Mend

synergyspec-selfevolving 2.1.2 → 2.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41) hide show

package/dist/commands/learn.js +13 -3
package/dist/commands/self-evolution-episode.d.ts +6 -1
package/dist/commands/self-evolution-episode.js +8 -1
package/dist/commands/self-evolution.d.ts +2 -2
package/dist/commands/self-evolution.js +10 -10
package/dist/commands/workflow/status.js +5 -0
package/dist/core/change-readiness.d.ts +1 -1
package/dist/core/change-readiness.js +66 -11
package/dist/core/fitness/test-metrics.d.ts +33 -0
package/dist/core/fitness/test-metrics.js +67 -0
package/dist/core/learn.js +11 -2
package/dist/core/project-config.d.ts +3 -0
package/dist/core/project-config.js +7 -1
package/dist/core/self-evolution/critic-agent.js +13 -5
package/dist/core/self-evolution/edits-contract.d.ts +15 -5
package/dist/core/self-evolution/edits-contract.js +26 -16
package/dist/core/self-evolution/episode-orchestrator.d.ts +16 -9
package/dist/core/self-evolution/episode-orchestrator.js +126 -35
package/dist/core/self-evolution/episode-store.d.ts +34 -11
package/dist/core/self-evolution/episode-store.js +45 -10
package/dist/core/self-evolution/evolving-agent.d.ts +12 -12
package/dist/core/self-evolution/evolving-agent.js +46 -48
package/dist/core/self-evolution/host-harness.d.ts +68 -2
package/dist/core/self-evolution/host-harness.js +208 -21
package/dist/core/self-evolution/policy/policy-store.d.ts +8 -6
package/dist/core/self-evolution/policy/policy-store.js +124 -24
package/dist/core/self-evolution/proposer-slice.d.ts +4 -3
package/dist/core/self-evolution/reward-agent.d.ts +11 -1
package/dist/core/self-evolution/reward-agent.js +53 -20
package/dist/core/self-evolution/reward-aggregator.d.ts +18 -0
package/dist/core/self-evolution/reward-aggregator.js +53 -3
package/dist/core/self-evolution/reward-deepread.d.ts +64 -0
package/dist/core/self-evolution/reward-deepread.js +112 -0
package/dist/core/templates/workflows/learn.js +3 -2
package/dist/core/templates/workflows/self-evolving.js +5 -2
package/dist/core/trajectory/facts.d.ts +69 -2
package/dist/core/trajectory/facts.js +179 -10
package/dist/core/trajectory/skeleton.d.ts +10 -0
package/dist/core/trajectory/skeleton.js +24 -3
package/package.json +4 -3
package/schemas/spec-driven/templates/design.md +2 -1

package/dist/core/self-evolution/edits-contract.js CHANGED Viewed

@@ -14,26 +14,36 @@
  * canonical file.
  */
 import { GATE_DEFINING_FILES } from './candidate-gates.js';
-export class CanonicalProposerOutputInvalid extends Error {
+export class EvolvingAgentOutputInvalid extends Error {
     constructor(message) {
-        super(`canonical proposer output invalid: ${message}`);
-        this.name = 'CanonicalProposerOutputInvalid';
+        super(`evolving agent output invalid: ${message}`);
+        this.name = 'EvolvingAgentOutputInvalid';
     }
 }
 /** The model declined to edit anything (empty edits). Not an error — a no-op. */
-export class CanonicalProposerNoOp extends Error {
+export class EvolvingAgentNoOp extends Error {
     constructor() {
-        super('canonical proposer returned no edits');
-        this.name = 'CanonicalProposerNoOp';
+        super('evolving agent returned no edits');
+        this.name = 'EvolvingAgentNoOp';
     }
 }
 /** The headless agent invocation itself failed (crash / empty output). */
-export class CanonicalProposerInvocationError extends Error {
+export class EvolvingAgentInvocationError extends Error {
     constructor(stderr) {
-        super(`canonical proposer invocation failed: ${stderr}`);
-        this.name = 'CanonicalProposerInvocationError';
+        super(`evolving agent invocation failed: ${stderr}`);
+        this.name = 'EvolvingAgentInvocationError';
     }
 }
+/**
+ * @deprecated v2.0.0 removed the GA "canonical proposer"; these names are
+ * retained only as transitional aliases for any external importer. Use the
+ * `EvolvingAgent*` classes — they are the same constructors.
+ */
+export const CanonicalProposerOutputInvalid = EvolvingAgentOutputInvalid;
+/** @deprecated alias of {@link EvolvingAgentNoOp}. */
+export const CanonicalProposerNoOp = EvolvingAgentNoOp;
+/** @deprecated alias of {@link EvolvingAgentInvocationError}. */
+export const CanonicalProposerInvocationError = EvolvingAgentInvocationError;
 /**
  * Validate already-structured candidate edits against the allowed (target-
  * scoped) file set and the frozen gate-defining files. Author-agnostic: this is
@@ -44,33 +54,33 @@ export class CanonicalProposerInvocationError extends Error {
  * the loop-v2 演进智能体 EVOLVING AGENT call this so their safety contract is
  * byte-identical. relPaths are normalized to POSIX separators.
  *
- * Throws {@link CanonicalProposerNoOp} when `rawEdits` is empty and
- * {@link CanonicalProposerOutputInvalid} for any shape / frozen / scope
+ * Throws {@link EvolvingAgentNoOp} when `rawEdits` is empty and
+ * {@link EvolvingAgentOutputInvalid} for any shape / frozen / scope
  * violation. Path traversal and absolute paths are rejected transitively: they
  * can never be a member of `allowedFiles`, so they fail the scope check.
  */
 export function validateCandidateEdits(rawEdits, allowedFiles) {
     if (rawEdits.length === 0) {
-        throw new CanonicalProposerNoOp();
+        throw new EvolvingAgentNoOp();
     }
     const allowed = new Set(allowedFiles.map((p) => p.replace(/\\/g, '/')));
     const frozen = new Set(GATE_DEFINING_FILES.map((p) => p.replace(/\\/g, '/')));
     const validated = [];
     for (const e of rawEdits) {
         if (!e || typeof e !== 'object') {
-            throw new CanonicalProposerOutputInvalid('edit entry must be an object');
+            throw new EvolvingAgentOutputInvalid('edit entry must be an object');
         }
         const relPath = e.relPath;
         const content = e.content;
         if (typeof relPath !== 'string' || typeof content !== 'string') {
-            throw new CanonicalProposerOutputInvalid('edit must have string relPath and string content');
+            throw new EvolvingAgentOutputInvalid('edit must have string relPath and string content');
         }
         const norm = relPath.replace(/\\/g, '/');
         if (frozen.has(norm)) {
-            throw new CanonicalProposerOutputInvalid(`edit relPath "${relPath}" is a gate-defining/frozen file and may never be proposed`);
+            throw new EvolvingAgentOutputInvalid(`edit relPath "${relPath}" is a gate-defining/frozen file and may never be proposed`);
         }
         if (!allowed.has(norm)) {
-            throw new CanonicalProposerOutputInvalid(`edit relPath "${relPath}" is outside the target's declared files`);
+            throw new EvolvingAgentOutputInvalid(`edit relPath "${relPath}" is outside the target's declared files`);
         }
         validated.push({ relPath: norm, content });
     }

package/dist/core/self-evolution/episode-orchestrator.d.ts CHANGED Viewed

@@ -28,17 +28,22 @@
  *                                            reject-buffer entry — BOTH durably on
  *                                            disk — THEN advance 'rolled-back'.
  *        - otherwise                        → advance 'kept'.
- *   g. 演进智能体 EVOLVING AGENT           — ONLY after (f) persisted: runEvolvingAgent
- *      (optimizer.step)                     reads the reject-buffer FRESH from disk
- *                                            (so THIS episode's just-written entry is
- *                                            in its prompt) and either not-spawned /
+ *   g. 演进智能体 EVOLVING AGENT           — ONLY after (f) persisted: require the
+ *      (optimizer.step)                     main-arm observed-GREEN evidence, then
+ *                                            advance the 'evolving' marker
+ *                                            (heartbeat) so a concurrent sibling
+ *                                            sees a live holder, then
+ *                                            runEvolvingAgent reads the
+ *                                            reject-buffer FRESH from disk (so THIS
+ *                                            episode's just-written entry is in its
+ *                                            prompt) and either not-spawned /
  *                                            refused / evolved.
  *   h. advance 'closed' + releaseInFlight  — ALWAYS, even on error.
  *
  * ORDERING GUARANTEE: the rollback + reject-buffer write are SEQUENTIAL awaits
- * that BOTH complete (and the stage reads 'rolled-back'/'kept') before
- * {@link runEvolvingAgent} is even called. (f) and (g) are never parallelized
- * and never share a Promise.all.
+ * that BOTH complete (and the stage reads 'rolled-back'/'kept') before the
+ * observed-GREEN preflight and {@link runEvolvingAgent}. (f) and (g) are never
+ * parallelized and never share a Promise.all.
  *
  * This module orchestrates; it never spawns an agent itself — the three agents
  * own their own {@link runHeadlessAgent} spawns (the `spawn` seam threads to all
@@ -239,14 +244,16 @@ export interface ResumeEpisodeResult {
  * done step rather than re-advancing a stage already entered:
  *
  *   - 'scored'                    → run the decision (f) then the 演进智能体 (g).
- *   - 'rolled-back' / 'kept'      → run the 演进智能体 EVOLVING AGENT (g) then close.
+ *   - 'rolled-back' / 'kept' / 'evolving' → run the 演进智能体 EVOLVING AGENT (g)
+ *                                   then close. ('evolving' means a crash AFTER the
+ *                                   marker but before the agent settled an outcome.)
  *   - 'evolved'/'evolution-refused'/'abstained' → close.
  *   - 'errored'                   → RE-DRIVE from the last GOOD pre-error stage
  *                                   (an episode may have errored on a TRANSIENT
  *                                   cause — a one-off git/analyzer/agent timeout).
  *                                   The pre-error stage is the last `stageHistory`
  *                                   entry that is NOT 'errored'; when it is one of
- *                                   {'scored','rolled-back','kept'} (the
+ *                                   {'scored','rolled-back','kept','evolving'} (the
  *                                   resume-entry stages) we advance errored → that
  *                                   stage and fall through to the normal dispatch.
  *                                   Otherwise the pre-error stage is not

package/dist/core/self-evolution/episode-orchestrator.js CHANGED Viewed

@@ -1,3 +1,5 @@
+import { promises as fs } from 'node:fs';
+import * as path from 'node:path';
 import { toActionSkeleton } from '../trajectory/skeleton.js';
 import { getTrajectoryForChange } from '../trajectory/registry.js';
 import { acquireInFlight, releaseInFlight, currentPolicyVersion, readPolicyLedger, initPolicyLineage, rollbackPolicyVersion, } from './policy/policy-store.js';
@@ -6,7 +8,7 @@ import { createEpisode, advanceEpisodeStage, writeArmCapture, readEpisode, episo
 import { shouldRunCriticAgent, runCriticAgent, } from './critic-agent.js';
 import { runRewardAgentEnsemble } from './reward-aggregator.js';
 import { detectTestTamper } from './tamper-check.js';
-import { runEvolvingAgent, DEFAULT_EVOLVING_AGENT_EDIT_BUDGET, MIN_EVOLVING_AGENT_EDIT_BUDGET, } from './evolving-agent.js';
+import { runEvolvingAgent, DEFAULT_EVOLVING_AGENT_EDIT_BUDGET, MIN_EVOLVING_AGENT_EDIT_BUDGET, isArmObjectiveGreen, } from './evolving-agent.js';
 import { reconcilePrediction, summarizeCalibration, } from './policy/prediction-reconcile.js';
 /**
  * Build the 主智能体 MAIN AGENT arm `{transcript?, skeleton?, objective}` from an
@@ -183,13 +185,65 @@ function deriveEpisodeId(changeName, now) {
 /**
  * Build the `terminalError` note for a thrown step. A timeout reads identically
  * to a hard crash on disk otherwise, so a message that names a host-agent timeout
- * (the spawn timeout puts `headless agent timed out after Nms` into stderr → the
- * error message) is PREFIXED with a `timeout:` marker. A timed-out episode is
- * then distinguishable from a genuine crash in episode.json. Pure.
+ * (absolute wall: `headless agent timed out after Nms`; idle wall: `idle timeout`)
+ * is PREFIXED with a `timeout:` marker. A timed-out episode is then
+ * distinguishable from a genuine crash in episode.json. Pure.
  */
 function terminalErrorLabel(err) {
     const msg = err instanceof Error ? err.message : String(err);
-    return /timed out/i.test(msg) ? `timeout: ${msg}` : msg;
+    return /\b(timed out|idle timeout)\b/i.test(msg) ? `timeout: ${msg}` : msg;
+}
+function observedGreenFailureReason(objective) {
+    if (!objective) {
+        return 'observed-GREEN gate: main-arm/objective.json is missing or unreadable - cannot confirm a verified green run';
+    }
+    const evidence = isArmObjectiveGreen(objective);
+    return evidence.ok ? null : `observed-GREEN gate failed: ${evidence.reason}`;
+}
+async function readMainArmObjectiveForEpisode(repoRoot, episodeId) {
+    const file = path.join(episodeDir(repoRoot, episodeId), 'main-arm', 'objective.json');
+    let raw;
+    try {
+        raw = await fs.readFile(file, 'utf8');
+    }
+    catch (err) {
+        if (err.code === 'ENOENT')
+            return null;
+        throw err;
+    }
+    try {
+        return JSON.parse(raw);
+    }
+    catch {
+        return null;
+    }
+}
+/**
+ * Step (g) gate: run the evolving agent only when the main arm is observed GREEN.
+ *
+ * Uses `opts.objective` when the caller supplied that key (a nullish value is
+ * treated as missing); otherwise reads main-arm/objective.json for the episode.
+ * When the gate fails, returns `{ kind: 'not-spawned', reason }` without
+ * advancing the stage or calling `runEvolvingAgent`. When `opts.markEvolving`
+ * is truthy, the episode is advanced to 'evolving' (stamping
+ * `evolvingHeartbeatAt`) before `runEvolvingAgent` is called.
+ *
+ * The agent timeout is accepted as `opts.timeoutMs` (the key the call sites in
+ * this module pass) or, as a fallback, `opts.agentTimeoutMs`. Reading only
+ * `agentTimeoutMs` silently dropped the caller-supplied timeout.
+ *
+ * @param {object} opts - repoRoot, episodeId, targetId, editBudget, spawn, plus
+ *   optional calibrationNote, timeoutMs / agentTimeoutMs, harness, markEvolving,
+ *   and objective.
+ * @returns {Promise<object>} The not-spawned result, or whatever
+ *   `runEvolvingAgent` resolves to.
+ */
+async function runEvolvingStepIfObservedGreen(opts) {
+    const objective = Object.prototype.hasOwnProperty.call(opts, 'objective')
+        ? (opts.objective ?? null)
+        : await readMainArmObjectiveForEpisode(opts.repoRoot, opts.episodeId);
+    const gateFailure = observedGreenFailureReason(objective);
+    if (gateFailure) {
+        return { kind: 'not-spawned', reason: gateFailure };
+    }
+    if (opts.markEvolving) {
+        await advanceEpisodeStage({
+            repoRoot: opts.repoRoot,
+            episodeId: opts.episodeId,
+            stage: 'evolving',
+            patch: { evolvingHeartbeatAt: new Date().toISOString() },
+        });
+    }
+    // Callers spread `{ timeoutMs }`; `agentTimeoutMs` stays supported as a fallback.
+    const timeoutMs = opts.timeoutMs ?? opts.agentTimeoutMs;
+    return await runEvolvingAgent({
+        repoRoot: opts.repoRoot,
+        episodeId: opts.episodeId,
+        targetId: opts.targetId,
+        editBudget: opts.editBudget,
+        ...(opts.calibrationNote ? { calibrationNote: opts.calibrationNote } : {}),
+        spawn: opts.spawn,
+        ...(timeoutMs !== undefined ? { timeoutMs } : {}),
+        ...(opts.harness ? { harness: opts.harness } : {}),
+    });
 }
 /**
  * Run ONE episode through the loop in the strict, durably-persisted order
@@ -431,12 +485,13 @@ async function runEpisodeAfterCreate(opts) {
             // runEvolvingAgent reads the reject-buffer FRESH from disk (the entry just
             // written THIS episode is in its prompt). Never parallelized with (f).
             // 步长: after a rollback, shrink the edit budget (smaller step after a step
-            // that lost ground). 预测校准: pass the proposer's recent prediction record.
+            // that lost ground). 预测校准: pass the 演进智能体 EVOLVING AGENT's recent
+            // prediction record.
             const scheduledBudget = decision === 'rolled-back'
                 ? scheduledEditBudget(await readPolicyLedger(repoRoot, targetId), editBudget)
                 : editBudget;
             const calibrationNote = await summarizeCalibration(repoRoot, targetId);
-            evolution = await runEvolvingAgent({
+            evolution = await runEvolvingStepIfObservedGreen({
                 repoRoot,
                 episodeId,
                 targetId,
@@ -445,6 +500,8 @@ async function runEpisodeAfterCreate(opts) {
                 spawn: opts.spawn,
                 ...(opts.agentTimeoutMs !== undefined ? { timeoutMs: opts.agentTimeoutMs } : {}),
                 ...(opts.harness ? { harness: opts.harness } : {}),
+                markEvolving: true,
+                objective: opts.mainArm.objective,
             });
         }
     }
@@ -477,16 +534,19 @@ async function runEpisodeAfterCreate(opts) {
  * Closable stages:
  *   - evolved | evolution-refused | abstained — the 演进智能体 EVOLVING AGENT
  *     reached a definite outcome (or the judge 弃权 abstained), the normal close.
- *   - kept | rolled-back — the 演进智能体 returned not-spawned (its diagnosis
+ *   - evolving — the 演进智能体 returned not-spawned (its diagnosis
  *     abstained-after-gap-check, no gaps, or the target resolved to no editable
- *     local files), so the episode never advanced past the decision. By the time
- *     this runs (AFTER runEvolvingAgent returned), a stage still at 'kept'/
- *     'rolled-back' can ONLY mean not-spawned — a success advances 'evolved', a
+ *     local files), so the episode never advanced past the 'evolving' marker.
+ *     By the time this runs (AFTER runEvolvingAgent returned), a stage still at
+ *     'evolving' can ONLY mean not-spawned — a success advances 'evolved', a
  *     refusal advances 'evolution-refused', and a throw is caught upstream and
  *     records 'errored' + rethrows so this close is never reached. So a leftover
- *     kept/rolled-back at close time IS the finished-nothing-to-evolve case and
- *     must close, not rest forever at a non-terminal stage (the exact ambiguity
- *     the 'errored' stage was meant to remove).
+ *     'evolving' at close time IS the finished-nothing-to-evolve case and must
+ *     close, not rest forever at a non-terminal stage (the exact ambiguity the
+ *     'errored' stage was meant to remove).
+ *   - kept | rolled-back — retained for back-compat: an OLD episode record (or a
+ *     code path that did not advance the 'evolving' marker) that returned
+ *     not-spawned never advances past the decision; close it the same way.
  *
  * Any other (genuinely non-closable) stage is left as-is rather than throwing, so
  * the close never masks the real episode outcome.
@@ -497,7 +557,10 @@ async function closeEpisodeBestEffort(repoRoot, episodeId) {
         'evolved',
         'evolution-refused',
         'abstained',
-        // not-spawned 演进智能体 leaves the episode here — close the finished episode.
+        // not-spawned 演进智能体 leaves the episode at the 'evolving' marker — close
+        // the finished episode. 'kept'/'rolled-back' retained for back-compat with
+        // an old record / a path that never advanced the marker.
+        'evolving',
         'kept',
         'rolled-back',
     ]);
@@ -586,14 +649,16 @@ async function ensureRejectBufferEntry(repoRoot, opts) {
  * done step rather than re-advancing a stage already entered:
  *
  *   - 'scored'                    → run the decision (f) then the 演进智能体 (g).
- *   - 'rolled-back' / 'kept'      → run the 演进智能体 EVOLVING AGENT (g) then close.
+ *   - 'rolled-back' / 'kept' / 'evolving' → run the 演进智能体 EVOLVING AGENT (g)
+ *                                   then close. ('evolving' means a crash AFTER the
+ *                                   marker but before the agent settled an outcome.)
  *   - 'evolved'/'evolution-refused'/'abstained' → close.
  *   - 'errored'                   → RE-DRIVE from the last GOOD pre-error stage
  *                                   (an episode may have errored on a TRANSIENT
  *                                   cause — a one-off git/analyzer/agent timeout).
  *                                   The pre-error stage is the last `stageHistory`
  *                                   entry that is NOT 'errored'; when it is one of
- *                                   {'scored','rolled-back','kept'} (the
+ *                                   {'scored','rolled-back','kept','evolving'} (the
  *                                   resume-entry stages) we advance errored → that
  *                                   stage and fall through to the normal dispatch.
  *                                   Otherwise the pre-error stage is not
@@ -617,7 +682,7 @@ export async function resumeEpisode(opts) {
     // for an 'errored' episode we attempt to RE-DRIVE from the last good pre-error
     // stage (a transient git/analyzer/agent failure should be retryable via an
     // operator resume). 'errored' stays terminal for every OTHER caller — only this
-    // resume path may re-drive it, via the errored → {scored,rolled-back,kept}
+    // resume path may re-drive it, via the errored → {scored,rolled-back,kept,evolving}
     // transitions the stage machine allows ONLY for operator recovery.
     let stage = ep.stage;
     if (ep.stage === 'errored') {
@@ -626,7 +691,8 @@ export async function resumeEpisode(opts) {
             .find((h) => h.stage !== 'errored')?.stage;
         if (preError === 'scored' ||
             preError === 'rolled-back' ||
-            preError === 'kept') {
+            preError === 'kept' ||
+            preError === 'evolving') {
             // Re-open the errored episode at its last auto-resumable stage, then fall
             // through to the normal dispatch for that stage.
             await advanceEpisodeStage({ repoRoot, episodeId, stage: preError });
@@ -636,7 +702,7 @@ export async function resumeEpisode(opts) {
         // 'baseline-skipped'); leave the episode at 'errored' and report it as-is.
     }
     // The decision (f) + 演进智能体 EVOLVING AGENT (g) re-runs below can THROW — a
-    // wedged/crashed host CLI (CanonicalProposerInvocationError), a timeout, or an
+    // wedged/crashed host CLI (EvolvingAgentInvocationError), a timeout, or an
     // observed-GREEN gate throw. UNCAUGHT, that leaves the episode DURABLY stuck at
     // a non-terminal stage ('scored'/'rolled-back'/'kept' — the orphan state fix ❷
     // eliminates for runEpisode). Record the SAME terminal 'errored' stage here
@@ -644,7 +710,31 @@ export async function resumeEpisode(opts) {
     // re-throw. Resume holds NO in-flight lock, so this is a durable-stage fix, not
     // a leak fix. Best-effort write: a failed record must not mask the original throw.
     try {
-        if (stage === 'scored') {
+        // TOCTOU guard: resume read the stage at entry (`ep.stage`, above), but it holds NO in-flight
+        // lock, so a CONCURRENT runEpisode for the same target can advance THIS episode to
+        // a TERMINAL stage between that read and the transitions below. Re-read the episode
+        // immediately before dispatching; if it is already finished, the transitions would
+        // throw an illegal-transition error (which the catch below would then mis-record as
+        // a fresh 'errored'). Short-circuit instead: report the already-finished episode
+        // via the normal completion return. (The errored→pre-error re-drive above already
+        // turned a re-drivable 'errored' into a non-terminal stage, so a stage that is
+        // STILL terminal here is genuinely finished, not auto-resumable.)
+        const TERMINAL_STAGES = new Set([
+            'closed',
+            'errored',
+            'evolution-refused',
+            'evolved',
+            'abstained',
+        ]);
+        const fresh = await readEpisode(repoRoot, episodeId);
+        stage = fresh.stage;
+        if (TERMINAL_STAGES.has(stage)) {
+            // 'evolved'/'evolution-refused'/'abstained' still want their best-effort close;
+            // 'closed'/'errored' are no-ops for closeEpisodeBestEffort. No transition is
+            // attempted, so the race cannot surface as an illegal-transition throw.
+            await closeEpisodeBestEffort(repoRoot, episodeId);
+        }
+        else if (stage === 'scored') {
             // Re-run the decision (f) from the on-disk diagnosis, then (g).
             const diagnosis = await readDiagnosisForResume(repoRoot, episodeId);
             if (shouldSkipEvolution(diagnosis)) {
@@ -719,7 +809,7 @@ export async function resumeEpisode(opts) {
                     ? scheduledEditBudget(await readPolicyLedger(repoRoot, targetId), editBudget)
                     : editBudget;
                 const calibrationNote = await summarizeCalibration(repoRoot, targetId);
-                evolution = await runEvolvingAgent({
+                evolution = await runEvolvingStepIfObservedGreen({
                     repoRoot,
                     episodeId,
                     targetId,
@@ -728,11 +818,12 @@ export async function resumeEpisode(opts) {
                     spawn: opts.spawn,
                     ...(opts.agentTimeoutMs !== undefined ? { timeoutMs: opts.agentTimeoutMs } : {}),
                     ...(opts.harness ? { harness: opts.harness } : {}),
+                    markEvolving: true,
                 });
             }
             await closeEpisodeBestEffort(repoRoot, episodeId);
         }
-        else if (stage === 'rolled-back' || stage === 'kept') {
+        else if (stage === 'rolled-back' || stage === 'kept' || stage === 'evolving') {
             // The decision already ran (and the original episode settled the prediction);
             // re-settle idempotently for the crash window, then schedule + calibrate.
             try {
@@ -741,11 +832,16 @@ export async function resumeEpisode(opts) {
             catch {
                 // best-effort: advisory only
             }
-            const scheduledBudget = stage === 'rolled-back'
+            // Resuming from 'evolving' means the decision is in history (not the resume
+            // stage); read it from stageHistory so the 步长 schedule still shrinks after a
+            // rollback. Resuming from 'rolled-back'/'kept' uses the resume stage directly.
+            const wasRolledBack = stage === 'rolled-back' ||
+                (stage === 'evolving' && ep.stageHistory.some((h) => h.stage === 'rolled-back'));
+            const scheduledBudget = wasRolledBack
                 ? scheduledEditBudget(await readPolicyLedger(repoRoot, targetId), editBudget)
                 : editBudget;
             const calibrationNote = await summarizeCalibration(repoRoot, targetId);
-            evolution = await runEvolvingAgent({
+            evolution = await runEvolvingStepIfObservedGreen({
                 repoRoot,
                 episodeId,
                 targetId,
@@ -754,16 +850,13 @@ export async function resumeEpisode(opts) {
                 spawn: opts.spawn,
                 ...(opts.agentTimeoutMs !== undefined ? { timeoutMs: opts.agentTimeoutMs } : {}),
                 ...(opts.harness ? { harness: opts.harness } : {}),
+                markEvolving: stage !== 'evolving',
             });
             await closeEpisodeBestEffort(repoRoot, episodeId);
         }
-        else if (stage === 'evolved' ||
-            stage === 'evolution-refused' ||
-            stage === 'abstained') {
-            await closeEpisodeBestEffort(repoRoot, episodeId);
-        }
-        // earlier stages (and a non-auto-resumable 'errored'): not auto-resumable here
-        // — reported as-is.
+        // Terminal stages (incl. a non-auto-resumable 'errored') are handled by the
+        // TOCTOU guard above; earlier stages are not auto-resumable here — reported
+        // as-is via the completion return below.
     }
     catch (err) {
         // A thrown decision/evolving step records a DURABLE terminal 'errored' stage so
@@ -790,9 +883,7 @@ export async function resumeEpisode(opts) {
  * AGENT's reader uses).
  */
 async function readDiagnosisForResume(repoRoot, episodeId) {
-    const { promises: fs } = await import('node:fs');
-    const pathMod = await import('node:path');
-    const file = pathMod.join(episodeDir(repoRoot, episodeId), 'diagnosis.json');
+    const file = path.join(episodeDir(repoRoot, episodeId), 'diagnosis.json');
     let raw;
     try {
         raw = await fs.readFile(file, 'utf8');

package/dist/core/self-evolution/episode-store.d.ts CHANGED Viewed

@@ -57,10 +57,11 @@
  *     -> (baseline-arm-captured | baseline-skipped)   // CRITIC AGENT（基线智能体 baseline agent）arm
  *     -> scored                                       // 奖励智能体 REWARD AGENT wrote diagnosis.json
  *     -> (rolled-back | kept)                         // rollback decision on the main arm's edits
+ *     -> evolving                                     // 演进智能体 EVOLVING AGENT holds the in-flight lock
  *     -> (evolved | evolution-refused | abstained)    // 演进智能体 EVOLVING AGENT outcome
  *     -> closed                                       // terminal
  *
- *   (rolled-back | kept)
+ *   (rolled-back | kept | evolving)
  *     -> closed                                       // terminal — see below
  *
  *   (any non-terminal stage)
@@ -73,12 +74,23 @@
  * 弃权 abstains when no nameable gap → no rollback decision needed → the
  * 演进智能体 EVOLVING AGENT is never spawned.
  *
- * `rolled-back`/`kept` may also reach `closed` DIRECTLY: when the 演进智能体
- * EVOLVING AGENT was NOT spawned (its diagnosis abstained-after-gap-check, named
- * no gaps, or the target resolved to no editable local files) the episode never
- * advances past the decision, so the orchestrator's best-effort close terminates
- * the finished-nothing-to-evolve episode rather than leaving it resting forever
- * at a non-terminal stage.
+ * `evolving` is advanced by the orchestrator BEFORE it spawns the 演进智能体
+ * EVOLVING AGENT, while that agent holds the in-flight lock. It exists so a
+ * concurrent sibling reading the store distinguishes a LIVE-but-slow holder
+ * (stage `evolving`) from an episode that merely reached the decision (`kept`/
+ * `rolled-back`) — without it the stage stays `kept` for the whole evolving
+ * spawn, and a sibling can misread a running holder as stale. The
+ * `evolvingHeartbeatAt` field records when the stage was entered. Old episode
+ * records that predate this stage never carry it; they resume exactly as before
+ * (the `rolled-back`/`kept` → outcome transitions are retained for them).
+ *
+ * `rolled-back`/`kept`/`evolving` may also reach `closed` DIRECTLY: when the
+ * 演进智能体 EVOLVING AGENT was NOT spawned (its diagnosis abstained-after-gap-
+ * check, named no gaps, or the target resolved to no editable local files) the
+ * episode never advances past the decision (it stays `kept`/`rolled-back`, or —
+ * for the not-spawned-after-evolving-marker case — `evolving`), so the
+ * orchestrator's best-effort close terminates the finished-nothing-to-evolve
+ * episode rather than leaving it resting forever at a non-terminal stage.
  *
  * `errored` is a SECOND terminal stage reachable from EVERY non-terminal stage.
  * A thrown step — an agent spawn that crashes or times out (主智能体 MAIN AGENT /
@@ -92,10 +104,10 @@
  * `errored` is terminal for every target EXCEPT an operator-driven resume: a
  * transient cause (a one-off git/analyzer/agent timeout) is retryable, so an
  * `episode resume` may RE-DRIVE an errored episode back to its last good
- * pre-error stage — `errored -> {scored, rolled-back, kept}` (the resume-entry
- * stages). No other caller may leave `errored`.
+ * pre-error stage — `errored -> {scored, rolled-back, kept, evolving}` (the
+ * resume-entry stages). No other caller may leave `errored`.
  */
-export type EpisodeStage = 'created' | 'main-arm-captured' | 'baseline-arm-captured' | 'baseline-skipped' | 'scored' | 'rolled-back' | 'kept' | 'evolved' | 'evolution-refused' | 'abstained' | 'closed' | 'errored';
+export type EpisodeStage = 'created' | 'main-arm-captured' | 'baseline-arm-captured' | 'baseline-skipped' | 'scored' | 'rolled-back' | 'kept' | 'evolving' | 'evolved' | 'evolution-refused' | 'abstained' | 'closed' | 'errored';
 /**
  * Iterable list of every legal {@link EpisodeStage} value. Order follows the
  * documented state machine for readability, not behavior.
@@ -143,6 +155,14 @@ export interface EpisodeRecord {
     stageHistory: EpisodeStageHistoryEntry[];
     /** Why the baseline arm was skipped (set with stage `baseline-skipped`). */
     baselineSkippedReason?: string;
+    /**
+     * ISO 8601 UTC timestamp of when the episode entered the `evolving` stage
+     * (the moment the 演进智能体 EVOLVING AGENT spawn began holding the in-flight
+     * lock). It serves as a heartbeat for liveness checks: a concurrent sibling
+     * can tell a recently entered `evolving` holder apart from one that has
+     * genuinely wedged. Absent on old records (and on every stage before
+     * `evolving`).
+     */
+    evolvingHeartbeatAt?: string;
     /** advantage ＝ reward(主臂) − reward(基线臂); null when the 奖励智能体 REWARD AGENT 弃权 abstained. */
     advantage?: number | null;
     /**
@@ -163,6 +183,8 @@ export interface EpisodeStagePatch {
     advantage?: number | null;
     /** Cause note merged alongside the terminal `errored` stage. */
     terminalError?: string;
+    /** Heartbeat timestamp merged alongside the `evolving` stage. */
+    evolvingHeartbeatAt?: string;
 }
 /**
  * True iff `(from -> to)` is a legal transition in the episode stage machine.
@@ -234,7 +256,8 @@ export interface AdvanceEpisodeStageOptions {
  *   advancing to a stage not reachable from the current one throws.
  * - Appends `{stage, at}` to `stageHistory`.
  * - Merges the allowlisted `patch` fields (`policyVersionBaseline`,
- *   `baselineSkippedReason`, `advantage`, `terminalError`) in the same write.
+ *   `baselineSkippedReason`, `advantage`, `terminalError`, `evolvingHeartbeatAt`)
+ *   in the same write.
  * - Bumps `updatedAt`.
  */
 export declare function advanceEpisodeStage(opts: AdvanceEpisodeStageOptions): Promise<EpisodeRecord>;

package/dist/core/self-evolution/episode-store.js CHANGED Viewed

@@ -61,6 +61,7 @@ export const EPISODE_STAGES = [
     'scored',
     'rolled-back',
     'kept',
+    'evolving',
     'evolved',
     'evolution-refused',
     'abstained',
@@ -77,8 +78,9 @@ const EPISODE_ID_PATTERN = /^[a-z0-9][a-z0-9-]*$/;
 // step: agent spawn crash/timeout or un-repairable gate), so a failed episode
 // is never orphaned mid-flight. `closed` and `errored` are the two terminals;
 // `errored` is terminal EXCEPT for an operator resume re-drive back to its last
-// good pre-error stage (scored/rolled-back/kept). `rolled-back`/`kept` may also
-// close directly (the not-spawned 演进智能体 finished-nothing-to-evolve case).
+// good pre-error stage (scored/rolled-back/kept/evolving). `rolled-back`/`kept`/
+// `evolving` may also close directly (the not-spawned 演进智能体
+// finished-nothing-to-evolve case).
 const LEGAL_STAGE_TRANSITIONS = new Map([
     ['created', new Set(['main-arm-captured', 'errored'])],
     [
@@ -91,15 +93,38 @@ const LEGAL_STAGE_TRANSITIONS = new Map([
     // abstained, so no rollback decision is needed and the 演进智能体
     // EVOLVING AGENT is never spawned.
     ['scored', new Set(['rolled-back', 'kept', 'abstained', 'errored'])],
-    // 'rolled-back'/'kept' may also reach 'closed' DIRECTLY when the 演进智能体
-    // EVOLVING AGENT was not-spawned (the finished-nothing-to-evolve case), so the
-    // episode never rests forever at a non-terminal stage.
+    // 'rolled-back'/'kept' advance to 'evolving' BEFORE the 演进智能体 EVOLVING
+    // AGENT spawn (so a sibling can tell a live holder from a stale lock). They
+    // also retain the DIRECT transitions to the evolving outcomes + 'closed' so
+    // (a) an OLD episode record resumed from 'rolled-back'/'kept' (no 'evolving'
+    // stage) behaves exactly as before, and (b) the not-spawned
+    // finished-nothing-to-evolve case can still close directly.
     [
         'rolled-back',
-        new Set(['evolved', 'evolution-refused', 'abstained', 'closed', 'errored']),
+        new Set([
+            'evolving',
+            'evolved',
+            'evolution-refused',
+            'abstained',
+            'closed',
+            'errored',
+        ]),
     ],
     [
         'kept',
+        new Set([
+            'evolving',
+            'evolved',
+            'evolution-refused',
+            'abstained',
+            'closed',
+            'errored',
+        ]),
+    ],
+    // The 演进智能体 EVOLVING AGENT outcome (or a not-spawned close), or 'errored'
+    // on a thrown spawn/gate.
+    [
+        'evolving',
         new Set(['evolved', 'evolution-refused', 'abstained', 'closed', 'errored']),
     ],
     ['evolved', new Set(['closed'])],
@@ -107,8 +132,9 @@ const LEGAL_STAGE_TRANSITIONS = new Map([
     ['abstained', new Set(['closed'])],
     ['closed', new Set()],
     // 'errored' is terminal EXCEPT for an operator resume re-drive back to the
-    // last good pre-error stage (scored/rolled-back/kept); no other caller leaves it.
-    ['errored', new Set(['scored', 'rolled-back', 'kept'])],
+    // last good pre-error stage (scored/rolled-back/kept/evolving); no other
+    // caller leaves it.
+    ['errored', new Set(['scored', 'rolled-back', 'kept', 'evolving'])],
 ]);
 /**
  * True iff `(from -> to)` is a legal transition in the episode stage machine.
@@ -387,6 +413,7 @@ const ALLOWED_PATCH_KEYS = new Set([
     'baselineSkippedReason',
     'advantage',
     'terminalError',
+    'evolvingHeartbeatAt',
 ]);
 /** Validate an {@link EpisodeStagePatch} fail-closed; returns the merge slice. */
 function validateStagePatch(patch, episodeId) {
@@ -394,7 +421,7 @@ function validateStagePatch(patch, episodeId) {
     for (const key of Object.keys(patch)) {
         if (!ALLOWED_PATCH_KEYS.has(key)) {
             throw new Error(`Illegal episode patch field for ${episodeId}: "${key}" ` +
-                `(allowed: policyVersionBaseline, baselineSkippedReason, advantage, terminalError)`);
+                `(allowed: policyVersionBaseline, baselineSkippedReason, advantage, terminalError, evolvingHeartbeatAt)`);
         }
     }
     if ('policyVersionBaseline' in patch) {
@@ -425,6 +452,13 @@ function validateStagePatch(patch, episodeId) {
         }
         merge.terminalError = v;
     }
+    if ('evolvingHeartbeatAt' in patch) {
+        const v = patch.evolvingHeartbeatAt;
+        if (typeof v !== 'string' || v.length === 0) {
+            throw new Error(`Invalid patch for ${episodeId}: evolvingHeartbeatAt must be a non-empty string`);
+        }
+        merge.evolvingHeartbeatAt = v;
+    }
     return merge;
 }
 /**
@@ -436,7 +470,8 @@ function validateStagePatch(patch, episodeId) {
  *   advancing to a stage not reachable from the current one throws.
  * - Appends `{stage, at}` to `stageHistory`.
  * - Merges the allowlisted `patch` fields (`policyVersionBaseline`,
- *   `baselineSkippedReason`, `advantage`, `terminalError`) in the same write.
+ *   `baselineSkippedReason`, `advantage`, `terminalError`, `evolvingHeartbeatAt`)
+ *   in the same write.
  * - Bumps `updatedAt`.
  */
 export async function advanceEpisodeStage(opts) {