npm - synergyspec-selfevolving - Versions diffs - 2.1.1 → 2.1.2 - Mend

synergyspec-selfevolving 2.1.1 → 2.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24)

package/dist/commands/learn.js +29 -3
package/dist/commands/self-evolution-episode.js +37 -1
package/dist/core/fitness/health/local-source.d.ts +11 -0
package/dist/core/fitness/health/local-source.js +53 -1
package/dist/core/project-config.d.ts +5 -0
package/dist/core/project-config.js +23 -1
package/dist/core/self-evolution/critic-agent.d.ts +16 -1
package/dist/core/self-evolution/critic-agent.js +87 -17
package/dist/core/self-evolution/episode-orchestrator.d.ts +28 -0
package/dist/core/self-evolution/episode-orchestrator.js +349 -216
package/dist/core/self-evolution/episode-store.d.ts +41 -2
package/dist/core/self-evolution/episode-store.js +33 -9
package/dist/core/self-evolution/evolving-agent.d.ts +51 -2
package/dist/core/self-evolution/evolving-agent.js +45 -4
package/dist/core/self-evolution/host-harness.d.ts +43 -0
package/dist/core/self-evolution/host-harness.js +192 -0
package/dist/core/self-evolution/reward-agent.d.ts +68 -0
package/dist/core/self-evolution/reward-agent.js +76 -21
package/dist/core/self-evolution/reward-aggregator.d.ts +26 -7
package/dist/core/self-evolution/reward-aggregator.js +78 -20
package/dist/core/self-evolution/verdict.d.ts +3 -2
package/dist/core/self-evolution/verdict.js +4 -1
package/dist/dashboard/react-client.js +2 -1
package/package.json +1 -1

package/dist/commands/learn.js CHANGED

@@ -7,7 +7,7 @@ import { findTranscriptsForChange, resolveChangeDir, validateExplicitTrajectoryH
 import { getTrajectoryForChange } from '../core/trajectory/registry.js';
 import { toTrajectoryFacts, describeRunnerResults } from '../core/trajectory/facts.js';
 import { toActionSkeleton } from '../core/trajectory/skeleton.js';
-import { resolveHostHarness } from '../core/self-evolution/host-harness.js';
+import { resolveHostHarness, resolveHostHarnessForRepo } from '../core/self-evolution/host-harness.js';
 import { mineSuccessSignals } from '../core/self-evolution/success-channel.js';
 import { captureMainArm, runEpisode, } from '../core/self-evolution/episode-orchestrator.js';
 import { buildLLMSummaryCandidates, ingestLearnHandoff, } from '../core/learn/llm-summary.js';
@@ -36,6 +36,21 @@ export function registerLearnCommand(program, deps = {}) {
         .action(async (change, options) => {
         try {
             const projectRoot = process.cwd();
+            // SEED the host harness for the env-less episode subagent. learn runs at
+            // HOST level, where the OPENCODE_*/CODEX_* env that distinguishes the
+            // host harness IS present; the downstream loop-v2 episode (and its
+            // reward/evolving agent spawns) can run in an env-less Task subagent that
+            // would otherwise default to the 'claude' binary. resolveHostHarnessForRepo
+            // self-persists the confidently-resolved harness to
+            // `.synergyspec-selfevolving/host-harness.json`, so the subagent reads it
+            // back instead of guessing. Best-effort: a persistence failure must never
+            // fail the learn run (a missing seed only degrades to today's behavior).
+            try {
+                await resolveHostHarnessForRepo(projectRoot);
+            }
+            catch {
+                // best-effort seed only.
+            }
             // USER-TYPED handle flags are validated up front and fail LOUD
             // (exit 1) on a miss — unlike the env-var channel, which keeps the
             // fail-closed refusal semantics inside discovery (empty result, the
@@ -180,18 +195,29 @@ export function registerLearnCommand(program, deps = {}) {
                     report,
                 });
                 // Thread the loop-v2 reward judge-quality config (samples / noiseFloor /
-                // orderSwap / tamperCheck). Omitted ⇒ the orchestrator's single-sample,
-                // flag-only default (no extra spawns).
+                // orderSwap / tamperCheck / divergenceCheck). Omitted ⇒ the orchestrator's
+                // single-sample, divergence-routing default (no extra spawns).
                 const episodeConfig = readProjectConfig(projectRoot);
+                // Pass the host-resolved harness EXPLICITLY into the in-process episode
+                // (learn runs host-level where the harness is confidently resolvable),
+                // so the orchestrator's reward/evolving agent spawns never fall back to
+                // the default 'claude' binary on a non-claude host.
+                const harness = await resolveHostHarnessForRepo(projectRoot);
                 episodeOutcome = await runEpisodeImpl({
                     repoRoot: projectRoot,
                     targetId: concreteEvolveTarget.targetId,
                     changeName: report.changeName,
                     changeDirPath: report.changeDir,
                     mainArm,
+                    harness,
                     ...(episodeConfig?.selfEvolution?.reward
                         ? { reward: episodeConfig.selfEvolution.reward }
                         : {}),
+                    // Per-agent headless-spawn ceiling (ms). Omitted ⇒ the orchestrator's
+                    // built-in DEFAULT_AGENT_TIMEOUT_MS default applies.
+                    ...(episodeConfig?.selfEvolution?.agentTimeoutMs !== undefined
+                        ? { agentTimeoutMs: episodeConfig.selfEvolution.agentTimeoutMs }
+                        : {}),
                 });
             }
             if (options.json) {

package/dist/commands/self-evolution-episode.js CHANGED

@@ -8,6 +8,7 @@ readPolicyLedger, readRejectBuffer, currentPolicyVersion, rollbackPolicyVersion,
 lookupCanonicalTarget, listCanonicalTargets, DESIGN_ARTIFACT_TARGET_ID, } from '../core/self-evolution/index.js';
 import { generateLearnReport } from '../core/learn.js';
 import { validateExplicitTrajectoryHandle } from '../core/learn/trajectory-discovery.js';
+import { resolveHostHarnessForRepo } from '../core/self-evolution/host-harness.js';
 import { validateChangeExists } from './workflow/shared.js';
 import { readProjectConfig } from '../core/project-config.js';
 /**
@@ -150,12 +151,21 @@ export async function runEpisodeCommand(args, opts) {
     let outcome;
     try {
         const episodeConfig = readProjectConfig(opts.repoRoot);
+        // Resolve the HOST harness once here (where the host's OPENCODE_*/CODEX_* env
+        // is still present) and thread it EXPLICITLY into the episode. resolveHost-
+        // HarnessForRepo self-persists the env-resolved choice to
+        // `.synergyspec-selfevolving/host-harness.json`, so even when the
+        // orchestrator's reward/evolving agents later spawn from an env-less Task
+        // subagent they read the seeded harness instead of defaulting to the
+        // 'claude' binary (the ydata proposer-spawn failure).
+        const harness = await resolveHostHarnessForRepo(opts.repoRoot);
         const episodeOptions = {
             repoRoot: opts.repoRoot,
             targetId,
             changeName,
             changeDirPath,
             mainArm,
+            harness,
             ...(args.noBaseline ? { skipBaseline: true } : {}),
             ...(episodeConfig?.selfEvolution?.reward
                 ? { reward: episodeConfig.selfEvolution.reward }
@@ -163,6 +173,12 @@ export async function runEpisodeCommand(args, opts) {
             ...(episodeConfig?.selfEvolution?.critic
                 ? { critic: episodeConfig.selfEvolution.critic }
                 : {}),
+            // Per-agent headless-spawn ceiling (ms). Omitted ⇒ the orchestrator's
+            // built-in DEFAULT_AGENT_TIMEOUT_MS applies; configured to let a repo whose
+            // critic re-do baseline legitimately runs long raise the per-agent ceiling.
+            ...(episodeConfig?.selfEvolution?.agentTimeoutMs !== undefined
+                ? { agentTimeoutMs: episodeConfig.selfEvolution.agentTimeoutMs }
+                : {}),
         };
         outcome = await runEpisode(episodeOptions);
     }
@@ -234,7 +250,27 @@ export async function runResumeEpisodeCommand(args, opts) {
     const resumeEpisode = opts.resumeEpisode ?? resumeEpisodeImpl;
     let result;
     try {
-        result = await resumeEpisode({ repoRoot: opts.repoRoot, episodeId: args.episodeId });
+        // Resolve the HOST harness HERE (where the host's OPENCODE_*/CODEX_* env is
+        // still present) and thread it EXPLICITLY into the resumed episode. Resume is
+        // the operator re-entry MOST likely to run env-less (a recovery from another
+        // shell), so without this the resumed 演进智能体 EVOLVING AGENT re-spawns
+        // against the absent default 'claude' binary on an opencode/codex host — the
+        // ses_1330/1331 ENAMETOOLONG/spawn failure the harness sidecar exists to
+        // prevent. resolveHostHarnessForRepo self-persists the resolved choice, so
+        // an env-less Task subagent reads the seeded harness instead of defaulting.
+        const harness = await resolveHostHarnessForRepo(opts.repoRoot);
+        // Thread the configured per-agent headless-spawn ceiling (ms) into the
+        // resumed 演进智能体 EVOLVING AGENT. Omitted ⇒ the built-in
+        // DEFAULT_AGENT_TIMEOUT_MS default applies.
+        const resumeConfig = readProjectConfig(opts.repoRoot);
+        result = await resumeEpisode({
+            repoRoot: opts.repoRoot,
+            episodeId: args.episodeId,
+            harness,
+            ...(resumeConfig?.selfEvolution?.agentTimeoutMs !== undefined
+                ? { agentTimeoutMs: resumeConfig.selfEvolution.agentTimeoutMs }
+                : {}),
+        });
     }
     catch (err) {
         const message = err instanceof Error ? err.message : String(err);

package/dist/core/fitness/health/local-source.d.ts CHANGED

@@ -36,6 +36,9 @@ export interface LocalPythonMetricSourceOptions {
     /** Path to a slop-rules YAML for the ast-grep engine. When omitted, resolved
      *  to the `slop_rules.yaml` vendored next to the analyzer script. */
     rulesPath?: string;
+    /** Wall-clock ceiling (ms) for one analyzer spawn before it is killed and the
+     *  reading degraded to `null`. Defaults to {@link DEFAULT_ANALYZER_TIMEOUT_MS}. */
+    timeoutMs?: number;
 }
 /**
  * Locate the ast-grep binary the analyzer's Python slop-rule engine should
@@ -59,6 +62,7 @@ export declare class LocalPythonMetricSource implements MetricSource {
     private readonly scriptPath;
     private readonly astGrepBin;
     private readonly rulesPath;
+    private readonly timeoutMs;
     constructor(options?: LocalPythonMetricSourceOptions);
     /**
      * Run the analyzer over `codeDir` and return its metrics, or `null` on any
@@ -84,6 +88,13 @@ export declare class LocalPythonMetricSource implements MetricSource {
      * rules file exists (the analyzer's own PATH fallback still uses it even with
      * no resolved binary). Resolves to the raw stdout string on a clean (exit 0)
      * run, or `null` if the process cannot be spawned or exits non-zero.
+     *
+     * A {@link timeoutMs} wall-clock ceiling guards against a hung analyzer (an
+     * import deadlock, an ast-grep stall on a pathological file, an AV/junction
+     * traversal stall): on expiry the child is killed (SIGTERM, escalating to
+     * SIGKILL after {@link KILL_GRACE_MS}) and the reading degrades to `null` —
+     * the same "no signal" contract every other failure path already honours, so
+     * the awaiting episode never hangs with the in-flight lock held.
      */
     private runAnalyzer;
 }

package/dist/core/fitness/health/local-source.js CHANGED

@@ -25,6 +25,19 @@ import { createRequire } from 'node:module';
 import path from 'node:path';
 /** The exact set of numeric keys the analyzer emits. Order is irrelevant. */
 const HEALTH_KEYS = ['structural_erosion', 'verbosity'];
+/**
+ * Wall-clock ceiling for one analyzer spawn. The Python/ast-grep analyzer is
+ * normally sub-second; a run that exceeds this is treated as hung (e.g. a Python
+ * import deadlock, ast-grep stalling on a pathological file, or an AV/junction
+ * traversal stall on Windows) and degraded to the "no signal ⇒ null" contract.
+ */
+const DEFAULT_ANALYZER_TIMEOUT_MS = 120_000;
+/**
+ * Grace window between the polite SIGTERM and the forced SIGKILL when a timed-out
+ * analyzer has not exited yet. Short on purpose: the goal is to stop holding the
+ * in-flight episode lock, not to let a wedged child linger.
+ */
+const KILL_GRACE_MS = 2_000;
 /**
  * Locate `scripts/code-health.py` relative to this module. Built output lives
  * at `dist/core/fitness/health/local-source.js`; the script stays at the
@@ -218,12 +231,14 @@ export class LocalPythonMetricSource {
     scriptPath;
     astGrepBin;
     rulesPath;
+    timeoutMs;
     constructor(options = {}) {
         this.pythonBin = options.pythonBin ?? defaultPythonBin();
         this.spawnImpl = options.spawnImpl ?? nodeSpawn;
         this.scriptPath = options.scriptPath ?? defaultScriptPath();
         this.astGrepBin = options.astGrepBin ?? defaultAstGrepBin();
         this.rulesPath = options.rulesPath ?? defaultRulesPath(this.scriptPath);
+        this.timeoutMs = options.timeoutMs ?? DEFAULT_ANALYZER_TIMEOUT_MS;
     }
     /**
      * Run the analyzer over `codeDir` and return its metrics, or `null` on any
@@ -269,14 +284,29 @@ export class LocalPythonMetricSource {
      * rules file exists (the analyzer's own PATH fallback still uses it even with
      * no resolved binary). Resolves to the raw stdout string on a clean (exit 0)
      * run, or `null` if the process cannot be spawned or exits non-zero.
+     *
+     * A {@link timeoutMs} wall-clock ceiling guards against a hung analyzer (an
+     * import deadlock, an ast-grep stall on a pathological file, an AV/junction
+     * traversal stall): on expiry the child is killed (SIGTERM, escalating to
+     * SIGKILL after {@link KILL_GRACE_MS}) and the reading degrades to `null` —
+     * the same "no signal" contract every other failure path already honours, so
+     * the awaiting episode never hangs with the in-flight lock held.
      */
     runAnalyzer(codeDir) {
         return new Promise((resolve) => {
             let settled = false;
+            let timeoutTimer;
+            let killTimer;
             const done = (value) => {
                 if (settled)
                     return;
                 settled = true;
+                // Stop waiting for the (now irrelevant) timeout. The SIGKILL-escalation
+                // timer is intentionally NOT cleared here: it must outlive the resolve
+                // so a child that ignored SIGTERM is still force-reaped; it self-clears
+                // when the child finally closes/errors below.
+                if (timeoutTimer !== undefined)
+                    clearTimeout(timeoutTimer);
                 resolve(value);
             };
             const args = [this.scriptPath, codeDir];
@@ -293,18 +323,40 @@ export class LocalPythonMetricSource {
                 done(null);
                 return;
             }
+            // Once the child truly exits (normally OR after a kill), no escalation is
+            // needed; drop the SIGKILL-escalation timer so the event loop can drain.
+            const dropKillTimer = () => {
+                if (killTimer !== undefined) {
+                    clearTimeout(killTimer);
+                    killTimer = undefined;
+                }
+            };
             const out = [];
             child.stdout?.on('data', (chunk) => out.push(Buffer.from(chunk)));
             // stderr is intentionally ignored: the analyzer prints only JSON to
             // stdout and we treat any failure uniformly as "no signal".
-            child.on('error', () => done(null));
+            child.on('error', () => {
+                dropKillTimer();
+                done(null);
+            });
             child.on('close', (code) => {
+                dropKillTimer();
                 if (code !== 0) {
                     done(null);
                     return;
                 }
                 done(Buffer.concat(out).toString('utf8'));
             });
+            // Hung-analyzer guard: kill the child and degrade to null on expiry. The
+            // child's own 'close'/'error' (fired by the kill) is ignored once settled.
+            timeoutTimer = setTimeout(() => {
+                child.kill?.('SIGTERM');
+                // Escalate to SIGKILL if SIGTERM did not land in the grace window.
+                killTimer = setTimeout(() => child.kill?.('SIGKILL'), KILL_GRACE_MS);
+                killTimer.unref?.();
+                done(null);
+            }, this.timeoutMs);
+            timeoutTimer.unref?.();
         });
     }
 }

package/dist/core/project-config.d.ts CHANGED

@@ -27,6 +27,7 @@ export declare const ProjectConfigSchema: z.ZodObject<{
         focus: z.ZodOptional<z.ZodBoolean>;
         advantageRollbackThreshold: z.ZodOptional<z.ZodNumber>;
         editBudget: z.ZodOptional<z.ZodNumber>;
+        agentTimeoutMs: z.ZodOptional<z.ZodNumber>;
         reward: z.ZodOptional<z.ZodObject<{
             samples: z.ZodOptional<z.ZodNumber>;
             noiseFloor: z.ZodOptional<z.ZodNumber>;
@@ -37,6 +38,10 @@ export declare const ProjectConfigSchema: z.ZodObject<{
                 flag: "flag";
                 block: "block";
             }>>;
+            divergenceCheck: z.ZodOptional<z.ZodEnum<{
+                flag: "flag";
+                route: "route";
+            }>>;
         }, z.core.$strip>>;
         critic: z.ZodOptional<z.ZodObject<{
             baselineMode: z.ZodOptional<z.ZodEnum<{

package/dist/core/project-config.js CHANGED

@@ -60,6 +60,13 @@ export const ProjectConfigSchema = z.object({
         // 演进智能体 EVOLVING AGENT's ONE bounded edit may total. Default 40.
         // Optional/omitted ⇒ the agent's DEFAULT_EVOLVING_AGENT_EDIT_BUDGET applies.
         editBudget: z.number().optional(),
+        // Loop v2: per-agent headless-spawn ceiling in MILLISECONDS, threaded into
+        // ALL THREE agents (CRITIC AGENT（基线智能体 baseline agent）, 奖励智能体
+        // REWARD AGENT, 演进智能体 EVOLVING AGENT). A wedged host CLI is killed after
+        // this so it cannot hang the episode and leak the in-flight lock. Raise it
+        // for a repo whose critic re-do baseline legitimately runs long. Must be a
+        // positive integer; omitted ⇒ the built-in DEFAULT_AGENT_TIMEOUT_MS default.
+        agentTimeoutMs: z.number().int().positive().optional(),
         // Loop v2 — 奖励智能体 REWARD AGENT judge-quality knobs. ALL optional; omitted
         // ⇒ the historical single-sample, flag-only behaviour (no extra LLM spawns).
         reward: z
@@ -77,6 +84,13 @@ export const ProjectConfigSchema = z.object({
             // ④ Test-tamper handling: 'off' (no check), 'flag' (annotate only,
             //   default), or 'block' (force insufficient-signal + reject-buffer).
             tamperCheck: z.enum(['off', 'flag', 'block']).optional(),
+            // ④ Judge⇄verifier divergence handling: 'flag' (record the number +
+            //   annotate a correctness contradiction, informational only) or 'route'
+            //   (default) — ALSO demote such a duel to insufficient-signal so the loop
+            //   abstains instead of evolving on it. Routing fires ONLY when the judge
+            //   confidently prefers the worse-pass-rate arm (the complement to
+            //   gate-not-blend), never on a legitimate health/verbosity override.
+            divergenceCheck: z.enum(['flag', 'route']).optional(),
         })
             .optional(),
         // Loop v2 — CRITIC AGENT（基线智能体 baseline agent）baseline construction.
@@ -282,6 +296,13 @@ export function readProjectConfig(projectRoot) {
                 else if (rawSE.editBudget !== undefined) {
                     console.warn(`Invalid 'selfEvolution.editBudget' in config (must be a number), ignoring`);
                 }
+                const agentTimeoutResult = z.number().int().positive().safeParse(rawSE.agentTimeoutMs);
+                if (agentTimeoutResult.success) {
+                    selfEvolution.agentTimeoutMs = agentTimeoutResult.data;
+                }
+                else if (rawSE.agentTimeoutMs !== undefined) {
+                    console.warn(`Invalid 'selfEvolution.agentTimeoutMs' in config (must be a positive integer of milliseconds), ignoring`);
+                }
                 // Loop v2 — 奖励智能体 REWARD AGENT knobs. Resilient: each sub-field is
                 // validated independently; a bad value is dropped with a warning (the
                 // judge/aggregator default applies). Omitted ⇒ undefined (single-sample,
@@ -297,7 +318,8 @@ export function readProjectConfig(projectRoot) {
                 }
                 else if (rawSE.reward !== undefined) {
                     console.warn(`Invalid 'selfEvolution.reward' in config (samples/noiseFloor numbers, ` +
-                        `orderSwap/requireCorrectnessGate booleans, tamperCheck off|flag|block), ignoring`);
+                        `orderSwap/requireCorrectnessGate booleans, tamperCheck off|flag|block, ` +
+                        `divergenceCheck flag|route), ignoring`);
                 }
                 // Loop v2 — CRITIC AGENT knobs. Resilient: a bad value is dropped with a
                 // warning (the critic default 're-do' then applies). Omitted ⇒ undefined

package/dist/core/self-evolution/critic-agent.d.ts CHANGED

@@ -40,6 +40,7 @@
  */
 import { spawn as nodeSpawn } from 'node:child_process';
 import type { ObservedTestFailure } from '../trajectory/facts.js';
+import { type AgentHarness } from './host-harness.js';
 /** Error thrown when the worktree could not be created (git AND copy fallback failed). */
 export declare class CriticWorktreeError extends Error {
     constructor(message: string);
@@ -160,12 +161,26 @@ export interface RunCriticAgentOptions {
     baselineMode?: CriticBaselineMode;
     /** Injectable spawn seam for tests; defaults to node's spawn. */
     spawn?: typeof nodeSpawn;
-    /** Hard timeout per agent run (ms). Default 600000 (10 min). */
+    /** Hard timeout per agent run (ms). Default {@link DEFAULT_AGENT_TIMEOUT_MS} (10 min). */
     timeoutMs?: number;
+    /**
+     * Which host harness to spawn (claude|codex|opencode). When omitted,
+     * {@link runHeadlessAgent} resolves it from the ambient env. Threaded so an
+     * env-less subagent run can pass the recovered harness explicitly.
+     */
+    harness?: AgentHarness;
     /** Override `os.homedir()` for tests (claude transcript discovery). */
     homeDir?: string;
     /** TEST seam: inject the worktree root instead of git/copy, skipping setup teardown of git. */
     now?: Date;
+    /**
+     * Hard ceiling (ms) for each git worktree subcommand (create/remove/prune).
+     * Default {@link GIT_TIMEOUT_MS} (60s). A git hang past this is SIGTERM→SIGKILLed
+     * and rejected, which the worktree create/teardown paths absorb gracefully.
+     * Exposed mainly as a TEST seam (small value ⇒ a never-closing git fake settles
+     * fast instead of wedging the suite).
+     */
+    gitTimeoutMs?: number;
 }
 export interface RunCriticAgentResult {
     /** Absolute path of the `baseline-arm/` dir the capture landed in. */

package/dist/core/self-evolution/critic-agent.js CHANGED

@@ -47,7 +47,7 @@ import { readProjectConfig } from '../project-config.js';
 import { claudeProjectsDir } from '../learn/trajectory-discovery.js';
 import { claudeSourceFactory } from '../trajectory/adapters/claude.js';
 import { toActionSkeleton } from '../trajectory/skeleton.js';
-import { runHeadlessAgent } from './host-harness.js';
+import { runHeadlessAgent, DEFAULT_AGENT_TIMEOUT_MS } from './host-harness.js';
 import { currentPolicyVersion, readPolicyLedger, readPolicySnapshotFiles, } from './policy/index.js';
 import { advanceEpisodeStage, writeArmCapture } from './episode-store.js';
 /** Error thrown when the worktree could not be created (git AND copy fallback failed). */
@@ -188,6 +188,20 @@ async function resetChangeArtifactsForRedo(changeDir) {
 const NODE_MODULES = 'node_modules';
 const CONFIG_DIR = '.synergyspec-selfevolving';
 const SCHEMAS_REL = path.join('synergyspec-selfevolving', 'schemas');
+/**
+ * Hard ceiling (ms) for a single git worktree subcommand. Local worktree
+ * create/remove/prune ops are fast (sub-second), so a generous 60s ceiling only
+ * trips on a genuine HANG — a credential/GPG prompt, an `index.lock` held by a
+ * concurrent git, a stalled network FS, or a wedged hook. Without it `runGit`
+ * settles ONLY on the child's 'close'/'error', so such a hang would wedge the
+ * critic inside the in-flight-lock window (the same orphan/leak class the agent
+ * spawn already guards; the agent-spawn timeout cannot help here — the stall is
+ * in worktree setup/teardown, OUTSIDE {@link runHeadlessAgent}). On timeout the
+ * child is SIGTERM→SIGKILLed and the promise REJECTS, which the callers absorb
+ * gracefully: {@link createIsolatedWorktree} falls back to the copy path, and
+ * {@link teardownWorktree}'s git calls are best-effort (`.catch(() => {})`).
+ */
+const GIT_TIMEOUT_MS = 60_000;
 /**
  * Run the CRITIC AGENT（基线智能体 baseline agent）'s full baseline arm and
  * persist its capture. ALWAYS tears the worktree down (产物即弃). On success it
@@ -198,7 +212,8 @@ const SCHEMAS_REL = path.join('synergyspec-selfevolving', 'schemas');
 export async function runCriticAgent(opts) {
     const repoRoot = path.resolve(opts.repoRoot);
     const spawnImpl = opts.spawn ?? nodeSpawn;
-    const timeoutMs = opts.timeoutMs ?? 600000;
+    const timeoutMs = opts.timeoutMs ?? DEFAULT_AGENT_TIMEOUT_MS;
+    const gitTimeoutMs = opts.gitTimeoutMs ?? GIT_TIMEOUT_MS;
     const homeDir = opts.homeDir ?? os.homedir();
     const baselineMode = opts.baselineMode ?? 're-do';
     if (!Number.isInteger(opts.baselineVersion) || opts.baselineVersion < 0) {
@@ -212,7 +227,7 @@ export async function runCriticAgent(opts) {
     let worktreeMode = 'git-worktree';
     try {
         // 1) Isolated worktree OUTSIDE the repo (git worktree --detach, else copy).
-        worktreeMode = await createIsolatedWorktree(repoRoot, worktreePath, spawnImpl);
+        worktreeMode = await createIsolatedWorktree(repoRoot, worktreePath, spawnImpl, gitTimeoutMs);
         // 're-do' fidelity needs the detached-HEAD tree (pre-change code). The copy
         // fallback (non-git repo) brings the LIVE tree — including the change's
         // uncommitted implementation — so it cannot reach the pre-change state and
@@ -242,6 +257,7 @@ export async function runCriticAgent(opts) {
             cwd: worktreePath,
             spawn: spawnImpl,
             timeoutMs,
+            ...(opts.harness ? { harness: opts.harness } : {}),
         });
         // 5) Build + persist the baseline arm.
         const measuredAt = new Date().toISOString();
@@ -341,7 +357,7 @@ export async function runCriticAgent(opts) {
     }
     finally {
         // 6) 产物即弃: ALWAYS tear the worktree down — even when a step above threw.
-        await teardownWorktree(repoRoot, worktreePath, worktreeMode, spawnImpl);
+        await teardownWorktree(repoRoot, worktreePath, worktreeMode, spawnImpl, gitTimeoutMs);
     }
 }
 // ---------------------------------------------------------------------------
@@ -353,12 +369,12 @@ export async function runCriticAgent(opts) {
  * (not a repo, git missing, etc.) falls back to a recursive file copy of the
  * repo excluding `node_modules` and `.git`. Returns which mode succeeded.
  */
-async function createIsolatedWorktree(repoRoot, worktreePath, spawnImpl) {
+async function createIsolatedWorktree(repoRoot, worktreePath, spawnImpl, gitTimeoutMs = GIT_TIMEOUT_MS) {
     // Best-effort: a stale worktree dir from an interrupted run would make both
     // git-add and copy fail; clear it first (产物即弃 — nothing here is durable).
     await fs.rm(worktreePath, { recursive: true, force: true }).catch(() => { });
     try {
-        await runGit(repoRoot, ['worktree', 'add', '--detach', worktreePath, 'HEAD'], spawnImpl);
+        await runGit(repoRoot, ['worktree', 'add', '--detach', worktreePath, 'HEAD'], spawnImpl, gitTimeoutMs);
         return 'git-worktree';
     }
     catch {
@@ -378,27 +394,81 @@ async function createIsolatedWorktree(repoRoot, worktreePath, spawnImpl) {
  * For the copy fallback: recursive rmdir. Never throws — teardown failures must
  * not mask a real error from the run.
  */
-async function teardownWorktree(repoRoot, worktreePath, mode, spawnImpl) {
+async function teardownWorktree(repoRoot, worktreePath, mode, spawnImpl, gitTimeoutMs = GIT_TIMEOUT_MS) {
     if (mode === 'git-worktree') {
-        await runGit(repoRoot, ['worktree', 'remove', '--force', worktreePath], spawnImpl).catch(() => { });
-        await runGit(repoRoot, ['worktree', 'prune'], spawnImpl).catch(() => { });
+        await runGit(repoRoot, ['worktree', 'remove', '--force', worktreePath], spawnImpl, gitTimeoutMs).catch(() => { });
+        await runGit(repoRoot, ['worktree', 'prune'], spawnImpl, gitTimeoutMs).catch(() => { });
     }
     // The node_modules entry is a junction/symlink; `rm -rf` removes the link, not
     // the real tree behind it. Belt-and-suspenders rmdir for both modes.
     await fs.rm(worktreePath, { recursive: true, force: true }).catch(() => { });
 }
-/** Run a git subcommand in `repoRoot`; rejects on a non-zero exit or spawn error. */
-async function runGit(repoRoot, args, spawnImpl) {
+/**
+ * Run a git subcommand in `repoRoot`; rejects on a non-zero exit, a spawn error,
+ * OR a hang past `timeoutMs` (SIGTERM, then SIGKILL ~2s later — mirrors
+ * {@link runHeadlessAgent}'s escalation). Spawned with a NON-INTERACTIVE env so a
+ * credential/GPG prompt fails fast instead of blocking forever:
+ *   - `GIT_TERMINAL_PROMPT=0` / `GIT_ASKPASS=''` / `GCM_INTERACTIVE='never'` —
+ *     no auth prompt is ever opened (it errors out instead), and
+ *   - `GIT_OPTIONAL_LOCKS=0` — git skips the optional index-lock acquisition that
+ *     a concurrent git could otherwise block on.
+ * Both guards keep `runGit`'s existing resolve/reject contract: a hang becomes a
+ * rejection the callers already absorb (copy fallback / best-effort teardown),
+ * so the critic degrades gracefully rather than wedging.
+ */
+async function runGit(repoRoot, args, spawnImpl, timeoutMs = GIT_TIMEOUT_MS) {
     await new Promise((resolve, reject) => {
-        const child = spawnImpl('git', args, { cwd: repoRoot, shell: false });
+        const child = spawnImpl('git', args, {
+            cwd: repoRoot,
+            shell: false,
+            env: {
+                ...process.env,
+                GIT_TERMINAL_PROMPT: '0',
+                GIT_OPTIONAL_LOCKS: '0',
+                GIT_ASKPASS: '',
+                GCM_INTERACTIVE: 'never',
+            },
+        });
         const err = [];
+        let settled = false;
+        let timer;
+        const finish = (fn) => {
+            if (settled)
+                return;
+            settled = true;
+            if (timer)
+                clearTimeout(timer);
+            fn();
+        };
+        timer = setTimeout(() => {
+            try {
+                child.kill(); // SIGTERM
+                // Escalate to SIGKILL shortly after in case git ignores SIGTERM, so a
+                // wedged child cannot orphan. unref so this timer never keeps the event
+                // loop alive on its own.
+                setTimeout(() => {
+                    try {
+                        child.kill('SIGKILL');
+                    }
+                    catch {
+                        // ignore
+                    }
+                }, 2000).unref?.();
+            }
+            catch {
+                // ignore
+            }
+            finish(() => reject(new Error(`git ${args[0]} timed out after ${timeoutMs}ms: ${Buffer.concat(err).toString('utf8')}`)));
+        }, timeoutMs);
         child.stderr?.on('data', (c) => err.push(Buffer.from(c)));
-        child.on('error', (e) => reject(e));
+        child.on('error', (e) => finish(() => reject(e)));
         child.on('close', (code) => {
-            if (code === 0)
-                resolve();
-            else
-                reject(new Error(`git ${args[0]} exited ${code}: ${Buffer.concat(err).toString('utf8')}`));
+            finish(() => {
+                if (code === 0)
+                    resolve();
+                else
+                    reject(new Error(`git ${args[0]} exited ${code}: ${Buffer.concat(err).toString('utf8')}`));
+            });
         });
     });
 }

package/dist/core/self-evolution/episode-orchestrator.d.ts CHANGED

@@ -52,6 +52,7 @@ import { type EpisodeStage } from './episode-store.js';
 import { type ArmObjective, type CriticBaselineMode } from './critic-agent.js';
 import { type RewardConfig } from './reward-aggregator.js';
 import { type RunEvolvingAgentResult } from './evolving-agent.js';
+import type { AgentHarness } from './host-harness.js';
 /** The 主智能体 MAIN AGENT (policy vN+1) capture the orchestrator records. */
 export interface MainArmCapture {
     /** Raw session transcript text, when provided; persisted as `transcript.jsonl`. */
@@ -162,6 +163,19 @@ export interface RunEpisodeOptions {
     };
     /** Injectable spawn seam — threaded to ALL THREE agents. Defaults to node's spawn. */
     spawn?: typeof nodeSpawn;
+    /**
+     * Hard timeout per agent run (ms), threaded into ALL THREE agents (critic,
+     * reward, evolving). Omitted ⇒ each agent defaults internally
+     * (DEFAULT_AGENT_TIMEOUT_MS, 10 min), so a wedged host CLI cannot hang the loop
+     * forever and leak the in-flight lock (ses_1330/1331).
+     */
+    agentTimeoutMs?: number;
+    /**
+     * Host harness override, threaded into ALL THREE agents so a subagent running
+     * in an env-less ambient environment (where resolveHostHarness would pick the
+     * wrong default binary) still spawns the right CLI (ses_1331). Omitted ⇒ each
+     * agent resolves the harness itself.
+     */
+    harness?: AgentHarness;
     /** Injectable clock for the lock + episode id; defaults to `new Date()`. */
     now?: Date;
     /**
@@ -205,6 +219,10 @@ export interface ResumeEpisodeOptions {
     advantageRollbackThreshold?: number;
     /** Edit budget L (default 40). */
     editBudget?: number;
+    /** Hard timeout per agent run (ms); threaded into the resumed evolving agent. */
+    agentTimeoutMs?: number;
+    /** Host harness override; threaded into the resumed evolving agent. */
+    harness?: AgentHarness;
 }
 export interface ResumeEpisodeResult {
     episodeId: string;
@@ -223,6 +241,16 @@ export interface ResumeEpisodeResult {
  *   - 'scored'                    → run the decision (f) then the 演进智能体 (g).
  *   - 'rolled-back' / 'kept'      → run the 演进智能体 EVOLVING AGENT (g) then close.
  *   - 'evolved'/'evolution-refused'/'abstained' → close.
+ *   - 'errored'                   → RE-DRIVE from the last GOOD pre-error stage
+ *                                   (an episode may have errored on a TRANSIENT
+ *                                   cause — a one-off git/analyzer/agent timeout).
+ *                                   The pre-error stage is the last `stageHistory`
+ *                                   entry that is NOT 'errored'; when it is one of
+ *                                   {'scored','rolled-back','kept'} (the
+ *                                   resume-entry stages) we advance errored → that
+ *                                   stage and fall through to the normal dispatch.
+ *                                   Otherwise the pre-error stage is not
+ *                                   auto-resumable and the episode is reported as-is.
  *   - earlier stages              → not auto-resumable here (the arms / reward
  *                                   agent need their own re-entry); reported as-is.
  *