synergyspec-selfevolving 2.1.1 → 2.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,7 +7,7 @@ import { findTranscriptsForChange, resolveChangeDir, validateExplicitTrajectoryH
7
7
  import { getTrajectoryForChange } from '../core/trajectory/registry.js';
8
8
  import { toTrajectoryFacts, describeRunnerResults } from '../core/trajectory/facts.js';
9
9
  import { toActionSkeleton } from '../core/trajectory/skeleton.js';
10
- import { resolveHostHarness } from '../core/self-evolution/host-harness.js';
10
+ import { resolveHostHarness, resolveHostHarnessForRepo } from '../core/self-evolution/host-harness.js';
11
11
  import { mineSuccessSignals } from '../core/self-evolution/success-channel.js';
12
12
  import { captureMainArm, runEpisode, } from '../core/self-evolution/episode-orchestrator.js';
13
13
  import { buildLLMSummaryCandidates, ingestLearnHandoff, } from '../core/learn/llm-summary.js';
@@ -36,6 +36,21 @@ export function registerLearnCommand(program, deps = {}) {
36
36
  .action(async (change, options) => {
37
37
  try {
38
38
  const projectRoot = process.cwd();
39
+ // SEED the host harness for the env-less episode subagent. learn runs at
40
+ // HOST level, where the OPENCODE_*/CODEX_* env that distinguishes the
41
+ // host harness IS present; the downstream loop-v2 episode (and its
42
+ // reward/evolving agent spawns) can run in an env-less Task subagent that
43
+ // would otherwise default to the 'claude' binary. resolveHostHarnessForRepo
44
+ // self-persists the confidently-resolved harness to
45
+ // `.synergyspec-selfevolving/host-harness.json`, so the subagent reads it
46
+ // back instead of guessing. Best-effort: a persistence failure must never
47
+ // fail the learn run (a missing seed only degrades to today's behavior).
48
+ try {
49
+ await resolveHostHarnessForRepo(projectRoot);
50
+ }
51
+ catch {
52
+ // best-effort seed only.
53
+ }
39
54
  // USER-TYPED handle flags are validated up front and fail LOUD
40
55
  // (exit 1) on a miss — unlike the env-var channel, which keeps the
41
56
  // fail-closed refusal semantics inside discovery (empty result, the
@@ -180,18 +195,29 @@ export function registerLearnCommand(program, deps = {}) {
180
195
  report,
181
196
  });
182
197
  // Thread the loop-v2 reward judge-quality config (samples / noiseFloor /
183
- // orderSwap / tamperCheck). Omitted ⇒ the orchestrator's single-sample,
184
- // flag-only default (no extra spawns).
198
+ // orderSwap / tamperCheck / divergenceCheck). Omitted ⇒ the orchestrator's
199
+ // single-sample, divergence-routing default (no extra spawns).
185
200
  const episodeConfig = readProjectConfig(projectRoot);
201
+ // Pass the host-resolved harness EXPLICITLY into the in-process episode
202
+ // (learn runs host-level where the harness is confidently resolvable),
203
+ // so the orchestrator's reward/evolving agent spawns never fall back to
204
+ // the default 'claude' binary on a non-claude host.
205
+ const harness = await resolveHostHarnessForRepo(projectRoot);
186
206
  episodeOutcome = await runEpisodeImpl({
187
207
  repoRoot: projectRoot,
188
208
  targetId: concreteEvolveTarget.targetId,
189
209
  changeName: report.changeName,
190
210
  changeDirPath: report.changeDir,
191
211
  mainArm,
212
+ harness,
192
213
  ...(episodeConfig?.selfEvolution?.reward
193
214
  ? { reward: episodeConfig.selfEvolution.reward }
194
215
  : {}),
216
+ // Per-agent headless-spawn ceiling (ms). Omitted ⇒ the orchestrator's
217
+ // built-in DEFAULT_AGENT_TIMEOUT_MS applies.
218
+ ...(episodeConfig?.selfEvolution?.agentTimeoutMs !== undefined
219
+ ? { agentTimeoutMs: episodeConfig.selfEvolution.agentTimeoutMs }
220
+ : {}),
195
221
  });
196
222
  }
197
223
  if (options.json) {
@@ -8,6 +8,7 @@ readPolicyLedger, readRejectBuffer, currentPolicyVersion, rollbackPolicyVersion,
8
8
  lookupCanonicalTarget, listCanonicalTargets, DESIGN_ARTIFACT_TARGET_ID, } from '../core/self-evolution/index.js';
9
9
  import { generateLearnReport } from '../core/learn.js';
10
10
  import { validateExplicitTrajectoryHandle } from '../core/learn/trajectory-discovery.js';
11
+ import { resolveHostHarnessForRepo } from '../core/self-evolution/host-harness.js';
11
12
  import { validateChangeExists } from './workflow/shared.js';
12
13
  import { readProjectConfig } from '../core/project-config.js';
13
14
  /**
@@ -150,12 +151,21 @@ export async function runEpisodeCommand(args, opts) {
150
151
  let outcome;
151
152
  try {
152
153
  const episodeConfig = readProjectConfig(opts.repoRoot);
154
+ // Resolve the HOST harness once here (where the host's OPENCODE_*/CODEX_* env
155
+ // is still present) and thread it EXPLICITLY into the episode. resolveHost-
156
+ // HarnessForRepo self-persists the env-resolved choice to
157
+ // `.synergyspec-selfevolving/host-harness.json`, so even when the
158
+ // orchestrator's reward/evolving agents later spawn from an env-less Task
159
+ // subagent they read the seeded harness instead of defaulting to the
160
+ // 'claude' binary (the ydata proposer-spawn failure).
161
+ const harness = await resolveHostHarnessForRepo(opts.repoRoot);
153
162
  const episodeOptions = {
154
163
  repoRoot: opts.repoRoot,
155
164
  targetId,
156
165
  changeName,
157
166
  changeDirPath,
158
167
  mainArm,
168
+ harness,
159
169
  ...(args.noBaseline ? { skipBaseline: true } : {}),
160
170
  ...(episodeConfig?.selfEvolution?.reward
161
171
  ? { reward: episodeConfig.selfEvolution.reward }
@@ -163,6 +173,12 @@ export async function runEpisodeCommand(args, opts) {
163
173
  ...(episodeConfig?.selfEvolution?.critic
164
174
  ? { critic: episodeConfig.selfEvolution.critic }
165
175
  : {}),
176
+ // Per-agent headless-spawn ceiling (ms). Omitted ⇒ the orchestrator's
177
+ // built-in DEFAULT_AGENT_TIMEOUT_MS applies. Set it to raise the per-agent
178
+ // ceiling for a repo whose critic re-do baseline legitimately runs long.
179
+ ...(episodeConfig?.selfEvolution?.agentTimeoutMs !== undefined
180
+ ? { agentTimeoutMs: episodeConfig.selfEvolution.agentTimeoutMs }
181
+ : {}),
166
182
  };
167
183
  outcome = await runEpisode(episodeOptions);
168
184
  }
@@ -234,7 +250,27 @@ export async function runResumeEpisodeCommand(args, opts) {
234
250
  const resumeEpisode = opts.resumeEpisode ?? resumeEpisodeImpl;
235
251
  let result;
236
252
  try {
237
- result = await resumeEpisode({ repoRoot: opts.repoRoot, episodeId: args.episodeId });
253
+ // Resolve the HOST harness HERE (where the host's OPENCODE_*/CODEX_* env is
254
+ // still present) and thread it EXPLICITLY into the resumed episode. Resume is
255
+ // the operator re-entry MOST likely to run env-less (a recovery from another
256
+ // shell), so without this the resumed 演进智能体 EVOLVING AGENT re-spawns
257
+ // against the absent default 'claude' binary on an opencode/codex host — the
258
+ // ses_1330/1331 ENAMETOOLONG/spawn failure the harness sidecar exists to
259
+ // prevent. resolveHostHarnessForRepo self-persists the resolved choice, so
260
+ // an env-less Task subagent reads the seeded harness instead of defaulting.
261
+ const harness = await resolveHostHarnessForRepo(opts.repoRoot);
262
+ // Thread the configured per-agent headless-spawn ceiling (ms) into the
263
+ // resumed 演进智能体 EVOLVING AGENT. Omitted ⇒ the built-in
264
+ // DEFAULT_AGENT_TIMEOUT_MS applies.
265
+ const resumeConfig = readProjectConfig(opts.repoRoot);
266
+ result = await resumeEpisode({
267
+ repoRoot: opts.repoRoot,
268
+ episodeId: args.episodeId,
269
+ harness,
270
+ ...(resumeConfig?.selfEvolution?.agentTimeoutMs !== undefined
271
+ ? { agentTimeoutMs: resumeConfig.selfEvolution.agentTimeoutMs }
272
+ : {}),
273
+ });
238
274
  }
239
275
  catch (err) {
240
276
  const message = err instanceof Error ? err.message : String(err);
@@ -36,6 +36,9 @@ export interface LocalPythonMetricSourceOptions {
36
36
  /** Path to a slop-rules YAML for the ast-grep engine. When omitted, resolved
37
37
  * to the `slop_rules.yaml` vendored next to the analyzer script. */
38
38
  rulesPath?: string;
39
+ /** Wall-clock ceiling (ms) for one analyzer spawn before it is killed and the
40
+ * reading degraded to `null`. Defaults to {@link DEFAULT_ANALYZER_TIMEOUT_MS}. */
41
+ timeoutMs?: number;
39
42
  }
40
43
  /**
41
44
  * Locate the ast-grep binary the analyzer's Python slop-rule engine should
@@ -59,6 +62,7 @@ export declare class LocalPythonMetricSource implements MetricSource {
59
62
  private readonly scriptPath;
60
63
  private readonly astGrepBin;
61
64
  private readonly rulesPath;
65
+ private readonly timeoutMs;
62
66
  constructor(options?: LocalPythonMetricSourceOptions);
63
67
  /**
64
68
  * Run the analyzer over `codeDir` and return its metrics, or `null` on any
@@ -84,6 +88,13 @@ export declare class LocalPythonMetricSource implements MetricSource {
84
88
  * rules file exists (the analyzer's own PATH fallback still uses it even with
85
89
  * no resolved binary). Resolves to the raw stdout string on a clean (exit 0)
86
90
  * run, or `null` if the process cannot be spawned or exits non-zero.
91
+ *
92
+ * A {@link timeoutMs} wall-clock ceiling guards against a hung analyzer (an
93
+ * import deadlock, an ast-grep stall on a pathological file, an AV/junction
94
+ * traversal stall): on expiry the child is killed (SIGTERM, escalating to
95
+ * SIGKILL after {@link KILL_GRACE_MS}) and the reading degrades to `null` —
96
+ * the same "no signal" contract every other failure path already honours, so
97
+ * the awaiting episode never hangs with the in-flight lock held.
87
98
  */
88
99
  private runAnalyzer;
89
100
  }
@@ -25,6 +25,19 @@ import { createRequire } from 'node:module';
25
25
  import path from 'node:path';
26
26
  /** The exact set of numeric keys the analyzer emits. Order is irrelevant. */
27
27
  const HEALTH_KEYS = ['structural_erosion', 'verbosity'];
28
+ /**
29
+ * Wall-clock ceiling for one analyzer spawn. The Python/ast-grep analyzer is
30
+ * normally sub-second; a run that exceeds this is treated as hung (e.g. a Python
31
+ * import deadlock, ast-grep stalling on a pathological file, or an AV/junction
32
+ * traversal stall on Windows) and degraded to the "no signal ⇒ null" contract.
33
+ */
34
+ const DEFAULT_ANALYZER_TIMEOUT_MS = 120_000;
35
+ /**
36
+ * Grace window between the polite SIGTERM and the forced SIGKILL when a timed-out
37
+ * analyzer has not exited yet. Short on purpose: the goal is to stop holding the
38
+ * in-flight episode lock, not to let a wedged child linger.
39
+ */
40
+ const KILL_GRACE_MS = 2_000;
28
41
  /**
29
42
  * Locate `scripts/code-health.py` relative to this module. Built output lives
30
43
  * at `dist/core/fitness/health/local-source.js`; the script stays at the
@@ -218,12 +231,14 @@ export class LocalPythonMetricSource {
218
231
  scriptPath;
219
232
  astGrepBin;
220
233
  rulesPath;
234
+ timeoutMs;
221
235
  constructor(options = {}) {
222
236
  this.pythonBin = options.pythonBin ?? defaultPythonBin();
223
237
  this.spawnImpl = options.spawnImpl ?? nodeSpawn;
224
238
  this.scriptPath = options.scriptPath ?? defaultScriptPath();
225
239
  this.astGrepBin = options.astGrepBin ?? defaultAstGrepBin();
226
240
  this.rulesPath = options.rulesPath ?? defaultRulesPath(this.scriptPath);
241
+ this.timeoutMs = options.timeoutMs ?? DEFAULT_ANALYZER_TIMEOUT_MS;
227
242
  }
228
243
  /**
229
244
  * Run the analyzer over `codeDir` and return its metrics, or `null` on any
@@ -269,14 +284,29 @@ export class LocalPythonMetricSource {
269
284
  * rules file exists (the analyzer's own PATH fallback still uses it even with
270
285
  * no resolved binary). Resolves to the raw stdout string on a clean (exit 0)
271
286
  * run, or `null` if the process cannot be spawned or exits non-zero.
287
+ *
288
+ * A {@link timeoutMs} wall-clock ceiling guards against a hung analyzer (an
289
+ * import deadlock, an ast-grep stall on a pathological file, an AV/junction
290
+ * traversal stall): on expiry the child is killed (SIGTERM, escalating to
291
+ * SIGKILL after {@link KILL_GRACE_MS}) and the reading degrades to `null` —
292
+ * the same "no signal" contract every other failure path already honours, so
293
+ * the awaiting episode never hangs with the in-flight lock held.
272
294
  */
273
295
  runAnalyzer(codeDir) {
274
296
  return new Promise((resolve) => {
275
297
  let settled = false;
298
+ let timeoutTimer;
299
+ let killTimer;
276
300
  const done = (value) => {
277
301
  if (settled)
278
302
  return;
279
303
  settled = true;
304
+ // Stop waiting for the (now irrelevant) timeout. The SIGKILL-escalation
305
+ // timer is intentionally NOT cleared here: it must outlive the resolve
306
+ // so a child that ignored SIGTERM is still force-reaped; it self-clears
307
+ // when the child finally closes/errors below.
308
+ if (timeoutTimer !== undefined)
309
+ clearTimeout(timeoutTimer);
280
310
  resolve(value);
281
311
  };
282
312
  const args = [this.scriptPath, codeDir];
@@ -293,18 +323,40 @@ export class LocalPythonMetricSource {
293
323
  done(null);
294
324
  return;
295
325
  }
326
+ // Once the child truly exits (normally OR after a kill), no escalation is
327
+ // needed; drop the SIGKILL-escalation timer so the event loop can drain.
328
+ const dropKillTimer = () => {
329
+ if (killTimer !== undefined) {
330
+ clearTimeout(killTimer);
331
+ killTimer = undefined;
332
+ }
333
+ };
296
334
  const out = [];
297
335
  child.stdout?.on('data', (chunk) => out.push(Buffer.from(chunk)));
298
336
  // stderr is intentionally ignored: the analyzer prints only JSON to
299
337
  // stdout and we treat any failure uniformly as "no signal".
300
- child.on('error', () => done(null));
338
+ child.on('error', () => {
339
+ dropKillTimer();
340
+ done(null);
341
+ });
301
342
  child.on('close', (code) => {
343
+ dropKillTimer();
302
344
  if (code !== 0) {
303
345
  done(null);
304
346
  return;
305
347
  }
306
348
  done(Buffer.concat(out).toString('utf8'));
307
349
  });
350
+ // Hung-analyzer guard: kill the child and degrade to null on expiry. The
351
+ // child's own 'close'/'error' (fired by the kill) is ignored once settled.
352
+ timeoutTimer = setTimeout(() => {
353
+ child.kill?.('SIGTERM');
354
+ // Escalate to SIGKILL if SIGTERM did not land in the grace window. The timer is unref'd below, so it fires only while something else keeps the event loop alive.
355
+ killTimer = setTimeout(() => child.kill?.('SIGKILL'), KILL_GRACE_MS);
356
+ killTimer.unref?.();
357
+ done(null);
358
+ }, this.timeoutMs);
359
+ timeoutTimer.unref?.();
308
360
  });
309
361
  }
310
362
  }
@@ -27,6 +27,7 @@ export declare const ProjectConfigSchema: z.ZodObject<{
27
27
  focus: z.ZodOptional<z.ZodBoolean>;
28
28
  advantageRollbackThreshold: z.ZodOptional<z.ZodNumber>;
29
29
  editBudget: z.ZodOptional<z.ZodNumber>;
30
+ agentTimeoutMs: z.ZodOptional<z.ZodNumber>;
30
31
  reward: z.ZodOptional<z.ZodObject<{
31
32
  samples: z.ZodOptional<z.ZodNumber>;
32
33
  noiseFloor: z.ZodOptional<z.ZodNumber>;
@@ -37,6 +38,10 @@ export declare const ProjectConfigSchema: z.ZodObject<{
37
38
  flag: "flag";
38
39
  block: "block";
39
40
  }>>;
41
+ divergenceCheck: z.ZodOptional<z.ZodEnum<{
42
+ flag: "flag";
43
+ route: "route";
44
+ }>>;
40
45
  }, z.core.$strip>>;
41
46
  critic: z.ZodOptional<z.ZodObject<{
42
47
  baselineMode: z.ZodOptional<z.ZodEnum<{
@@ -60,6 +60,13 @@ export const ProjectConfigSchema = z.object({
60
60
  // 演进智能体 EVOLVING AGENT's ONE bounded edit may total. Default 40.
61
61
  // Optional/omitted ⇒ the agent's DEFAULT_EVOLVING_AGENT_EDIT_BUDGET applies.
62
62
  editBudget: z.number().optional(),
63
+ // Loop v2: per-agent headless-spawn ceiling in MILLISECONDS, threaded into
64
+ // ALL THREE agents (CRITIC AGENT(基线智能体 baseline agent), 奖励智能体
65
+ // REWARD AGENT, 演进智能体 EVOLVING AGENT). A wedged host CLI is killed after
66
+ // this so it cannot hang the episode and leak the in-flight lock. Raise it
67
+ // for a repo whose critic re-do baseline legitimately runs long. Must be a
68
+ // positive integer; omitted ⇒ the built-in DEFAULT_AGENT_TIMEOUT_MS applies.
69
+ agentTimeoutMs: z.number().int().positive().optional(),
63
70
  // Loop v2 — 奖励智能体 REWARD AGENT judge-quality knobs. ALL optional; omitted
64
71
  // ⇒ the historical single-sample, flag-only behaviour (no extra LLM spawns).
65
72
  reward: z
@@ -77,6 +84,13 @@ export const ProjectConfigSchema = z.object({
77
84
  // ④ Test-tamper handling: 'off' (no check), 'flag' (annotate only,
78
85
  // default), or 'block' (force insufficient-signal + reject-buffer).
79
86
  tamperCheck: z.enum(['off', 'flag', 'block']).optional(),
87
+ // ④ Judge⇄verifier divergence handling: 'flag' (record the number +
88
+ // annotate a correctness contradiction, informational only) or 'route'
89
+ // (default) — ALSO demote such a duel to insufficient-signal so the loop
90
+ // abstains instead of evolving on it. Routing fires ONLY when the judge
91
+ // confidently prefers the worse-pass-rate arm (the complement to
92
+ // gate-not-blend), never on a legitimate health/verbosity override.
93
+ divergenceCheck: z.enum(['flag', 'route']).optional(),
80
94
  })
81
95
  .optional(),
82
96
  // Loop v2 — CRITIC AGENT(基线智能体 baseline agent)baseline construction.
@@ -282,6 +296,13 @@ export function readProjectConfig(projectRoot) {
282
296
  else if (rawSE.editBudget !== undefined) {
283
297
  console.warn(`Invalid 'selfEvolution.editBudget' in config (must be a number), ignoring`);
284
298
  }
299
+ const agentTimeoutResult = z.number().int().positive().safeParse(rawSE.agentTimeoutMs);
300
+ if (agentTimeoutResult.success) {
301
+ selfEvolution.agentTimeoutMs = agentTimeoutResult.data;
302
+ }
303
+ else if (rawSE.agentTimeoutMs !== undefined) {
304
+ console.warn(`Invalid 'selfEvolution.agentTimeoutMs' in config (must be a positive integer of milliseconds), ignoring`);
305
+ }
285
306
  // Loop v2 — 奖励智能体 REWARD AGENT knobs. Resilient: each sub-field is
286
307
  // validated independently; a bad value is dropped with a warning (the
287
308
  // judge/aggregator default applies). Omitted ⇒ undefined (single-sample,
@@ -297,7 +318,8 @@ export function readProjectConfig(projectRoot) {
297
318
  }
298
319
  else if (rawSE.reward !== undefined) {
299
320
  console.warn(`Invalid 'selfEvolution.reward' in config (samples/noiseFloor numbers, ` +
300
- `orderSwap/requireCorrectnessGate booleans, tamperCheck off|flag|block), ignoring`);
321
+ `orderSwap/requireCorrectnessGate booleans, tamperCheck off|flag|block, ` +
322
+ `divergenceCheck flag|route), ignoring`);
301
323
  }
302
324
  // Loop v2 — CRITIC AGENT knobs. Resilient: a bad value is dropped with a
303
325
  // warning (the critic default 're-do' then applies). Omitted ⇒ undefined
@@ -40,6 +40,7 @@
40
40
  */
41
41
  import { spawn as nodeSpawn } from 'node:child_process';
42
42
  import type { ObservedTestFailure } from '../trajectory/facts.js';
43
+ import { type AgentHarness } from './host-harness.js';
43
44
  /** Error thrown when the worktree could not be created (git AND copy fallback failed). */
44
45
  export declare class CriticWorktreeError extends Error {
45
46
  constructor(message: string);
@@ -160,12 +161,26 @@ export interface RunCriticAgentOptions {
160
161
  baselineMode?: CriticBaselineMode;
161
162
  /** Injectable spawn seam for tests; defaults to node's spawn. */
162
163
  spawn?: typeof nodeSpawn;
163
- /** Hard timeout per agent run (ms). Default 600000 (10 min). */
164
+ /** Hard timeout per agent run (ms). Default {@link DEFAULT_AGENT_TIMEOUT_MS} (10 min). */
164
165
  timeoutMs?: number;
166
+ /**
167
+ * Which host harness to spawn (claude|codex|opencode). When omitted,
168
+ * {@link runHeadlessAgent} resolves it from the ambient env. Threaded so an
169
+ * env-less subagent run can pass the recovered harness explicitly.
170
+ */
171
+ harness?: AgentHarness;
165
172
  /** Override `os.homedir()` for tests (claude transcript discovery). */
166
173
  homeDir?: string;
167
174
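  /** TEST seam: inject the worktree root instead of git/copy, skipping setup teardown of git. NOTE(review): this text does not match the `now?: Date` field it documents (presumably an injectable clock) — confirm and correct. */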
168
175
  now?: Date;
176
+ /**
177
+ * Hard ceiling (ms) for each git worktree subcommand (create/remove/prune).
178
+ * Default {@link GIT_TIMEOUT_MS} (60s). A git hang past this is SIGTERM→SIGKILLed
179
+ * and rejected, which the worktree create/teardown paths absorb gracefully.
180
+ * Exposed mainly as a TEST seam (small value ⇒ a never-closing git fake settles
181
+ * fast instead of wedging the suite).
182
+ */
183
+ gitTimeoutMs?: number;
169
184
  }
170
185
  export interface RunCriticAgentResult {
171
186
  /** Absolute path of the `baseline-arm/` dir the capture landed in. */
@@ -47,7 +47,7 @@ import { readProjectConfig } from '../project-config.js';
47
47
  import { claudeProjectsDir } from '../learn/trajectory-discovery.js';
48
48
  import { claudeSourceFactory } from '../trajectory/adapters/claude.js';
49
49
  import { toActionSkeleton } from '../trajectory/skeleton.js';
50
- import { runHeadlessAgent } from './host-harness.js';
50
+ import { runHeadlessAgent, DEFAULT_AGENT_TIMEOUT_MS } from './host-harness.js';
51
51
  import { currentPolicyVersion, readPolicyLedger, readPolicySnapshotFiles, } from './policy/index.js';
52
52
  import { advanceEpisodeStage, writeArmCapture } from './episode-store.js';
53
53
  /** Error thrown when the worktree could not be created (git AND copy fallback failed). */
@@ -188,6 +188,20 @@ async function resetChangeArtifactsForRedo(changeDir) {
188
188
  const NODE_MODULES = 'node_modules';
189
189
  const CONFIG_DIR = '.synergyspec-selfevolving';
190
190
  const SCHEMAS_REL = path.join('synergyspec-selfevolving', 'schemas');
191
+ /**
192
+ * Hard ceiling (ms) for a single git worktree subcommand. Local worktree
193
+ * create/remove/prune ops are fast (sub-second), so a generous 60s ceiling only
194
+ * trips on a genuine HANG — a credential/GPG prompt, an `index.lock` held by a
195
+ * concurrent git, a stalled network FS, or a wedged hook. Without it `runGit`
196
+ * settles ONLY on the child's 'close'/'error', so such a hang would wedge the
197
+ * critic inside the in-flight-lock window (the same orphan/leak class the agent
198
+ * spawn already guards; the agent-spawn timeout cannot help here — the stall is
199
+ * in worktree setup/teardown, OUTSIDE {@link runHeadlessAgent}). On timeout the
200
+ * child is SIGTERM→SIGKILLed and the promise REJECTS, which the callers absorb
201
+ * gracefully: {@link createIsolatedWorktree} falls back to the copy path, and
202
+ * {@link teardownWorktree}'s git calls are best-effort (`.catch(() => {})`).
203
+ */
204
+ const GIT_TIMEOUT_MS = 60_000;
191
205
  /**
192
206
  * Run the CRITIC AGENT(基线智能体 baseline agent)'s full baseline arm and
193
207
  * persist its capture. ALWAYS tears the worktree down (产物即弃). On success it
@@ -198,7 +212,8 @@ const SCHEMAS_REL = path.join('synergyspec-selfevolving', 'schemas');
198
212
  export async function runCriticAgent(opts) {
199
213
  const repoRoot = path.resolve(opts.repoRoot);
200
214
  const spawnImpl = opts.spawn ?? nodeSpawn;
201
- const timeoutMs = opts.timeoutMs ?? 600000;
215
+ const timeoutMs = opts.timeoutMs ?? DEFAULT_AGENT_TIMEOUT_MS;
216
+ const gitTimeoutMs = opts.gitTimeoutMs ?? GIT_TIMEOUT_MS;
202
217
  const homeDir = opts.homeDir ?? os.homedir();
203
218
  const baselineMode = opts.baselineMode ?? 're-do';
204
219
  if (!Number.isInteger(opts.baselineVersion) || opts.baselineVersion < 0) {
@@ -212,7 +227,7 @@ export async function runCriticAgent(opts) {
212
227
  let worktreeMode = 'git-worktree';
213
228
  try {
214
229
  // 1) Isolated worktree OUTSIDE the repo (git worktree --detach, else copy).
215
- worktreeMode = await createIsolatedWorktree(repoRoot, worktreePath, spawnImpl);
230
+ worktreeMode = await createIsolatedWorktree(repoRoot, worktreePath, spawnImpl, gitTimeoutMs);
216
231
  // 're-do' fidelity needs the detached-HEAD tree (pre-change code). The copy
217
232
  // fallback (non-git repo) brings the LIVE tree — including the change's
218
233
  // uncommitted implementation — so it cannot reach the pre-change state and
@@ -242,6 +257,7 @@ export async function runCriticAgent(opts) {
242
257
  cwd: worktreePath,
243
258
  spawn: spawnImpl,
244
259
  timeoutMs,
260
+ ...(opts.harness ? { harness: opts.harness } : {}),
245
261
  });
246
262
  // 5) Build + persist the baseline arm.
247
263
  const measuredAt = new Date().toISOString();
@@ -341,7 +357,7 @@ export async function runCriticAgent(opts) {
341
357
  }
342
358
  finally {
343
359
  // 6) 产物即弃: ALWAYS tear the worktree down — even when a step above threw.
344
- await teardownWorktree(repoRoot, worktreePath, worktreeMode, spawnImpl);
360
+ await teardownWorktree(repoRoot, worktreePath, worktreeMode, spawnImpl, gitTimeoutMs);
345
361
  }
346
362
  }
347
363
  // ---------------------------------------------------------------------------
@@ -353,12 +369,12 @@ export async function runCriticAgent(opts) {
353
369
  * (not a repo, git missing, etc.) falls back to a recursive file copy of the
354
370
  * repo excluding `node_modules` and `.git`. Returns which mode succeeded.
355
371
  */
356
- async function createIsolatedWorktree(repoRoot, worktreePath, spawnImpl) {
372
+ async function createIsolatedWorktree(repoRoot, worktreePath, spawnImpl, gitTimeoutMs = GIT_TIMEOUT_MS) {
357
373
  // Best-effort: a stale worktree dir from an interrupted run would make both
358
374
  // git-add and copy fail; clear it first (产物即弃 — nothing here is durable).
359
375
  await fs.rm(worktreePath, { recursive: true, force: true }).catch(() => { });
360
376
  try {
361
- await runGit(repoRoot, ['worktree', 'add', '--detach', worktreePath, 'HEAD'], spawnImpl);
377
+ await runGit(repoRoot, ['worktree', 'add', '--detach', worktreePath, 'HEAD'], spawnImpl, gitTimeoutMs);
362
378
  return 'git-worktree';
363
379
  }
364
380
  catch {
@@ -378,27 +394,81 @@ async function createIsolatedWorktree(repoRoot, worktreePath, spawnImpl) {
378
394
  * For the copy fallback: recursive rmdir. Never throws — teardown failures must
379
395
  * not mask a real error from the run.
380
396
  */
381
- async function teardownWorktree(repoRoot, worktreePath, mode, spawnImpl) {
397
+ async function teardownWorktree(repoRoot, worktreePath, mode, spawnImpl, gitTimeoutMs = GIT_TIMEOUT_MS) {
382
398
  if (mode === 'git-worktree') {
383
- await runGit(repoRoot, ['worktree', 'remove', '--force', worktreePath], spawnImpl).catch(() => { });
384
- await runGit(repoRoot, ['worktree', 'prune'], spawnImpl).catch(() => { });
399
+ await runGit(repoRoot, ['worktree', 'remove', '--force', worktreePath], spawnImpl, gitTimeoutMs).catch(() => { });
400
+ await runGit(repoRoot, ['worktree', 'prune'], spawnImpl, gitTimeoutMs).catch(() => { });
385
401
  }
386
402
  // The node_modules entry is a junction/symlink; `rm -rf` removes the link, not
387
403
  // the real tree behind it. Belt-and-suspenders rmdir for both modes.
388
404
  await fs.rm(worktreePath, { recursive: true, force: true }).catch(() => { });
389
405
  }
390
- /** Run a git subcommand in `repoRoot`; rejects on a non-zero exit or spawn error. */
391
- async function runGit(repoRoot, args, spawnImpl) {
406
+ /**
407
+ * Run a git subcommand in `repoRoot`; rejects on a non-zero exit, a spawn error,
408
+ * OR a hang past `timeoutMs` (SIGTERM, then SIGKILL ~2s later — mirrors
409
+ * {@link runHeadlessAgent}'s escalation). Spawned with a NON-INTERACTIVE env so a
410
+ * credential/GPG prompt fails fast instead of blocking forever:
411
+ * - `GIT_TERMINAL_PROMPT=0` / `GIT_ASKPASS=''` / `GCM_INTERACTIVE='never'` —
412
+ * no auth prompt is ever opened (it errors out instead), and
413
+ * - `GIT_OPTIONAL_LOCKS=0` — git skips the optional index-lock acquisition that
414
+ * a concurrent git could otherwise block on.
415
+ * Both guards keep `runGit`'s existing resolve/reject contract: a hang becomes a
416
+ * rejection the callers already absorb (copy fallback / best-effort teardown),
417
+ * so the critic degrades gracefully rather than wedging.
418
+ */
419
+ async function runGit(repoRoot, args, spawnImpl, timeoutMs = GIT_TIMEOUT_MS) {
392
420
  await new Promise((resolve, reject) => {
393
- const child = spawnImpl('git', args, { cwd: repoRoot, shell: false });
421
+ const child = spawnImpl('git', args, {
422
+ cwd: repoRoot,
423
+ shell: false,
424
+ env: {
425
+ ...process.env,
426
+ GIT_TERMINAL_PROMPT: '0',
427
+ GIT_OPTIONAL_LOCKS: '0',
428
+ GIT_ASKPASS: '',
429
+ GCM_INTERACTIVE: 'never',
430
+ },
431
+ });
394
432
  const err = [];
433
+ let settled = false;
434
+ let timer;
435
+ const finish = (fn) => {
436
+ if (settled)
437
+ return;
438
+ settled = true;
439
+ if (timer)
440
+ clearTimeout(timer);
441
+ fn();
442
+ };
443
+ timer = setTimeout(() => {
444
+ try {
445
+ child.kill(); // SIGTERM
446
+ // Escalate to SIGKILL shortly after in case git ignores SIGTERM, so a
447
+ // wedged child cannot orphan. unref so this timer never keeps the event
448
+ // loop alive on its own.
449
+ setTimeout(() => {
450
+ try {
451
+ child.kill('SIGKILL');
452
+ }
453
+ catch {
454
+ // ignore
455
+ }
456
+ }, 2000).unref?.();
457
+ }
458
+ catch {
459
+ // ignore
460
+ }
461
+ finish(() => reject(new Error(`git ${args[0]} timed out after ${timeoutMs}ms: ${Buffer.concat(err).toString('utf8')}`)));
462
+ }, timeoutMs);
395
463
  child.stderr?.on('data', (c) => err.push(Buffer.from(c)));
396
- child.on('error', (e) => reject(e));
464
+ child.on('error', (e) => finish(() => reject(e)));
397
465
  child.on('close', (code) => {
398
- if (code === 0)
399
- resolve();
400
- else
401
- reject(new Error(`git ${args[0]} exited ${code}: ${Buffer.concat(err).toString('utf8')}`));
466
+ finish(() => {
467
+ if (code === 0)
468
+ resolve();
469
+ else
470
+ reject(new Error(`git ${args[0]} exited ${code}: ${Buffer.concat(err).toString('utf8')}`));
471
+ });
402
472
  });
403
473
  });
404
474
  }
@@ -52,6 +52,7 @@ import { type EpisodeStage } from './episode-store.js';
52
52
  import { type ArmObjective, type CriticBaselineMode } from './critic-agent.js';
53
53
  import { type RewardConfig } from './reward-aggregator.js';
54
54
  import { type RunEvolvingAgentResult } from './evolving-agent.js';
55
+ import type { AgentHarness } from './host-harness.js';
55
56
  /** The 主智能体 MAIN AGENT (policy vN+1) capture the orchestrator records. */
56
57
  export interface MainArmCapture {
57
58
  /** Raw session transcript text, when provided; persisted as `transcript.jsonl`. */
@@ -162,6 +163,19 @@ export interface RunEpisodeOptions {
162
163
  };
163
164
  /** Injectable spawn seam — threaded to ALL THREE agents. Defaults to node's spawn. */
164
165
  spawn?: typeof nodeSpawn;
166
+ /**
167
+ * Hard timeout per agent run (ms), threaded into ALL THREE agents (critic,
168
+ * reward, evolving). Omitted ⇒ each agent defaults internally
169
+ * (DEFAULT_AGENT_TIMEOUT_MS, 10 min), so a wedged host CLI cannot hang the loop
170
+ * forever and leak the in-flight lock (ses_1330/1331).
171
+ */
172
+ agentTimeoutMs?: number;
173
+ /**
174
+ * Host harness override, threaded into ALL THREE agents so a subagent with an
175
+ * env-less ambient (resolveHostHarness ⇒ wrong default binary) still spawns the
176
+ * right CLI (ses_1331). Omitted ⇒ each agent resolves the harness itself.
177
+ */
178
+ harness?: AgentHarness;
165
179
  /** Injectable clock for the lock + episode id; defaults to `new Date()`. */
166
180
  now?: Date;
167
181
  /**
@@ -205,6 +219,10 @@ export interface ResumeEpisodeOptions {
205
219
  advantageRollbackThreshold?: number;
206
220
  /** Edit budget L (default 40). */
207
221
  editBudget?: number;
222
+ /** Hard timeout per agent run (ms); threaded into the resumed evolving agent. */
223
+ agentTimeoutMs?: number;
224
+ /** Host harness override; threaded into the resumed evolving agent. */
225
+ harness?: AgentHarness;
208
226
  }
209
227
  export interface ResumeEpisodeResult {
210
228
  episodeId: string;
@@ -223,6 +241,16 @@ export interface ResumeEpisodeResult {
223
241
  * - 'scored' → run the decision (f) then the 演进智能体 (g).
224
242
  * - 'rolled-back' / 'kept' → run the 演进智能体 EVOLVING AGENT (g) then close.
225
243
  * - 'evolved'/'evolution-refused'/'abstained' → close.
244
+ * - 'errored' → RE-DRIVE from the last GOOD pre-error stage
245
+ * (an episode may have errored on a TRANSIENT
246
+ * cause — a one-off git/analyzer/agent timeout).
247
+ * The pre-error stage is the last `stageHistory`
248
+ * entry that is NOT 'errored'; when it is one of
249
+ * {'scored','rolled-back','kept'} (the
250
+ * resume-entry stages) we advance errored → that
251
+ * stage and fall through to the normal dispatch.
252
+ * Otherwise the pre-error stage is not
253
+ * auto-resumable and the episode is reported as-is.
226
254
  * - earlier stages → not auto-resumable here (the arms / reward
227
255
  * agent need their own re-entry); reported as-is.
228
256
  *