npm - synergyspec-selfevolving - Versions diffs - 2.1.5 → 2.1.6 - Mend

synergyspec-selfevolving 2.1.5 → 2.1.6

This diff compares publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (37)

package/dist/commands/learn.js +80 -24
package/dist/commands/self-evolution-dream.d.ts +15 -1
package/dist/commands/self-evolution-dream.js +111 -6
package/dist/commands/self-evolution-episode.d.ts +3 -0
package/dist/commands/self-evolution-episode.js +157 -108
package/dist/commands/workflow/status.js +4 -0
package/dist/core/archive.js +17 -9
package/dist/core/change-readiness.d.ts +16 -1
package/dist/core/change-readiness.js +441 -15
package/dist/core/fitness/loss.d.ts +3 -5
package/dist/core/fitness/loss.js +2 -2
package/dist/core/fitness/test-metrics.d.ts +1 -0
package/dist/core/fitness/test-metrics.js +49 -0
package/dist/core/learn.js +129 -11
package/dist/core/migration.d.ts +6 -14
package/dist/core/migration.js +63 -21
package/dist/core/runner-evidence.d.ts +53 -0
package/dist/core/runner-evidence.js +613 -0
package/dist/core/self-evolution/candidates.js +0 -2
package/dist/core/self-evolution/dream.d.ts +57 -3
package/dist/core/self-evolution/dream.js +480 -9
package/dist/core/self-evolution/episode-orchestrator.d.ts +2 -0
package/dist/core/self-evolution/episode-orchestrator.js +17 -5
package/dist/core/self-evolution/episode-store.d.ts +5 -0
package/dist/core/self-evolution/episode-store.js +6 -2
package/dist/core/self-evolution/evolving-agent.js +8 -0
package/dist/core/self-evolution/host-harness.d.ts +35 -12
package/dist/core/self-evolution/host-harness.js +188 -49
package/dist/core/self-evolution/reward-aggregator.js +2 -2
package/dist/core/templates/workflows/archive-change.js +18 -18
package/dist/core/templates/workflows/dream.js +57 -47
package/dist/core/templates/workflows/learn.js +7 -5
package/dist/core/templates/workflows/run-tests.js +48 -29
package/dist/core/templates/workflows/self-evolving.js +11 -8
package/dist/core/trajectory/facts.d.ts +1 -1
package/dist/core/trajectory/registry.js +39 -8
package/package.json +1 -1

package/dist/core/self-evolution/episode-orchestrator.js CHANGED Viewed

@@ -39,7 +39,6 @@ export async function captureMainArm(opts) {
     // `objective.verified` below), so EVERY verified:false arm warns exactly once;
     // a genuinely verified arm (`facts.verified === true`) stays quiet.
     if (!facts || facts.verified !== true) {
-        // eslint-disable-next-line no-console
         console.warn(`[episode-orchestrator] observed grading unavailable for change "${opts.changeName}" — recording verified:false (observed run not verified)`);
     }
     // Honesty: prefer the OBSERVED pass rate (a real runner ran), else the
@@ -275,6 +274,7 @@ async function resultFromReusableEpisode(repoRoot, episode) {
     }
     return {
         episodeId: episode.episodeId,
+        ...(episode.harness ? { harness: episode.harness } : {}),
         baselineSkipped: episode.stageHistory.some((entry) => entry.stage === 'baseline-skipped'),
         advantage: typeof episode.advantage === 'number' ? episode.advantage : null,
         decision,
@@ -420,6 +420,7 @@ export async function runEpisode(opts) {
             changeName: opts.changeName,
             changeDirPath: opts.changeDirPath,
             targetId,
+            ...(opts.harness ? { harness: opts.harness } : {}),
             policyVersionMain,
             idempotencyKey,
             episodeId,
@@ -650,7 +651,17 @@ async function runEpisodeAfterCreate(opts) {
         evolutionOutcomeReason: evolutionOutcomeReasonForClose(evolution),
     });
     const newPolicyVersion = await currentPolicyVersion(repoRoot, targetId);
-    return { episodeId, baselineSkipped, advantage, decision, evolution, newPolicyVersion };
+    const finalEpisode = await readEpisode(repoRoot, episodeId).catch(() => null);
+    const harness = finalEpisode?.harness ?? opts.harness;
+    return {
+        episodeId,
+        ...(harness ? { harness } : {}),
+        baselineSkipped,
+        advantage,
+        decision,
+        evolution,
+        newPolicyVersion,
+    };
 }
 /**
  * Advance the episode to 'closed' from whatever terminal-ish stage it reached,
@@ -807,6 +818,7 @@ export async function resumeEpisode(opts) {
     const editBudget = opts.editBudget ?? DEFAULT_EVOLVING_AGENT_EDIT_BUDGET;
     const threshold = opts.advantageRollbackThreshold ?? 0;
     const ep = await readEpisode(repoRoot, episodeId);
+    const harness = opts.harness ?? ep.harness;
     const resumedFrom = ep.stage;
     const targetId = ep.targetId;
     let evolution = null;
@@ -951,7 +963,7 @@ export async function resumeEpisode(opts) {
                     ...(calibrationNote ? { calibrationNote } : {}),
                     spawn: opts.spawn,
                     ...(opts.agentTimeoutMs !== undefined ? { timeoutMs: opts.agentTimeoutMs } : {}),
-                    ...(opts.harness ? { harness: opts.harness } : {}),
+                    ...(harness ? { harness } : {}),
                     markEvolving: true,
                 });
             }
@@ -985,7 +997,7 @@ export async function resumeEpisode(opts) {
                 ...(calibrationNote ? { calibrationNote } : {}),
                 spawn: opts.spawn,
                 ...(opts.agentTimeoutMs !== undefined ? { timeoutMs: opts.agentTimeoutMs } : {}),
-                ...(opts.harness ? { harness: opts.harness } : {}),
+                ...(harness ? { harness } : {}),
                 markEvolving: stage !== 'evolving',
             });
             await closeEpisodeBestEffort(repoRoot, episodeId, {
@@ -1012,7 +1024,7 @@ export async function resumeEpisode(opts) {
         throw err;
     }
     const after = await readEpisode(repoRoot, episodeId);
-    return { episodeId, resumedFrom, stage: after.stage, evolution };
+    return { episodeId, ...(harness ? { harness } : {}), resumedFrom, stage: after.stage, evolution };
 }
 /**
  * Read the episode's diagnosis.json for resume's decision step, normalized to

package/dist/core/self-evolution/episode-store.d.ts CHANGED Viewed

@@ -46,6 +46,7 @@
  *   - Stage changes go through a validated MONOTONIC state machine —
  *     advancing to a stage not reachable from the current one throws.
  */
+import type { AgentHarness } from './host-harness.js';
 /**
  * Lifecycle stage for an episode.
  *
@@ -145,6 +146,8 @@ export interface EpisodeRecord {
     changeDirPath: string;
     /** The canonical target whose 策略 POLICY the episode exercises. */
     targetId: string;
+    /** Host/code-agent harness used for trajectory grading and spawned agents, when known. */
+    harness?: AgentHarness;
     /** 版本账本 ledger version the 主智能体 MAIN AGENT ran (vN+1); null when unknown. */
     policyVersionMain: number | null;
     /** 版本账本 ledger version the CRITIC AGENT（基线智能体 baseline agent）reran (vN); null until captured. */
@@ -221,6 +224,8 @@ export interface CreateEpisodeOptions {
     changeDirPath: string;
     /** The canonical target whose 策略 POLICY the episode exercises. */
     targetId: string;
+    /** Host/code-agent harness used for trajectory grading and spawned agents, when known. */
+    harness?: AgentHarness;
     /** 版本账本 ledger version the 主智能体 MAIN AGENT ran (vN+1); null when unknown. */
     policyVersionMain: number | null;
     /** Stable completed-run reuse key; see {@link EpisodeRecord.idempotencyKey}. */

package/dist/core/self-evolution/episode-store.js CHANGED Viewed

@@ -49,6 +49,7 @@
 import { promises as fs } from 'node:fs';
 import * as path from 'node:path';
 import * as crypto from 'node:crypto';
+const EPISODE_HARNESSES = ['claude', 'codex', 'opencode'];
 /**
  * Iterable list of every legal {@link EpisodeStage} value. Order follows the
  * documented state machine for readability, not behavior.
@@ -296,6 +297,7 @@ export async function createEpisode(opts) {
         changeName,
         changeDirPath,
         targetId,
+        ...(opts.harness ? { harness: opts.harness } : {}),
         policyVersionMain,
         policyVersionBaseline: null,
         ...(opts.idempotencyKey ? { idempotencyKey: opts.idempotencyKey } : {}),
@@ -368,6 +370,10 @@ function parseEpisodeJson(jsonRaw, episodeId) {
     requiredString('changeName');
     requiredString('changeDirPath');
     requiredString('targetId');
+    if (o.harness !== undefined &&
+        (typeof o.harness !== 'string' || !EPISODE_HARNESSES.includes(o.harness))) {
+        throw new Error(`Invalid episode.json for ${episodeId}: field "harness" must be claude, codex, or opencode when present`);
+    }
     requiredString('createdAt');
     requiredString('updatedAt');
     numberOrNull('policyVersionMain');
@@ -538,7 +544,6 @@ export async function listEpisodes(repoRoot) {
             raw = await fs.readFile(path.join(baseDir, entry.name, EPISODE_JSON_FILE), 'utf8');
         }
         catch {
-            // eslint-disable-next-line no-console
             console.warn(`[episode-store] skipping ${entry.name}: missing or unreadable ${EPISODE_JSON_FILE}`);
             continue;
         }
@@ -547,7 +552,6 @@ export async function listEpisodes(repoRoot) {
             parsed = parseEpisodeJson(raw, entry.name);
         }
         catch {
-            // eslint-disable-next-line no-console
             console.warn(`[episode-store] skipping ${entry.name}: invalid ${EPISODE_JSON_FILE}`);
             continue;
         }

package/dist/core/self-evolution/evolving-agent.js CHANGED Viewed

@@ -484,6 +484,7 @@ export async function runEvolvingAgent(opts) {
             timeoutMs,
             harness: opts.harness,
         });
+        await restoreTargetFileSnapshot(repoRoot, currentFiles);
         if (run.exitCode !== 0 || run.stdout.length === 0) {
             // Agent crash is NOT repaired (the evolving agent's invocation contract).
             throw new EvolvingAgentInvocationError(run.stderr);
@@ -571,4 +572,11 @@ export async function runEvolvingAgent(opts) {
     await advanceEpisodeStage({ repoRoot, episodeId, stage: 'evolved' });
     return { kind: 'evolved', ledgerEntry };
 }
+async function restoreTargetFileSnapshot(repoRoot, files) {
+    await Promise.all(files.map(async (file) => {
+        const fullPath = path.join(repoRoot, ...file.relPath.split('/'));
+        await fs.mkdir(path.dirname(fullPath), { recursive: true });
+        await fs.writeFile(fullPath, file.content, 'utf8');
+    }));
+}
 //# sourceMappingURL=evolving-agent.js.map

package/dist/core/self-evolution/host-harness.d.ts CHANGED Viewed

@@ -64,8 +64,8 @@ export declare function resolveIdleTimeoutMs(harness?: AgentHarness): number;
  *   (1) `SYNERGYSPEC_SELFEVOLVING_AGENT_TIMEOUT_MS` when it parses to a positive
  *       finite integer — a host-wide tunable that overrides every harness.
  *   (2) the per-harness default ({@link HARNESS_TIMEOUT_DEFAULTS_MS}): the 10-min
- *       {@link DEFAULT_AGENT_TIMEOUT_MS} for claude/codex, a lower wall for the
- *       empirically slow-to-emit opencode.
+ *       {@link DEFAULT_AGENT_TIMEOUT_MS} for claude/codex, and a longer wall for
+ *       opencode live edit attempts.
  *
  * `harness` omitted ⇒ {@link resolveHostHarness} is consulted so the default is
  * host-appropriate.
@@ -77,13 +77,13 @@ export declare function resolveAgentTimeoutMs(harness?: AgentHarness): number;
  * Precedence:
  *   (a) `SYNERGYSPEC_SELFEVOLVING_HOST_HARNESS` when it equals claude|codex|opencode.
  *   (b) Heuristic on the ambient environment:
- *       - `CODEX_HOME` or any `CODEX_*` var set → 'codex'.
  *       - `OPENCODE_DATA_DIR` or any `OPENCODE_*` var set → 'opencode'.
+ *       - `CODEX_HOME` or any `CODEX_*` var set → 'codex'.
  *   (c) Default 'claude'.
  *
- * Codex is checked before opencode so that, in the unlikely event both families
- * of env vars are present, the explicit override remains the only way to force a
- * choice; the heuristic is best-effort.
+ * OpenCode is checked before Codex because Codex can be the meta-runner that is
+ * invoking an OpenCode smoke test; in that mixed environment OPENCODE_* is the
+ * stronger signal for the observed run whose trajectory we must grade.
  */
 export declare function resolveHostHarness(): AgentHarness;
 /**
@@ -124,8 +124,11 @@ export interface HostHarnessResolution {
  *       wrong binary,
  *   (4) 'claude'.
  *
- * When (1) or (2) resolve CONFIDENTLY from a real env signal, the result is
- * persisted best-effort (fire-and-forget) so a later env-less call recovers it.
+ * This resolver is read-only. Command entry points that need to seed an
+ * env-less subagent call `seedHostHarnessForRepo`; keeping this function pure
+ * matters because learn preview/report generation uses it during trajectory
+ * lookup and must not write sidecar files.
+ *
  * The env checks are replicated inline (rather than only calling the sync
  * {@link resolveHostHarness}) precisely so we can tell "env gave a real signal"
  * apart from "defaulted to claude with no signal" — the sync resolver collapses
@@ -133,20 +136,35 @@ export interface HostHarnessResolution {
  */
 export declare function resolveHostHarnessDetailsForRepo(repoRoot: string): Promise<HostHarnessResolution>;
 export declare function resolveHostHarnessForRepo(repoRoot: string): Promise<AgentHarness>;
+/**
+ * Resolve the host harness and persist only a confident host signal (explicit
+ * override or CODEX_/OPENCODE_ env). This is the side-effecting entry point for
+ * command handlers that are about to spawn env-less subagents; core report and
+ * trajectory readers should use the read-only resolver above.
+ */
+export declare function seedHostHarnessForRepo(repoRoot: string): Promise<HostHarnessResolution>;
 export interface HeadlessCommand {
     binary: string;
     args: string[];
     /** When true, the prompt must be written to the child's stdin (and stdin end()ed). */
     useStdin: boolean;
 }
+export interface HeadlessSpawnCommand extends HeadlessCommand {
+    /**
+     * Native executables and POSIX commands keep `shell:false`. Windows `.cmd` /
+     * `.bat` shims are wrapped explicitly through `cmd.exe`, also with
+     * `shell:false`, so Node does not concatenate unescaped args.
+     */
+    shell: boolean;
+}
 /**
  * Build the concrete `{binary, args, useStdin}` invocation for a headless run.
  *
  * Full escape hatch: if `SYNERGYSPEC_CODE_AGENT_COMMAND` is set, it is parsed as a
- * JSON `string[]` template. The literal tokens `{prompt}` and `{cwd}` are
- * substituted in each element; `binary = template[0]`, `args = template.slice(1)`.
- * `useStdin` is inferred — true iff the template does NOT contain a `{prompt}`
- * token anywhere (so the caller streams the prompt to stdin instead).
+ * JSON `string[]` template. The literal token `{cwd}` is substituted in each
+ * element; `binary = template[0]`, `args = template.slice(1)`. `{prompt}` is
+ * deliberately rejected: loop-v2 prompts are too large for argv and must flow
+ * through stdin for every harness and override.
  *
  * Otherwise the command is derived from the harness (default
  * {@link resolveHostHarness}). Every harness streams the prompt over stdin
@@ -157,6 +175,11 @@ export declare function buildHeadlessCommand(prompt: string, opts: {
     harness?: AgentHarness;
     binaryOverride?: string;
 }): HeadlessCommand;
+export declare function resolveHeadlessCommandForSpawn(command: HeadlessCommand, opts?: {
+    platform?: NodeJS.Platform;
+    env?: NodeJS.ProcessEnv;
+    isExecutableFile?: (candidate: string, isWindows: boolean) => boolean;
+}): HeadlessSpawnCommand;
 export interface RunHeadlessAgentResult {
     exitCode: number | null;
     stdout: string;

package/dist/core/self-evolution/host-harness.js CHANGED Viewed

@@ -34,17 +34,18 @@ const HARNESSES = ['claude', 'codex', 'opencode'];
 export const DEFAULT_AGENT_TIMEOUT_MS = 600_000;
 /**
  * Per-host absolute-timeout defaults. claude/codex keep the 10-min
- * {@link DEFAULT_AGENT_TIMEOUT_MS}; opencode is given a lower wall because — in
- * the v2.1.2 smoke run — an opencode/GPT-5.5 print-mode spawn emitted ZERO
- * output and burned the full 10 minutes before the wall fired (the host CLI is
- * empirically slow-to-emit / occasionally non-terminating in `run` print mode).
+ * {@link DEFAULT_AGENT_TIMEOUT_MS}; opencode gets a longer wall because the
+ * v2.1.5 Windows/OpenCode smoke run reached reward/scoring, then killed the
+ * evolving agent at the previous 5-min wall while it was still producing a
+ * bounded candidate. The idle watchdog remains the earlier trip wire for silent
+ * wedges, so the absolute wall should be large enough for a live edit attempt.
  * The wall is still overridable per-host via
  * `SYNERGYSPEC_SELFEVOLVING_AGENT_TIMEOUT_MS` ({@link resolveAgentTimeoutMs}).
  */
 const HARNESS_TIMEOUT_DEFAULTS_MS = {
     claude: DEFAULT_AGENT_TIMEOUT_MS,
     codex: DEFAULT_AGENT_TIMEOUT_MS,
-    opencode: 300_000,
+    opencode: 900_000,
 };
 /**
  * Default STDOUT/STDERR-idle watchdog window (2 min). If a spawned host CLI
@@ -62,11 +63,13 @@ export const DEFAULT_AGENT_IDLE_TIMEOUT_MS = 120_000;
  * emit ZERO bytes for well over 2 min while it reasons, so claude/codex get a
  * 5-min idle leash. opencode keeps the tighter 2-min window — it is the
  * empirically-wedging host (the v2.1.2 hang emitted no output at all) and a
- * faster idle kill is what we want there.
+ * faster idle kill is what we want there. opencode's absolute wall is longer
+ * than claude/codex because its live edit attempts can be slower even when they
+ * are not silent.
  *
  * INVARIANT: every harness's idle default is strictly LESS than its absolute
  * default ({@link HARNESS_TIMEOUT_DEFAULTS_MS}) so the idle watchdog stays the
- * earlier trip wire (claude 300<600, codex 300<600, opencode 120<300).
+ * earlier trip wire (claude 300<600, codex 300<600, opencode 120<900).
  * Overridable per host via `SYNERGYSPEC_SELFEVOLVING_AGENT_IDLE_TIMEOUT_MS`
  * ({@link resolveIdleTimeoutMs}).
  */
@@ -125,8 +128,8 @@ const AGENT_TIMEOUT_ENV = 'SYNERGYSPEC_SELFEVOLVING_AGENT_TIMEOUT_MS';
  *   (1) `SYNERGYSPEC_SELFEVOLVING_AGENT_TIMEOUT_MS` when it parses to a positive
  *       finite integer — a host-wide tunable that overrides every harness.
  *   (2) the per-harness default ({@link HARNESS_TIMEOUT_DEFAULTS_MS}): the 10-min
- *       {@link DEFAULT_AGENT_TIMEOUT_MS} for claude/codex, a lower wall for the
- *       empirically slow-to-emit opencode.
+ *       {@link DEFAULT_AGENT_TIMEOUT_MS} for claude/codex, and a longer wall for
+ *       opencode live edit attempts.
  *
  * `harness` omitted ⇒ {@link resolveHostHarness} is consulted so the default is
  * host-appropriate.
@@ -150,25 +153,25 @@ function isAgentHarness(value) {
  * Precedence:
  *   (a) `SYNERGYSPEC_SELFEVOLVING_HOST_HARNESS` when it equals claude|codex|opencode.
  *   (b) Heuristic on the ambient environment:
- *       - `CODEX_HOME` or any `CODEX_*` var set → 'codex'.
  *       - `OPENCODE_DATA_DIR` or any `OPENCODE_*` var set → 'opencode'.
+ *       - `CODEX_HOME` or any `CODEX_*` var set → 'codex'.
  *   (c) Default 'claude'.
  *
- * Codex is checked before opencode so that, in the unlikely event both families
- * of env vars are present, the explicit override remains the only way to force a
- * choice; the heuristic is best-effort.
+ * OpenCode is checked before Codex because Codex can be the meta-runner that is
+ * invoking an OpenCode smoke test; in that mixed environment OPENCODE_* is the
+ * stronger signal for the observed run whose trajectory we must grade.
  */
 export function resolveHostHarness() {
     const override = process.env.SYNERGYSPEC_SELFEVOLVING_HOST_HARNESS;
     if (isAgentHarness(override))
         return override;
     const envKeys = Object.keys(process.env);
-    const hasCodex = process.env.CODEX_HOME !== undefined || envKeys.some((k) => k.startsWith('CODEX_'));
-    if (hasCodex)
-        return 'codex';
     const hasOpencode = process.env.OPENCODE_DATA_DIR !== undefined || envKeys.some((k) => k.startsWith('OPENCODE_'));
     if (hasOpencode)
         return 'opencode';
+    const hasCodex = process.env.CODEX_HOME !== undefined || envKeys.some((k) => k.startsWith('CODEX_'));
+    if (hasCodex)
+        return 'codex';
     return 'claude';
 }
 // ---------------------------------------------------------------------------
@@ -194,14 +197,23 @@ function hostHarnessPath(repoRoot) {
  * spawns, never a precondition for the current run.
  */
 export async function persistHostHarness(repoRoot, harness) {
+    let tmpFile = null;
     try {
         const file = hostHarnessPath(repoRoot);
         await fs.mkdir(path.dirname(file), { recursive: true });
-        await fs.writeFile(file, `${JSON.stringify({ harness }, null, 2)}\n`, 'utf8');
+        tmpFile = path.join(path.dirname(file), `${HOST_HARNESS_FILE}.${process.pid}.${Date.now()}.${Math.random().toString(36).slice(2)}.tmp`);
+        await fs.writeFile(tmpFile, `${JSON.stringify({ harness }, null, 2)}\n`, 'utf8');
+        await fs.rename(tmpFile, file);
+        tmpFile = null;
     }
     catch {
         // Swallow: a read-only or transient FS must not break the loop.
     }
+    finally {
+        if (tmpFile) {
+            await fs.unlink(tmpFile).catch(() => undefined);
+        }
+    }
 }
 /**
  * Read + parse + validate the persisted-harness sidecar. Returns the
@@ -253,18 +265,16 @@ function binaryResolvable(binary) {
         if (binary.trim().length === 0)
             return false;
         const isWindows = process.platform === 'win32';
-        // Windows PATHEXT (e.g. `.COM;.EXE;.BAT;.CMD`); also try the bare name (a
-        // binary may already carry its extension).
-        const exts = isWindows
-            ? ['', ...(process.env.PATHEXT ?? '.COM;.EXE;.BAT;.CMD').split(';').filter(Boolean)]
-            : [''];
+        // Windows PATHEXT (e.g. `.COM;.EXE;.BAT;.CMD`). A bare extensionless npm
+        // shim is not a CreateProcess target; prefer the PATHEXT-resolved .cmd/.exe.
+        const exts = executableExtensions(binary, isWindows, process.env.PATHEXT);
         const isExecutableFile = (candidate) => {
             try {
                 const st = statSync(candidate);
                 if (!st.isFile())
                     return false;
                 if (isWindows)
-                    return true; // Windows has no executable bit; existence + ext suffices.
+                    return isWindowsSpawnCompatibleExecutable(candidate);
                 // POSIX: any execute bit (owner/group/other) marks it runnable.
                 return (st.mode & 0o111) !== 0;
             }
@@ -314,8 +324,11 @@ function persistedBinary(harness) {
  *       wrong binary,
  *   (4) 'claude'.
  *
- * When (1) or (2) resolve CONFIDENTLY from a real env signal, the result is
- * persisted best-effort (fire-and-forget) so a later env-less call recovers it.
+ * This resolver is read-only. Command entry points that need to seed an
+ * env-less subagent call `seedHostHarnessForRepo`; keeping this function pure
+ * matters because learn preview/report generation uses it during trajectory
+ * lookup and must not write sidecar files.
+ *
  * The env checks are replicated inline (rather than only calling the sync
  * {@link resolveHostHarness}) precisely so we can tell "env gave a real signal"
  * apart from "defaulted to claude with no signal" — the sync resolver collapses
@@ -325,24 +338,21 @@ export async function resolveHostHarnessDetailsForRepo(repoRoot) {
     // (1) explicit override.
     const override = process.env.SYNERGYSPEC_SELFEVOLVING_HOST_HARNESS;
     if (isAgentHarness(override)) {
-        void persistHostHarness(repoRoot, override);
         return { harness: override, source: 'override' };
     }
     // (2) env heuristic — only a POSITIVE hit counts (mirrors resolveHostHarness'
-    //     CODEX_-before-OPENCODE_ ordering, but distinguishes a real signal from
+    //     OPENCODE_-before-CODEX_ ordering, but distinguishes a real signal from
     //     the 'claude' fall-through).
     const envKeys = Object.keys(process.env);
-    const hasCodex = process.env.CODEX_HOME !== undefined || envKeys.some((k) => k.startsWith('CODEX_'));
-    if (hasCodex) {
-        void persistHostHarness(repoRoot, 'codex');
-        return { harness: 'codex', source: 'env' };
-    }
     const hasOpencode = process.env.OPENCODE_DATA_DIR !== undefined ||
         envKeys.some((k) => k.startsWith('OPENCODE_'));
     if (hasOpencode) {
-        void persistHostHarness(repoRoot, 'opencode');
         return { harness: 'opencode', source: 'env' };
     }
+    const hasCodex = process.env.CODEX_HOME !== undefined || envKeys.some((k) => k.startsWith('CODEX_'));
+    if (hasCodex) {
+        return { harness: 'codex', source: 'env' };
+    }
     // (3) persisted sidecar (the env-less-subagent recovery path) — honored ONLY
     //     when its binary is resolvable here. The persisted value for codex /
     //     opencode IS the binary name; probing it on PATH skips a wrong/stale
@@ -358,14 +368,27 @@ export async function resolveHostHarnessDetailsForRepo(repoRoot) {
 export async function resolveHostHarnessForRepo(repoRoot) {
     return (await resolveHostHarnessDetailsForRepo(repoRoot)).harness;
 }
+/**
+ * Resolve the host harness and persist only a confident host signal (explicit
+ * override or CODEX_/OPENCODE_ env). This is the side-effecting entry point for
+ * command handlers that are about to spawn env-less subagents; core report and
+ * trajectory readers should use the read-only resolver above.
+ */
+export async function seedHostHarnessForRepo(repoRoot) {
+    const resolution = await resolveHostHarnessDetailsForRepo(repoRoot);
+    if (resolution.source === 'override' || resolution.source === 'env') {
+        await persistHostHarness(repoRoot, resolution.harness);
+    }
+    return resolution;
+}
 /**
  * Build the concrete `{binary, args, useStdin}` invocation for a headless run.
  *
  * Full escape hatch: if `SYNERGYSPEC_CODE_AGENT_COMMAND` is set, it is parsed as a
- * JSON `string[]` template. The literal tokens `{prompt}` and `{cwd}` are
- * substituted in each element; `binary = template[0]`, `args = template.slice(1)`.
- * `useStdin` is inferred — true iff the template does NOT contain a `{prompt}`
- * token anywhere (so the caller streams the prompt to stdin instead).
+ * JSON `string[]` template. The literal token `{cwd}` is substituted in each
+ * element; `binary = template[0]`, `args = template.slice(1)`. `{prompt}` is
+ * deliberately rejected: loop-v2 prompts are too large for argv and must flow
+ * through stdin for every harness and override.
  *
  * Otherwise the command is derived from the harness (default
  * {@link resolveHostHarness}). Every harness streams the prompt over stdin
@@ -385,12 +408,14 @@ export function buildHeadlessCommand(prompt, opts) {
             throw new Error('SYNERGYSPEC_CODE_AGENT_COMMAND must be a non-empty JSON array of strings');
         }
         const rawTemplate = parsed;
-        const useStdin = !rawTemplate.some((e) => e.includes('{prompt}'));
-        const substituted = rawTemplate.map((e) => e.split('{prompt}').join(prompt).split('{cwd}').join(opts.cwd));
+        if (rawTemplate.some((e) => e.includes('{prompt}'))) {
+            throw new Error('SYNERGYSPEC_CODE_AGENT_COMMAND must not contain {prompt}; prompts are always streamed over stdin');
+        }
+        const substituted = rawTemplate.map((e) => e.split('{cwd}').join(opts.cwd));
         return {
             binary: substituted[0],
             args: substituted.slice(1),
-            useStdin,
+            useStdin: true,
         };
     }
     const harness = opts.harness ?? resolveHostHarness();
@@ -422,6 +447,110 @@ export function buildHeadlessCommand(prompt, opts) {
         }
     }
 }
+export function resolveHeadlessCommandForSpawn(command, opts = {}) {
+    const platform = opts.platform ?? process.platform;
+    if (platform !== 'win32') {
+        return { ...command, shell: false };
+    }
+    const resolved = resolveWindowsExecutable(command.binary, {
+        env: opts.env ?? process.env,
+        isExecutableFile: opts.isExecutableFile ??
+            ((candidate) => {
+                try {
+                    return statSync(candidate).isFile();
+                }
+                catch {
+                    return false;
+                }
+            }),
+    });
+    const binary = resolved ?? command.binary;
+    if (isUnsupportedWindowsExecutable(binary)) {
+        throw new Error(`Windows headless agent binary '${binary}' has unsupported extension '${path.win32
+            .extname(binary)
+            .toLowerCase()}'; use a .cmd, .bat, .exe, or .com shim, or invoke the interpreter explicitly via SYNERGYSPEC_CODE_AGENT_COMMAND.`);
+    }
+    if (isWindowsShellScript(binary)) {
+        const wrapper = wrapWindowsShellScript(binary, command.args, opts.env ?? process.env);
+        return {
+            ...command,
+            binary: wrapper.binary,
+            args: wrapper.args,
+            shell: false,
+        };
+    }
+    return {
+        ...command,
+        binary,
+        shell: false,
+    };
+}
+function executableExtensions(binary, isWindows, pathext) {
+    if (!isWindows)
+        return [''];
+    if (path.win32.extname(binary))
+        return [''];
+    return (pathext ?? '.COM;.EXE;.BAT;.CMD')
+        .split(';')
+        .map((ext) => ext.trim())
+        .filter(Boolean);
+}
+function resolveWindowsExecutable(binary, opts) {
+    if (!binary || binary.trim().length === 0)
+        return null;
+    const exts = executableExtensions(binary, true, opts.env.PATHEXT);
+    const candidates = [];
+    const hasPathSeparator = binary.includes('/') || binary.includes('\\');
+    if (hasPathSeparator) {
+        candidates.push(...exts.map((ext) => binary + ext));
+    }
+    else {
+        const entries = (opts.env.PATH ?? '').split(';').filter(Boolean);
+        for (const dir of entries) {
+            for (const ext of exts)
+                candidates.push(path.win32.join(dir, binary + ext));
+        }
+    }
+    let firstUnsupported = null;
+    for (const candidate of candidates) {
+        if (!opts.isExecutableFile(candidate, true))
+            continue;
+        if (isWindowsSpawnCompatibleExecutable(candidate))
+            return candidate;
+        firstUnsupported ??= candidate;
+    }
+    if (firstUnsupported) {
+        throw new Error(`Windows headless agent binary resolved to '${firstUnsupported}', but that extension cannot be spawned with shell:false; use a .cmd, .bat, .exe, or .com shim, or invoke the interpreter explicitly via SYNERGYSPEC_CODE_AGENT_COMMAND.`);
+    }
+    return null;
+}
+function isWindowsShellScript(binary) {
+    const ext = path.win32.extname(binary).toLowerCase();
+    return ext === '.cmd' || ext === '.bat';
+}
+function isWindowsSpawnCompatibleExecutable(binary) {
+    const ext = path.win32.extname(binary).toLowerCase();
+    return ext === '' || ext === '.com' || ext === '.exe' || ext === '.bat' || ext === '.cmd';
+}
+function isUnsupportedWindowsExecutable(binary) {
+    const ext = path.win32.extname(binary).toLowerCase();
+    return ext.length > 0 && !isWindowsSpawnCompatibleExecutable(binary);
+}
+function wrapWindowsShellScript(binary, args, env) {
+    const comspec = firstNonBlankEnv(env, 'ComSpec', 'COMSPEC') ?? 'cmd.exe';
+    return {
+        binary: comspec,
+        args: ['/d', '/s', '/c', 'call', binary, ...args],
+    };
+}
+function firstNonBlankEnv(env, ...keys) {
+    for (const key of keys) {
+        const value = env[key];
+        if (typeof value === 'string' && value.trim().length > 0)
+            return value;
+    }
+    return undefined;
+}
 /**
  * The claude-default binary fallback: `SYNERGYSPEC_SELFEVOLVING_CLAUDE_BIN` when
  * non-empty, else `'claude'`. Kept here so {@link buildHeadlessCommand} is the
@@ -457,16 +586,27 @@ function claudeDefaultBinary() {
  */
 export async function runHeadlessAgent(prompt, opts) {
     const spawnImpl = opts.spawn ?? nodeSpawn;
-    const command = buildHeadlessCommand(prompt, {
-        cwd: opts.cwd,
-        harness: opts.harness,
-        binaryOverride: opts.binaryOverride,
-    });
+    let spawnCommand;
+    try {
+        const command = buildHeadlessCommand(prompt, {
+            cwd: opts.cwd,
+            harness: opts.harness,
+            binaryOverride: opts.binaryOverride,
+        });
+        spawnCommand = resolveHeadlessCommandForSpawn(command);
+    }
+    catch (e) {
+        return {
+            exitCode: -1,
+            stdout: '',
+            stderr: e instanceof Error ? e.message : String(e),
+        };
+    }
     return await new Promise((resolve) => {
         let child;
         try {
-            child = spawnImpl(command.binary, command.args, {
-                shell: false,
+            child = spawnImpl(spawnCommand.binary, spawnCommand.args, {
+                shell: spawnCommand.shell,
                 cwd: opts.cwd,
             });
         }
@@ -524,7 +664,7 @@ export async function runHeadlessAgent(prompt, opts) {
                 // ignore
             }
         };
-        if (command.useStdin) {
+        if (spawnCommand.useStdin) {
             // Swallow stdin stream errors (e.g. EPIPE when the child exits before it
             // has read the whole — possibly 100KB+ — prompt). The real failure is
             // reported via the child's own 'error'/'close' handlers below; an
@@ -585,7 +725,6 @@ export async function runHeadlessAgent(prompt, opts) {
                 if (settled)
                     return;
                 const elapsedS = Math.round((Date.now() - startedAt) / 1000);
-                // eslint-disable-next-line no-console
                 console.error(`[self-evolution] headless agent running: ${elapsedS}s elapsed, ${bytesReceived} bytes received`);
             }, HEARTBEAT_INTERVAL_MS);
             heartbeatTimer.unref?.();