synergyspec-selfevolving 2.1.5 → 2.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/learn.js +80 -24
- package/dist/commands/self-evolution-dream.d.ts +15 -1
- package/dist/commands/self-evolution-dream.js +111 -6
- package/dist/commands/self-evolution-episode.d.ts +3 -0
- package/dist/commands/self-evolution-episode.js +157 -108
- package/dist/commands/workflow/status.js +4 -0
- package/dist/core/archive.js +17 -9
- package/dist/core/change-readiness.d.ts +16 -1
- package/dist/core/change-readiness.js +441 -15
- package/dist/core/fitness/loss.d.ts +3 -5
- package/dist/core/fitness/loss.js +2 -2
- package/dist/core/fitness/test-metrics.d.ts +1 -0
- package/dist/core/fitness/test-metrics.js +49 -0
- package/dist/core/learn.js +129 -11
- package/dist/core/migration.d.ts +6 -14
- package/dist/core/migration.js +63 -21
- package/dist/core/runner-evidence.d.ts +53 -0
- package/dist/core/runner-evidence.js +613 -0
- package/dist/core/self-evolution/candidates.js +0 -2
- package/dist/core/self-evolution/dream.d.ts +57 -3
- package/dist/core/self-evolution/dream.js +480 -9
- package/dist/core/self-evolution/episode-orchestrator.d.ts +2 -0
- package/dist/core/self-evolution/episode-orchestrator.js +17 -5
- package/dist/core/self-evolution/episode-store.d.ts +5 -0
- package/dist/core/self-evolution/episode-store.js +6 -2
- package/dist/core/self-evolution/evolving-agent.js +8 -0
- package/dist/core/self-evolution/host-harness.d.ts +35 -12
- package/dist/core/self-evolution/host-harness.js +188 -49
- package/dist/core/self-evolution/reward-aggregator.js +2 -2
- package/dist/core/templates/workflows/archive-change.js +18 -18
- package/dist/core/templates/workflows/dream.js +57 -47
- package/dist/core/templates/workflows/learn.js +7 -5
- package/dist/core/templates/workflows/run-tests.js +48 -29
- package/dist/core/templates/workflows/self-evolving.js +11 -8
- package/dist/core/trajectory/facts.d.ts +1 -1
- package/dist/core/trajectory/registry.js +39 -8
- package/package.json +1 -1
|
@@ -39,7 +39,6 @@ export async function captureMainArm(opts) {
|
|
|
39
39
|
// `objective.verified` below), so EVERY verified:false arm warns exactly once;
|
|
40
40
|
// a genuinely verified arm (`facts.verified === true`) stays quiet.
|
|
41
41
|
if (!facts || facts.verified !== true) {
|
|
42
|
-
// eslint-disable-next-line no-console
|
|
43
42
|
console.warn(`[episode-orchestrator] observed grading unavailable for change "${opts.changeName}" — recording verified:false (observed run not verified)`);
|
|
44
43
|
}
|
|
45
44
|
// Honesty: prefer the OBSERVED pass rate (a real runner ran), else the
|
|
@@ -275,6 +274,7 @@ async function resultFromReusableEpisode(repoRoot, episode) {
|
|
|
275
274
|
}
|
|
276
275
|
return {
|
|
277
276
|
episodeId: episode.episodeId,
|
|
277
|
+
...(episode.harness ? { harness: episode.harness } : {}),
|
|
278
278
|
baselineSkipped: episode.stageHistory.some((entry) => entry.stage === 'baseline-skipped'),
|
|
279
279
|
advantage: typeof episode.advantage === 'number' ? episode.advantage : null,
|
|
280
280
|
decision,
|
|
@@ -420,6 +420,7 @@ export async function runEpisode(opts) {
|
|
|
420
420
|
changeName: opts.changeName,
|
|
421
421
|
changeDirPath: opts.changeDirPath,
|
|
422
422
|
targetId,
|
|
423
|
+
...(opts.harness ? { harness: opts.harness } : {}),
|
|
423
424
|
policyVersionMain,
|
|
424
425
|
idempotencyKey,
|
|
425
426
|
episodeId,
|
|
@@ -650,7 +651,17 @@ async function runEpisodeAfterCreate(opts) {
|
|
|
650
651
|
evolutionOutcomeReason: evolutionOutcomeReasonForClose(evolution),
|
|
651
652
|
});
|
|
652
653
|
const newPolicyVersion = await currentPolicyVersion(repoRoot, targetId);
|
|
653
|
-
|
|
654
|
+
const finalEpisode = await readEpisode(repoRoot, episodeId).catch(() => null);
|
|
655
|
+
const harness = finalEpisode?.harness ?? opts.harness;
|
|
656
|
+
return {
|
|
657
|
+
episodeId,
|
|
658
|
+
...(harness ? { harness } : {}),
|
|
659
|
+
baselineSkipped,
|
|
660
|
+
advantage,
|
|
661
|
+
decision,
|
|
662
|
+
evolution,
|
|
663
|
+
newPolicyVersion,
|
|
664
|
+
};
|
|
654
665
|
}
|
|
655
666
|
/**
|
|
656
667
|
* Advance the episode to 'closed' from whatever terminal-ish stage it reached,
|
|
@@ -807,6 +818,7 @@ export async function resumeEpisode(opts) {
|
|
|
807
818
|
const editBudget = opts.editBudget ?? DEFAULT_EVOLVING_AGENT_EDIT_BUDGET;
|
|
808
819
|
const threshold = opts.advantageRollbackThreshold ?? 0;
|
|
809
820
|
const ep = await readEpisode(repoRoot, episodeId);
|
|
821
|
+
const harness = opts.harness ?? ep.harness;
|
|
810
822
|
const resumedFrom = ep.stage;
|
|
811
823
|
const targetId = ep.targetId;
|
|
812
824
|
let evolution = null;
|
|
@@ -951,7 +963,7 @@ export async function resumeEpisode(opts) {
|
|
|
951
963
|
...(calibrationNote ? { calibrationNote } : {}),
|
|
952
964
|
spawn: opts.spawn,
|
|
953
965
|
...(opts.agentTimeoutMs !== undefined ? { timeoutMs: opts.agentTimeoutMs } : {}),
|
|
954
|
-
...(
|
|
966
|
+
...(harness ? { harness } : {}),
|
|
955
967
|
markEvolving: true,
|
|
956
968
|
});
|
|
957
969
|
}
|
|
@@ -985,7 +997,7 @@ export async function resumeEpisode(opts) {
|
|
|
985
997
|
...(calibrationNote ? { calibrationNote } : {}),
|
|
986
998
|
spawn: opts.spawn,
|
|
987
999
|
...(opts.agentTimeoutMs !== undefined ? { timeoutMs: opts.agentTimeoutMs } : {}),
|
|
988
|
-
...(
|
|
1000
|
+
...(harness ? { harness } : {}),
|
|
989
1001
|
markEvolving: stage !== 'evolving',
|
|
990
1002
|
});
|
|
991
1003
|
await closeEpisodeBestEffort(repoRoot, episodeId, {
|
|
@@ -1012,7 +1024,7 @@ export async function resumeEpisode(opts) {
|
|
|
1012
1024
|
throw err;
|
|
1013
1025
|
}
|
|
1014
1026
|
const after = await readEpisode(repoRoot, episodeId);
|
|
1015
|
-
return { episodeId, resumedFrom, stage: after.stage, evolution };
|
|
1027
|
+
return { episodeId, ...(harness ? { harness } : {}), resumedFrom, stage: after.stage, evolution };
|
|
1016
1028
|
}
|
|
1017
1029
|
/**
|
|
1018
1030
|
* Read the episode's diagnosis.json for resume's decision step, normalized to
|
|
@@ -46,6 +46,7 @@
|
|
|
46
46
|
* - Stage changes go through a validated MONOTONIC state machine —
|
|
47
47
|
* advancing to a stage not reachable from the current one throws.
|
|
48
48
|
*/
|
|
49
|
+
import type { AgentHarness } from './host-harness.js';
|
|
49
50
|
/**
|
|
50
51
|
* Lifecycle stage for an episode.
|
|
51
52
|
*
|
|
@@ -145,6 +146,8 @@ export interface EpisodeRecord {
|
|
|
145
146
|
changeDirPath: string;
|
|
146
147
|
/** The canonical target whose 策略 POLICY the episode exercises. */
|
|
147
148
|
targetId: string;
|
|
149
|
+
/** Host/code-agent harness used for trajectory grading and spawned agents, when known. */
|
|
150
|
+
harness?: AgentHarness;
|
|
148
151
|
/** 版本账本 ledger version the 主智能体 MAIN AGENT ran (vN+1); null when unknown. */
|
|
149
152
|
policyVersionMain: number | null;
|
|
150
153
|
/** 版本账本 ledger version the CRITIC AGENT(基线智能体 baseline agent)reran (vN); null until captured. */
|
|
@@ -221,6 +224,8 @@ export interface CreateEpisodeOptions {
|
|
|
221
224
|
changeDirPath: string;
|
|
222
225
|
/** The canonical target whose 策略 POLICY the episode exercises. */
|
|
223
226
|
targetId: string;
|
|
227
|
+
/** Host/code-agent harness used for trajectory grading and spawned agents, when known. */
|
|
228
|
+
harness?: AgentHarness;
|
|
224
229
|
/** 版本账本 ledger version the 主智能体 MAIN AGENT ran (vN+1); null when unknown. */
|
|
225
230
|
policyVersionMain: number | null;
|
|
226
231
|
/** Stable completed-run reuse key; see {@link EpisodeRecord.idempotencyKey}. */
|
|
@@ -49,6 +49,7 @@
|
|
|
49
49
|
import { promises as fs } from 'node:fs';
|
|
50
50
|
import * as path from 'node:path';
|
|
51
51
|
import * as crypto from 'node:crypto';
|
|
52
|
+
const EPISODE_HARNESSES = ['claude', 'codex', 'opencode'];
|
|
52
53
|
/**
|
|
53
54
|
* Iterable list of every legal {@link EpisodeStage} value. Order follows the
|
|
54
55
|
* documented state machine for readability, not behavior.
|
|
@@ -296,6 +297,7 @@ export async function createEpisode(opts) {
|
|
|
296
297
|
changeName,
|
|
297
298
|
changeDirPath,
|
|
298
299
|
targetId,
|
|
300
|
+
...(opts.harness ? { harness: opts.harness } : {}),
|
|
299
301
|
policyVersionMain,
|
|
300
302
|
policyVersionBaseline: null,
|
|
301
303
|
...(opts.idempotencyKey ? { idempotencyKey: opts.idempotencyKey } : {}),
|
|
@@ -368,6 +370,10 @@ function parseEpisodeJson(jsonRaw, episodeId) {
|
|
|
368
370
|
requiredString('changeName');
|
|
369
371
|
requiredString('changeDirPath');
|
|
370
372
|
requiredString('targetId');
|
|
373
|
+
if (o.harness !== undefined &&
|
|
374
|
+
(typeof o.harness !== 'string' || !EPISODE_HARNESSES.includes(o.harness))) {
|
|
375
|
+
throw new Error(`Invalid episode.json for ${episodeId}: field "harness" must be claude, codex, or opencode when present`);
|
|
376
|
+
}
|
|
371
377
|
requiredString('createdAt');
|
|
372
378
|
requiredString('updatedAt');
|
|
373
379
|
numberOrNull('policyVersionMain');
|
|
@@ -538,7 +544,6 @@ export async function listEpisodes(repoRoot) {
|
|
|
538
544
|
raw = await fs.readFile(path.join(baseDir, entry.name, EPISODE_JSON_FILE), 'utf8');
|
|
539
545
|
}
|
|
540
546
|
catch {
|
|
541
|
-
// eslint-disable-next-line no-console
|
|
542
547
|
console.warn(`[episode-store] skipping ${entry.name}: missing or unreadable ${EPISODE_JSON_FILE}`);
|
|
543
548
|
continue;
|
|
544
549
|
}
|
|
@@ -547,7 +552,6 @@ export async function listEpisodes(repoRoot) {
|
|
|
547
552
|
parsed = parseEpisodeJson(raw, entry.name);
|
|
548
553
|
}
|
|
549
554
|
catch {
|
|
550
|
-
// eslint-disable-next-line no-console
|
|
551
555
|
console.warn(`[episode-store] skipping ${entry.name}: invalid ${EPISODE_JSON_FILE}`);
|
|
552
556
|
continue;
|
|
553
557
|
}
|
|
@@ -484,6 +484,7 @@ export async function runEvolvingAgent(opts) {
|
|
|
484
484
|
timeoutMs,
|
|
485
485
|
harness: opts.harness,
|
|
486
486
|
});
|
|
487
|
+
await restoreTargetFileSnapshot(repoRoot, currentFiles);
|
|
487
488
|
if (run.exitCode !== 0 || run.stdout.length === 0) {
|
|
488
489
|
// Agent crash is NOT repaired (the evolving agent's invocation contract).
|
|
489
490
|
throw new EvolvingAgentInvocationError(run.stderr);
|
|
@@ -571,4 +572,11 @@ export async function runEvolvingAgent(opts) {
|
|
|
571
572
|
await advanceEpisodeStage({ repoRoot, episodeId, stage: 'evolved' });
|
|
572
573
|
return { kind: 'evolved', ledgerEntry };
|
|
573
574
|
}
|
|
575
|
+
async function restoreTargetFileSnapshot(repoRoot, files) {
|
|
576
|
+
await Promise.all(files.map(async (file) => {
|
|
577
|
+
const fullPath = path.join(repoRoot, ...file.relPath.split('/'));
|
|
578
|
+
await fs.mkdir(path.dirname(fullPath), { recursive: true });
|
|
579
|
+
await fs.writeFile(fullPath, file.content, 'utf8');
|
|
580
|
+
}));
|
|
581
|
+
}
|
|
574
582
|
//# sourceMappingURL=evolving-agent.js.map
|
|
@@ -64,8 +64,8 @@ export declare function resolveIdleTimeoutMs(harness?: AgentHarness): number;
|
|
|
64
64
|
* (1) `SYNERGYSPEC_SELFEVOLVING_AGENT_TIMEOUT_MS` when it parses to a positive
|
|
65
65
|
* finite integer — a host-wide tunable that overrides every harness.
|
|
66
66
|
* (2) the per-harness default ({@link HARNESS_TIMEOUT_DEFAULTS_MS}): the 10-min
|
|
67
|
-
* {@link DEFAULT_AGENT_TIMEOUT_MS} for claude/codex, a
|
|
68
|
-
*
|
|
67
|
+
* {@link DEFAULT_AGENT_TIMEOUT_MS} for claude/codex, and a longer wall for
|
|
68
|
+
* opencode live edit attempts.
|
|
69
69
|
*
|
|
70
70
|
* `harness` omitted ⇒ {@link resolveHostHarness} is consulted so the default is
|
|
71
71
|
* host-appropriate.
|
|
@@ -77,13 +77,13 @@ export declare function resolveAgentTimeoutMs(harness?: AgentHarness): number;
|
|
|
77
77
|
* Precedence:
|
|
78
78
|
* (a) `SYNERGYSPEC_SELFEVOLVING_HOST_HARNESS` when it equals claude|codex|opencode.
|
|
79
79
|
* (b) Heuristic on the ambient environment:
|
|
80
|
-
* - `CODEX_HOME` or any `CODEX_*` var set → 'codex'.
|
|
81
80
|
* - `OPENCODE_DATA_DIR` or any `OPENCODE_*` var set → 'opencode'.
|
|
81
|
+
* - `CODEX_HOME` or any `CODEX_*` var set → 'codex'.
|
|
82
82
|
* (c) Default 'claude'.
|
|
83
83
|
*
|
|
84
|
-
*
|
|
85
|
-
*
|
|
86
|
-
*
|
|
84
|
+
* OpenCode is checked before Codex because Codex can be the meta-runner that is
|
|
85
|
+
* invoking an OpenCode smoke test; in that mixed environment OPENCODE_* is the
|
|
86
|
+
* stronger signal for the observed run whose trajectory we must grade.
|
|
87
87
|
*/
|
|
88
88
|
export declare function resolveHostHarness(): AgentHarness;
|
|
89
89
|
/**
|
|
@@ -124,8 +124,11 @@ export interface HostHarnessResolution {
|
|
|
124
124
|
* wrong binary,
|
|
125
125
|
* (4) 'claude'.
|
|
126
126
|
*
|
|
127
|
-
*
|
|
128
|
-
*
|
|
127
|
+
* This resolver is read-only. Command entry points that need to seed an
|
|
128
|
+
* env-less subagent call `seedHostHarnessForRepo`; keeping this function pure
|
|
129
|
+
* matters because learn preview/report generation uses it during trajectory
|
|
130
|
+
* lookup and must not write sidecar files.
|
|
131
|
+
*
|
|
129
132
|
* The env checks are replicated inline (rather than only calling the sync
|
|
130
133
|
* {@link resolveHostHarness}) precisely so we can tell "env gave a real signal"
|
|
131
134
|
* apart from "defaulted to claude with no signal" — the sync resolver collapses
|
|
@@ -133,20 +136,35 @@ export interface HostHarnessResolution {
|
|
|
133
136
|
*/
|
|
134
137
|
export declare function resolveHostHarnessDetailsForRepo(repoRoot: string): Promise<HostHarnessResolution>;
|
|
135
138
|
export declare function resolveHostHarnessForRepo(repoRoot: string): Promise<AgentHarness>;
|
|
139
|
+
/**
|
|
140
|
+
* Resolve the host harness and persist only a confident host signal (explicit
|
|
141
|
+
* override or CODEX_/OPENCODE_ env). This is the side-effecting entry point for
|
|
142
|
+
* command handlers that are about to spawn env-less subagents; core report and
|
|
143
|
+
* trajectory readers should use the read-only resolver above.
|
|
144
|
+
*/
|
|
145
|
+
export declare function seedHostHarnessForRepo(repoRoot: string): Promise<HostHarnessResolution>;
|
|
136
146
|
export interface HeadlessCommand {
|
|
137
147
|
binary: string;
|
|
138
148
|
args: string[];
|
|
139
149
|
/** When true, the prompt must be written to the child's stdin (and stdin end()ed). */
|
|
140
150
|
useStdin: boolean;
|
|
141
151
|
}
|
|
152
|
+
export interface HeadlessSpawnCommand extends HeadlessCommand {
|
|
153
|
+
/**
|
|
154
|
+
* Native executables and POSIX commands keep `shell:false`. Windows `.cmd` /
|
|
155
|
+
* `.bat` shims are wrapped explicitly through `cmd.exe`, also with
|
|
156
|
+
* `shell:false`, so Node does not concatenate unescaped args.
|
|
157
|
+
*/
|
|
158
|
+
shell: boolean;
|
|
159
|
+
}
|
|
142
160
|
/**
|
|
143
161
|
* Build the concrete `{binary, args, useStdin}` invocation for a headless run.
|
|
144
162
|
*
|
|
145
163
|
* Full escape hatch: if `SYNERGYSPEC_CODE_AGENT_COMMAND` is set, it is parsed as a
|
|
146
|
-
* JSON `string[]` template. The literal
|
|
147
|
-
*
|
|
148
|
-
*
|
|
149
|
-
*
|
|
164
|
+
* JSON `string[]` template. The literal token `{cwd}` is substituted in each
|
|
165
|
+
* element; `binary = template[0]`, `args = template.slice(1)`. `{prompt}` is
|
|
166
|
+
* deliberately rejected: loop-v2 prompts are too large for argv and must flow
|
|
167
|
+
* through stdin for every harness and override.
|
|
150
168
|
*
|
|
151
169
|
* Otherwise the command is derived from the harness (default
|
|
152
170
|
* {@link resolveHostHarness}). Every harness streams the prompt over stdin
|
|
@@ -157,6 +175,11 @@ export declare function buildHeadlessCommand(prompt: string, opts: {
|
|
|
157
175
|
harness?: AgentHarness;
|
|
158
176
|
binaryOverride?: string;
|
|
159
177
|
}): HeadlessCommand;
|
|
178
|
+
export declare function resolveHeadlessCommandForSpawn(command: HeadlessCommand, opts?: {
|
|
179
|
+
platform?: NodeJS.Platform;
|
|
180
|
+
env?: NodeJS.ProcessEnv;
|
|
181
|
+
isExecutableFile?: (candidate: string, isWindows: boolean) => boolean;
|
|
182
|
+
}): HeadlessSpawnCommand;
|
|
160
183
|
export interface RunHeadlessAgentResult {
|
|
161
184
|
exitCode: number | null;
|
|
162
185
|
stdout: string;
|
|
@@ -34,17 +34,18 @@ const HARNESSES = ['claude', 'codex', 'opencode'];
|
|
|
34
34
|
export const DEFAULT_AGENT_TIMEOUT_MS = 600_000;
|
|
35
35
|
/**
|
|
36
36
|
* Per-host absolute-timeout defaults. claude/codex keep the 10-min
|
|
37
|
-
* {@link DEFAULT_AGENT_TIMEOUT_MS}; opencode
|
|
38
|
-
*
|
|
39
|
-
*
|
|
40
|
-
*
|
|
37
|
+
* {@link DEFAULT_AGENT_TIMEOUT_MS}; opencode gets a longer wall because the
|
|
38
|
+
* v2.1.5 Windows/OpenCode smoke run reached reward/scoring, then killed the
|
|
39
|
+
* evolving agent at the previous 5-min wall while it was still producing a
|
|
40
|
+
* bounded candidate. The idle watchdog remains the earlier trip wire for silent
|
|
41
|
+
* wedges, so the absolute wall should be large enough for a live edit attempt.
|
|
41
42
|
* The wall is still overridable per-host via
|
|
42
43
|
* `SYNERGYSPEC_SELFEVOLVING_AGENT_TIMEOUT_MS` ({@link resolveAgentTimeoutMs}).
|
|
43
44
|
*/
|
|
44
45
|
const HARNESS_TIMEOUT_DEFAULTS_MS = {
|
|
45
46
|
claude: DEFAULT_AGENT_TIMEOUT_MS,
|
|
46
47
|
codex: DEFAULT_AGENT_TIMEOUT_MS,
|
|
47
|
-
opencode:
|
|
48
|
+
opencode: 900_000,
|
|
48
49
|
};
|
|
49
50
|
/**
|
|
50
51
|
* Default STDOUT/STDERR-idle watchdog window (2 min). If a spawned host CLI
|
|
@@ -62,11 +63,13 @@ export const DEFAULT_AGENT_IDLE_TIMEOUT_MS = 120_000;
|
|
|
62
63
|
* emit ZERO bytes for well over 2 min while it reasons, so claude/codex get a
|
|
63
64
|
* 5-min idle leash. opencode keeps the tighter 2-min window — it is the
|
|
64
65
|
* empirically-wedging host (the v2.1.2 hang emitted no output at all) and a
|
|
65
|
-
* faster idle kill is what we want there.
|
|
66
|
+
* faster idle kill is what we want there. opencode's absolute wall is longer
|
|
67
|
+
* than claude/codex because its live edit attempts can be slower even when they
|
|
68
|
+
* are not silent.
|
|
66
69
|
*
|
|
67
70
|
* INVARIANT: every harness's idle default is strictly LESS than its absolute
|
|
68
71
|
* default ({@link HARNESS_TIMEOUT_DEFAULTS_MS}) so the idle watchdog stays the
|
|
69
|
-
* earlier trip wire (claude 300<600, codex 300<600, opencode 120<300).
|
|
72
|
+
* earlier trip wire (claude 300<600, codex 300<600, opencode 120<900).
|
|
70
73
|
* Overridable per host via `SYNERGYSPEC_SELFEVOLVING_AGENT_IDLE_TIMEOUT_MS`
|
|
71
74
|
* ({@link resolveIdleTimeoutMs}).
|
|
72
75
|
*/
|
|
@@ -125,8 +128,8 @@ const AGENT_TIMEOUT_ENV = 'SYNERGYSPEC_SELFEVOLVING_AGENT_TIMEOUT_MS';
|
|
|
125
128
|
* (1) `SYNERGYSPEC_SELFEVOLVING_AGENT_TIMEOUT_MS` when it parses to a positive
|
|
126
129
|
* finite integer — a host-wide tunable that overrides every harness.
|
|
127
130
|
* (2) the per-harness default ({@link HARNESS_TIMEOUT_DEFAULTS_MS}): the 10-min
|
|
128
|
-
* {@link DEFAULT_AGENT_TIMEOUT_MS} for claude/codex, a
|
|
129
|
-
*
|
|
131
|
+
* {@link DEFAULT_AGENT_TIMEOUT_MS} for claude/codex, and a longer wall for
|
|
132
|
+
* opencode live edit attempts.
|
|
130
133
|
*
|
|
131
134
|
* `harness` omitted ⇒ {@link resolveHostHarness} is consulted so the default is
|
|
132
135
|
* host-appropriate.
|
|
@@ -150,25 +153,25 @@ function isAgentHarness(value) {
|
|
|
150
153
|
* Precedence:
|
|
151
154
|
* (a) `SYNERGYSPEC_SELFEVOLVING_HOST_HARNESS` when it equals claude|codex|opencode.
|
|
152
155
|
* (b) Heuristic on the ambient environment:
|
|
153
|
-
* - `CODEX_HOME` or any `CODEX_*` var set → 'codex'.
|
|
154
156
|
* - `OPENCODE_DATA_DIR` or any `OPENCODE_*` var set → 'opencode'.
|
|
157
|
+
* - `CODEX_HOME` or any `CODEX_*` var set → 'codex'.
|
|
155
158
|
* (c) Default 'claude'.
|
|
156
159
|
*
|
|
157
|
-
*
|
|
158
|
-
*
|
|
159
|
-
*
|
|
160
|
+
* OpenCode is checked before Codex because Codex can be the meta-runner that is
|
|
161
|
+
* invoking an OpenCode smoke test; in that mixed environment OPENCODE_* is the
|
|
162
|
+
* stronger signal for the observed run whose trajectory we must grade.
|
|
160
163
|
*/
|
|
161
164
|
export function resolveHostHarness() {
|
|
162
165
|
const override = process.env.SYNERGYSPEC_SELFEVOLVING_HOST_HARNESS;
|
|
163
166
|
if (isAgentHarness(override))
|
|
164
167
|
return override;
|
|
165
168
|
const envKeys = Object.keys(process.env);
|
|
166
|
-
const hasCodex = process.env.CODEX_HOME !== undefined || envKeys.some((k) => k.startsWith('CODEX_'));
|
|
167
|
-
if (hasCodex)
|
|
168
|
-
return 'codex';
|
|
169
169
|
const hasOpencode = process.env.OPENCODE_DATA_DIR !== undefined || envKeys.some((k) => k.startsWith('OPENCODE_'));
|
|
170
170
|
if (hasOpencode)
|
|
171
171
|
return 'opencode';
|
|
172
|
+
const hasCodex = process.env.CODEX_HOME !== undefined || envKeys.some((k) => k.startsWith('CODEX_'));
|
|
173
|
+
if (hasCodex)
|
|
174
|
+
return 'codex';
|
|
172
175
|
return 'claude';
|
|
173
176
|
}
|
|
174
177
|
// ---------------------------------------------------------------------------
|
|
@@ -194,14 +197,23 @@ function hostHarnessPath(repoRoot) {
|
|
|
194
197
|
* spawns, never a precondition for the current run.
|
|
195
198
|
*/
|
|
196
199
|
export async function persistHostHarness(repoRoot, harness) {
|
|
200
|
+
let tmpFile = null;
|
|
197
201
|
try {
|
|
198
202
|
const file = hostHarnessPath(repoRoot);
|
|
199
203
|
await fs.mkdir(path.dirname(file), { recursive: true });
|
|
200
|
-
|
|
204
|
+
tmpFile = path.join(path.dirname(file), `${HOST_HARNESS_FILE}.${process.pid}.${Date.now()}.${Math.random().toString(36).slice(2)}.tmp`);
|
|
205
|
+
await fs.writeFile(tmpFile, `${JSON.stringify({ harness }, null, 2)}\n`, 'utf8');
|
|
206
|
+
await fs.rename(tmpFile, file);
|
|
207
|
+
tmpFile = null;
|
|
201
208
|
}
|
|
202
209
|
catch {
|
|
203
210
|
// Swallow: a read-only or transient FS must not break the loop.
|
|
204
211
|
}
|
|
212
|
+
finally {
|
|
213
|
+
if (tmpFile) {
|
|
214
|
+
await fs.unlink(tmpFile).catch(() => undefined);
|
|
215
|
+
}
|
|
216
|
+
}
|
|
205
217
|
}
|
|
206
218
|
/**
|
|
207
219
|
* Read + parse + validate the persisted-harness sidecar. Returns the
|
|
@@ -253,18 +265,16 @@ function binaryResolvable(binary) {
|
|
|
253
265
|
if (binary.trim().length === 0)
|
|
254
266
|
return false;
|
|
255
267
|
const isWindows = process.platform === 'win32';
|
|
256
|
-
// Windows PATHEXT (e.g. `.COM;.EXE;.BAT;.CMD`)
|
|
257
|
-
//
|
|
258
|
-
const exts = isWindows
|
|
259
|
-
? ['', ...(process.env.PATHEXT ?? '.COM;.EXE;.BAT;.CMD').split(';').filter(Boolean)]
|
|
260
|
-
: [''];
|
|
268
|
+
// Windows PATHEXT (e.g. `.COM;.EXE;.BAT;.CMD`). A bare extensionless npm
|
|
269
|
+
// shim is not a CreateProcess target; prefer the PATHEXT-resolved .cmd/.exe.
|
|
270
|
+
const exts = executableExtensions(binary, isWindows, process.env.PATHEXT);
|
|
261
271
|
const isExecutableFile = (candidate) => {
|
|
262
272
|
try {
|
|
263
273
|
const st = statSync(candidate);
|
|
264
274
|
if (!st.isFile())
|
|
265
275
|
return false;
|
|
266
276
|
if (isWindows)
|
|
267
|
-
return
|
|
277
|
+
return isWindowsSpawnCompatibleExecutable(candidate);
|
|
268
278
|
// POSIX: any execute bit (owner/group/other) marks it runnable.
|
|
269
279
|
return (st.mode & 0o111) !== 0;
|
|
270
280
|
}
|
|
@@ -314,8 +324,11 @@ function persistedBinary(harness) {
|
|
|
314
324
|
* wrong binary,
|
|
315
325
|
* (4) 'claude'.
|
|
316
326
|
*
|
|
317
|
-
*
|
|
318
|
-
*
|
|
327
|
+
* This resolver is read-only. Command entry points that need to seed an
|
|
328
|
+
* env-less subagent call `seedHostHarnessForRepo`; keeping this function pure
|
|
329
|
+
* matters because learn preview/report generation uses it during trajectory
|
|
330
|
+
* lookup and must not write sidecar files.
|
|
331
|
+
*
|
|
319
332
|
* The env checks are replicated inline (rather than only calling the sync
|
|
320
333
|
* {@link resolveHostHarness}) precisely so we can tell "env gave a real signal"
|
|
321
334
|
* apart from "defaulted to claude with no signal" — the sync resolver collapses
|
|
@@ -325,24 +338,21 @@ export async function resolveHostHarnessDetailsForRepo(repoRoot) {
|
|
|
325
338
|
// (1) explicit override.
|
|
326
339
|
const override = process.env.SYNERGYSPEC_SELFEVOLVING_HOST_HARNESS;
|
|
327
340
|
if (isAgentHarness(override)) {
|
|
328
|
-
void persistHostHarness(repoRoot, override);
|
|
329
341
|
return { harness: override, source: 'override' };
|
|
330
342
|
}
|
|
331
343
|
// (2) env heuristic — only a POSITIVE hit counts (mirrors resolveHostHarness'
|
|
332
|
-
// CODEX_-before-OPENCODE_ ordering, but distinguishes a real signal from
|
|
344
|
+
// OPENCODE_-before-CODEX_ ordering, but distinguishes a real signal from
|
|
333
345
|
// the 'claude' fall-through).
|
|
334
346
|
const envKeys = Object.keys(process.env);
|
|
335
|
-
const hasCodex = process.env.CODEX_HOME !== undefined || envKeys.some((k) => k.startsWith('CODEX_'));
|
|
336
|
-
if (hasCodex) {
|
|
337
|
-
void persistHostHarness(repoRoot, 'codex');
|
|
338
|
-
return { harness: 'codex', source: 'env' };
|
|
339
|
-
}
|
|
340
347
|
const hasOpencode = process.env.OPENCODE_DATA_DIR !== undefined ||
|
|
341
348
|
envKeys.some((k) => k.startsWith('OPENCODE_'));
|
|
342
349
|
if (hasOpencode) {
|
|
343
|
-
void persistHostHarness(repoRoot, 'opencode');
|
|
344
350
|
return { harness: 'opencode', source: 'env' };
|
|
345
351
|
}
|
|
352
|
+
const hasCodex = process.env.CODEX_HOME !== undefined || envKeys.some((k) => k.startsWith('CODEX_'));
|
|
353
|
+
if (hasCodex) {
|
|
354
|
+
return { harness: 'codex', source: 'env' };
|
|
355
|
+
}
|
|
346
356
|
// (3) persisted sidecar (the env-less-subagent recovery path) — honored ONLY
|
|
347
357
|
// when its binary is resolvable here. The persisted value for codex /
|
|
348
358
|
// opencode IS the binary name; probing it on PATH skips a wrong/stale
|
|
@@ -358,14 +368,27 @@ export async function resolveHostHarnessDetailsForRepo(repoRoot) {
|
|
|
358
368
|
export async function resolveHostHarnessForRepo(repoRoot) {
|
|
359
369
|
return (await resolveHostHarnessDetailsForRepo(repoRoot)).harness;
|
|
360
370
|
}
|
|
371
|
+
/**
|
|
372
|
+
* Resolve the host harness and persist only a confident host signal (explicit
|
|
373
|
+
* override or CODEX_/OPENCODE_ env). This is the side-effecting entry point for
|
|
374
|
+
* command handlers that are about to spawn env-less subagents; core report and
|
|
375
|
+
* trajectory readers should use the read-only resolver above.
|
|
376
|
+
*/
|
|
377
|
+
export async function seedHostHarnessForRepo(repoRoot) {
|
|
378
|
+
const resolution = await resolveHostHarnessDetailsForRepo(repoRoot);
|
|
379
|
+
if (resolution.source === 'override' || resolution.source === 'env') {
|
|
380
|
+
await persistHostHarness(repoRoot, resolution.harness);
|
|
381
|
+
}
|
|
382
|
+
return resolution;
|
|
383
|
+
}
|
|
361
384
|
/**
|
|
362
385
|
* Build the concrete `{binary, args, useStdin}` invocation for a headless run.
|
|
363
386
|
*
|
|
364
387
|
* Full escape hatch: if `SYNERGYSPEC_CODE_AGENT_COMMAND` is set, it is parsed as a
|
|
365
|
-
* JSON `string[]` template. The literal
|
|
366
|
-
*
|
|
367
|
-
*
|
|
368
|
-
*
|
|
388
|
+
* JSON `string[]` template. The literal token `{cwd}` is substituted in each
|
|
389
|
+
* element; `binary = template[0]`, `args = template.slice(1)`. `{prompt}` is
|
|
390
|
+
* deliberately rejected: loop-v2 prompts are too large for argv and must flow
|
|
391
|
+
* through stdin for every harness and override.
|
|
369
392
|
*
|
|
370
393
|
* Otherwise the command is derived from the harness (default
|
|
371
394
|
* {@link resolveHostHarness}). Every harness streams the prompt over stdin
|
|
@@ -385,12 +408,14 @@ export function buildHeadlessCommand(prompt, opts) {
|
|
|
385
408
|
throw new Error('SYNERGYSPEC_CODE_AGENT_COMMAND must be a non-empty JSON array of strings');
|
|
386
409
|
}
|
|
387
410
|
const rawTemplate = parsed;
|
|
388
|
-
|
|
389
|
-
|
|
411
|
+
if (rawTemplate.some((e) => e.includes('{prompt}'))) {
|
|
412
|
+
throw new Error('SYNERGYSPEC_CODE_AGENT_COMMAND must not contain {prompt}; prompts are always streamed over stdin');
|
|
413
|
+
}
|
|
414
|
+
const substituted = rawTemplate.map((e) => e.split('{cwd}').join(opts.cwd));
|
|
390
415
|
return {
|
|
391
416
|
binary: substituted[0],
|
|
392
417
|
args: substituted.slice(1),
|
|
393
|
-
useStdin,
|
|
418
|
+
useStdin: true,
|
|
394
419
|
};
|
|
395
420
|
}
|
|
396
421
|
const harness = opts.harness ?? resolveHostHarness();
|
|
@@ -422,6 +447,110 @@ export function buildHeadlessCommand(prompt, opts) {
|
|
|
422
447
|
}
|
|
423
448
|
}
|
|
424
449
|
}
|
|
450
|
+
export function resolveHeadlessCommandForSpawn(command, opts = {}) {
|
|
451
|
+
const platform = opts.platform ?? process.platform;
|
|
452
|
+
if (platform !== 'win32') {
|
|
453
|
+
return { ...command, shell: false };
|
|
454
|
+
}
|
|
455
|
+
const resolved = resolveWindowsExecutable(command.binary, {
|
|
456
|
+
env: opts.env ?? process.env,
|
|
457
|
+
isExecutableFile: opts.isExecutableFile ??
|
|
458
|
+
((candidate) => {
|
|
459
|
+
try {
|
|
460
|
+
return statSync(candidate).isFile();
|
|
461
|
+
}
|
|
462
|
+
catch {
|
|
463
|
+
return false;
|
|
464
|
+
}
|
|
465
|
+
}),
|
|
466
|
+
});
|
|
467
|
+
const binary = resolved ?? command.binary;
|
|
468
|
+
if (isUnsupportedWindowsExecutable(binary)) {
|
|
469
|
+
throw new Error(`Windows headless agent binary '${binary}' has unsupported extension '${path.win32
|
|
470
|
+
.extname(binary)
|
|
471
|
+
.toLowerCase()}'; use a .cmd, .bat, .exe, or .com shim, or invoke the interpreter explicitly via SYNERGYSPEC_CODE_AGENT_COMMAND.`);
|
|
472
|
+
}
|
|
473
|
+
if (isWindowsShellScript(binary)) {
|
|
474
|
+
const wrapper = wrapWindowsShellScript(binary, command.args, opts.env ?? process.env);
|
|
475
|
+
return {
|
|
476
|
+
...command,
|
|
477
|
+
binary: wrapper.binary,
|
|
478
|
+
args: wrapper.args,
|
|
479
|
+
shell: false,
|
|
480
|
+
};
|
|
481
|
+
}
|
|
482
|
+
return {
|
|
483
|
+
...command,
|
|
484
|
+
binary,
|
|
485
|
+
shell: false,
|
|
486
|
+
};
|
|
487
|
+
}
|
|
488
|
+
function executableExtensions(binary, isWindows, pathext) {
|
|
489
|
+
if (!isWindows)
|
|
490
|
+
return [''];
|
|
491
|
+
if (path.win32.extname(binary))
|
|
492
|
+
return [''];
|
|
493
|
+
return (pathext ?? '.COM;.EXE;.BAT;.CMD')
|
|
494
|
+
.split(';')
|
|
495
|
+
.map((ext) => ext.trim())
|
|
496
|
+
.filter(Boolean);
|
|
497
|
+
}
|
|
498
|
+
function resolveWindowsExecutable(binary, opts) {
|
|
499
|
+
if (!binary || binary.trim().length === 0)
|
|
500
|
+
return null;
|
|
501
|
+
const exts = executableExtensions(binary, true, opts.env.PATHEXT);
|
|
502
|
+
const candidates = [];
|
|
503
|
+
const hasPathSeparator = binary.includes('/') || binary.includes('\\');
|
|
504
|
+
if (hasPathSeparator) {
|
|
505
|
+
candidates.push(...exts.map((ext) => binary + ext));
|
|
506
|
+
}
|
|
507
|
+
else {
|
|
508
|
+
const entries = (opts.env.PATH ?? '').split(';').filter(Boolean);
|
|
509
|
+
for (const dir of entries) {
|
|
510
|
+
for (const ext of exts)
|
|
511
|
+
candidates.push(path.win32.join(dir, binary + ext));
|
|
512
|
+
}
|
|
513
|
+
}
|
|
514
|
+
let firstUnsupported = null;
|
|
515
|
+
for (const candidate of candidates) {
|
|
516
|
+
if (!opts.isExecutableFile(candidate, true))
|
|
517
|
+
continue;
|
|
518
|
+
if (isWindowsSpawnCompatibleExecutable(candidate))
|
|
519
|
+
return candidate;
|
|
520
|
+
firstUnsupported ??= candidate;
|
|
521
|
+
}
|
|
522
|
+
if (firstUnsupported) {
|
|
523
|
+
throw new Error(`Windows headless agent binary resolved to '${firstUnsupported}', but that extension cannot be spawned with shell:false; use a .cmd, .bat, .exe, or .com shim, or invoke the interpreter explicitly via SYNERGYSPEC_CODE_AGENT_COMMAND.`);
|
|
524
|
+
}
|
|
525
|
+
return null;
|
|
526
|
+
}
|
|
527
|
+
function isWindowsShellScript(binary) {
|
|
528
|
+
const ext = path.win32.extname(binary).toLowerCase();
|
|
529
|
+
return ext === '.cmd' || ext === '.bat';
|
|
530
|
+
}
|
|
531
|
+
function isWindowsSpawnCompatibleExecutable(binary) {
|
|
532
|
+
const ext = path.win32.extname(binary).toLowerCase();
|
|
533
|
+
return ext === '' || ext === '.com' || ext === '.exe' || ext === '.bat' || ext === '.cmd';
|
|
534
|
+
}
|
|
535
|
+
function isUnsupportedWindowsExecutable(binary) {
|
|
536
|
+
const ext = path.win32.extname(binary).toLowerCase();
|
|
537
|
+
return ext.length > 0 && !isWindowsSpawnCompatibleExecutable(binary);
|
|
538
|
+
}
|
|
539
|
+
function wrapWindowsShellScript(binary, args, env) {
|
|
540
|
+
const comspec = firstNonBlankEnv(env, 'ComSpec', 'COMSPEC') ?? 'cmd.exe';
|
|
541
|
+
return {
|
|
542
|
+
binary: comspec,
|
|
543
|
+
args: ['/d', '/s', '/c', 'call', binary, ...args],
|
|
544
|
+
};
|
|
545
|
+
}
|
|
546
|
+
function firstNonBlankEnv(env, ...keys) {
|
|
547
|
+
for (const key of keys) {
|
|
548
|
+
const value = env[key];
|
|
549
|
+
if (typeof value === 'string' && value.trim().length > 0)
|
|
550
|
+
return value;
|
|
551
|
+
}
|
|
552
|
+
return undefined;
|
|
553
|
+
}
|
|
425
554
|
/**
|
|
426
555
|
* The claude-default binary fallback: `SYNERGYSPEC_SELFEVOLVING_CLAUDE_BIN` when
|
|
427
556
|
* non-empty, else `'claude'`. Kept here so {@link buildHeadlessCommand} is the
|
|
@@ -457,16 +586,27 @@ function claudeDefaultBinary() {
|
|
|
457
586
|
*/
|
|
458
587
|
export async function runHeadlessAgent(prompt, opts) {
|
|
459
588
|
const spawnImpl = opts.spawn ?? nodeSpawn;
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
589
|
+
let spawnCommand;
|
|
590
|
+
try {
|
|
591
|
+
const command = buildHeadlessCommand(prompt, {
|
|
592
|
+
cwd: opts.cwd,
|
|
593
|
+
harness: opts.harness,
|
|
594
|
+
binaryOverride: opts.binaryOverride,
|
|
595
|
+
});
|
|
596
|
+
spawnCommand = resolveHeadlessCommandForSpawn(command);
|
|
597
|
+
}
|
|
598
|
+
catch (e) {
|
|
599
|
+
return {
|
|
600
|
+
exitCode: -1,
|
|
601
|
+
stdout: '',
|
|
602
|
+
stderr: e instanceof Error ? e.message : String(e),
|
|
603
|
+
};
|
|
604
|
+
}
|
|
465
605
|
return await new Promise((resolve) => {
|
|
466
606
|
let child;
|
|
467
607
|
try {
|
|
468
|
-
child = spawnImpl(
|
|
469
|
-
shell:
|
|
608
|
+
child = spawnImpl(spawnCommand.binary, spawnCommand.args, {
|
|
609
|
+
shell: spawnCommand.shell,
|
|
470
610
|
cwd: opts.cwd,
|
|
471
611
|
});
|
|
472
612
|
}
|
|
@@ -524,7 +664,7 @@ export async function runHeadlessAgent(prompt, opts) {
|
|
|
524
664
|
// ignore
|
|
525
665
|
}
|
|
526
666
|
};
|
|
527
|
-
if (
|
|
667
|
+
if (spawnCommand.useStdin) {
|
|
528
668
|
// Swallow stdin stream errors (e.g. EPIPE when the child exits before it
|
|
529
669
|
// has read the whole — possibly 100KB+ — prompt). The real failure is
|
|
530
670
|
// reported via the child's own 'error'/'close' handlers below; an
|
|
@@ -585,7 +725,6 @@ export async function runHeadlessAgent(prompt, opts) {
|
|
|
585
725
|
if (settled)
|
|
586
726
|
return;
|
|
587
727
|
const elapsedS = Math.round((Date.now() - startedAt) / 1000);
|
|
588
|
-
// eslint-disable-next-line no-console
|
|
589
728
|
console.error(`[self-evolution] headless agent running: ${elapsedS}s elapsed, ${bytesReceived} bytes received`);
|
|
590
729
|
}, HEARTBEAT_INTERVAL_MS);
|
|
591
730
|
heartbeatTimer.unref?.();
|