synergyspec-selfevolving 2.1.0 → 2.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/learn.js +29 -3
- package/dist/commands/self-evolution-episode.js +37 -1
- package/dist/core/fitness/health/local-source.d.ts +11 -0
- package/dist/core/fitness/health/local-source.js +53 -1
- package/dist/core/project-config.d.ts +5 -0
- package/dist/core/project-config.js +23 -1
- package/dist/core/self-evolution/critic-agent.d.ts +16 -1
- package/dist/core/self-evolution/critic-agent.js +87 -17
- package/dist/core/self-evolution/episode-orchestrator.d.ts +28 -0
- package/dist/core/self-evolution/episode-orchestrator.js +369 -220
- package/dist/core/self-evolution/episode-store.d.ts +41 -2
- package/dist/core/self-evolution/episode-store.js +33 -9
- package/dist/core/self-evolution/evolving-agent.d.ts +51 -2
- package/dist/core/self-evolution/evolving-agent.js +45 -4
- package/dist/core/self-evolution/host-harness.d.ts +43 -0
- package/dist/core/self-evolution/host-harness.js +192 -0
- package/dist/core/self-evolution/reward-agent.d.ts +68 -0
- package/dist/core/self-evolution/reward-agent.js +92 -23
- package/dist/core/self-evolution/reward-aggregator.d.ts +26 -7
- package/dist/core/self-evolution/reward-aggregator.js +78 -20
- package/dist/core/self-evolution/verdict.d.ts +3 -2
- package/dist/core/self-evolution/verdict.js +4 -1
- package/dist/dashboard/react-client.js +2 -1
- package/dist/ui/ascii-patterns.d.ts +7 -8
- package/dist/ui/ascii-patterns.js +54 -120
- package/dist/ui/welcome-screen.d.ts +8 -0
- package/dist/ui/welcome-screen.js +2 -2
- package/package.json +1 -1
package/dist/commands/learn.js
CHANGED
|
@@ -7,7 +7,7 @@ import { findTranscriptsForChange, resolveChangeDir, validateExplicitTrajectoryH
|
|
|
7
7
|
import { getTrajectoryForChange } from '../core/trajectory/registry.js';
|
|
8
8
|
import { toTrajectoryFacts, describeRunnerResults } from '../core/trajectory/facts.js';
|
|
9
9
|
import { toActionSkeleton } from '../core/trajectory/skeleton.js';
|
|
10
|
-
import { resolveHostHarness } from '../core/self-evolution/host-harness.js';
|
|
10
|
+
import { resolveHostHarness, resolveHostHarnessForRepo } from '../core/self-evolution/host-harness.js';
|
|
11
11
|
import { mineSuccessSignals } from '../core/self-evolution/success-channel.js';
|
|
12
12
|
import { captureMainArm, runEpisode, } from '../core/self-evolution/episode-orchestrator.js';
|
|
13
13
|
import { buildLLMSummaryCandidates, ingestLearnHandoff, } from '../core/learn/llm-summary.js';
|
|
@@ -36,6 +36,21 @@ export function registerLearnCommand(program, deps = {}) {
|
|
|
36
36
|
.action(async (change, options) => {
|
|
37
37
|
try {
|
|
38
38
|
const projectRoot = process.cwd();
|
|
39
|
+
// SEED the host harness for the env-less episode subagent. learn runs at
|
|
40
|
+
// HOST level, where the OPENCODE_*/CODEX_* env that distinguishes the
|
|
41
|
+
// host harness IS present; the downstream loop-v2 episode (and its
|
|
42
|
+
// reward/evolving agent spawns) can run in an env-less Task subagent that
|
|
43
|
+
// would otherwise default to the 'claude' binary. resolveHostHarnessForRepo
|
|
44
|
+
// self-persists the confidently-resolved harness to
|
|
45
|
+
// `.synergyspec-selfevolving/host-harness.json`, so the subagent reads it
|
|
46
|
+
// back instead of guessing. Best-effort: a persistence failure must never
|
|
47
|
+
// fail the learn run (a missing seed only degrades to today's behavior).
|
|
48
|
+
try {
|
|
49
|
+
await resolveHostHarnessForRepo(projectRoot);
|
|
50
|
+
}
|
|
51
|
+
catch {
|
|
52
|
+
// best-effort seed only.
|
|
53
|
+
}
|
|
39
54
|
// USER-TYPED handle flags are validated up front and fail LOUD
|
|
40
55
|
// (exit 1) on a miss — unlike the env-var channel, which keeps the
|
|
41
56
|
// fail-closed refusal semantics inside discovery (empty result, the
|
|
@@ -180,18 +195,29 @@ export function registerLearnCommand(program, deps = {}) {
|
|
|
180
195
|
report,
|
|
181
196
|
});
|
|
182
197
|
// Thread the loop-v2 reward judge-quality config (samples / noiseFloor /
|
|
183
|
-
// orderSwap / tamperCheck). Omitted ⇒ the orchestrator's
|
|
184
|
-
//
|
|
198
|
+
// orderSwap / tamperCheck / divergenceCheck). Omitted ⇒ the orchestrator's
|
|
199
|
+
// single-sample, divergence-routing default (no extra spawns).
|
|
185
200
|
const episodeConfig = readProjectConfig(projectRoot);
|
|
201
|
+
// Pass the host-resolved harness EXPLICITLY into the in-process episode
|
|
202
|
+
// (learn runs host-level where the harness is confidently resolvable),
|
|
203
|
+
// so the orchestrator's reward/evolving agent spawns never fall back to
|
|
204
|
+
// the default 'claude' binary on a non-claude host.
|
|
205
|
+
const harness = await resolveHostHarnessForRepo(projectRoot);
|
|
186
206
|
episodeOutcome = await runEpisodeImpl({
|
|
187
207
|
repoRoot: projectRoot,
|
|
188
208
|
targetId: concreteEvolveTarget.targetId,
|
|
189
209
|
changeName: report.changeName,
|
|
190
210
|
changeDirPath: report.changeDir,
|
|
191
211
|
mainArm,
|
|
212
|
+
harness,
|
|
192
213
|
...(episodeConfig?.selfEvolution?.reward
|
|
193
214
|
? { reward: episodeConfig.selfEvolution.reward }
|
|
194
215
|
: {}),
|
|
216
|
+
// Per-agent headless-spawn ceiling (ms). Omitted ⇒ the orchestrator's
|
|
217
|
+
// built-in DEFAULT_AGENT_TIMEOUT_MS applies.
|
|
218
|
+
...(episodeConfig?.selfEvolution?.agentTimeoutMs !== undefined
|
|
219
|
+
? { agentTimeoutMs: episodeConfig.selfEvolution.agentTimeoutMs }
|
|
220
|
+
: {}),
|
|
195
221
|
});
|
|
196
222
|
}
|
|
197
223
|
if (options.json) {
|
|
@@ -8,6 +8,7 @@ readPolicyLedger, readRejectBuffer, currentPolicyVersion, rollbackPolicyVersion,
|
|
|
8
8
|
lookupCanonicalTarget, listCanonicalTargets, DESIGN_ARTIFACT_TARGET_ID, } from '../core/self-evolution/index.js';
|
|
9
9
|
import { generateLearnReport } from '../core/learn.js';
|
|
10
10
|
import { validateExplicitTrajectoryHandle } from '../core/learn/trajectory-discovery.js';
|
|
11
|
+
import { resolveHostHarnessForRepo } from '../core/self-evolution/host-harness.js';
|
|
11
12
|
import { validateChangeExists } from './workflow/shared.js';
|
|
12
13
|
import { readProjectConfig } from '../core/project-config.js';
|
|
13
14
|
/**
|
|
@@ -150,12 +151,21 @@ export async function runEpisodeCommand(args, opts) {
|
|
|
150
151
|
let outcome;
|
|
151
152
|
try {
|
|
152
153
|
const episodeConfig = readProjectConfig(opts.repoRoot);
|
|
154
|
+
// Resolve the HOST harness once here (where the host's OPENCODE_*/CODEX_* env
|
|
155
|
+
// is still present) and thread it EXPLICITLY into the episode.
|
|
156
|
+
// resolveHostHarnessForRepo self-persists the env-resolved choice to
|
|
157
|
+
// `.synergyspec-selfevolving/host-harness.json`, so even when the
|
|
158
|
+
// orchestrator's reward/evolving agents later spawn from an env-less Task
|
|
159
|
+
// subagent they read the seeded harness instead of defaulting to the
|
|
160
|
+
// 'claude' binary (the ydata proposer-spawn failure).
|
|
161
|
+
const harness = await resolveHostHarnessForRepo(opts.repoRoot);
|
|
153
162
|
const episodeOptions = {
|
|
154
163
|
repoRoot: opts.repoRoot,
|
|
155
164
|
targetId,
|
|
156
165
|
changeName,
|
|
157
166
|
changeDirPath,
|
|
158
167
|
mainArm,
|
|
168
|
+
harness,
|
|
159
169
|
...(args.noBaseline ? { skipBaseline: true } : {}),
|
|
160
170
|
...(episodeConfig?.selfEvolution?.reward
|
|
161
171
|
? { reward: episodeConfig.selfEvolution.reward }
|
|
@@ -163,6 +173,12 @@ export async function runEpisodeCommand(args, opts) {
|
|
|
163
173
|
...(episodeConfig?.selfEvolution?.critic
|
|
164
174
|
? { critic: episodeConfig.selfEvolution.critic }
|
|
165
175
|
: {}),
|
|
176
|
+
// Per-agent headless-spawn ceiling (ms). Omitted ⇒ the orchestrator's
|
|
177
|
+
// built-in DEFAULT_AGENT_TIMEOUT_MS applies; configured to let a repo whose
|
|
178
|
+
// critic re-do baseline legitimately runs long raise the per-agent ceiling.
|
|
179
|
+
...(episodeConfig?.selfEvolution?.agentTimeoutMs !== undefined
|
|
180
|
+
? { agentTimeoutMs: episodeConfig.selfEvolution.agentTimeoutMs }
|
|
181
|
+
: {}),
|
|
166
182
|
};
|
|
167
183
|
outcome = await runEpisode(episodeOptions);
|
|
168
184
|
}
|
|
@@ -234,7 +250,27 @@ export async function runResumeEpisodeCommand(args, opts) {
|
|
|
234
250
|
const resumeEpisode = opts.resumeEpisode ?? resumeEpisodeImpl;
|
|
235
251
|
let result;
|
|
236
252
|
try {
|
|
237
|
-
|
|
253
|
+
// Resolve the HOST harness HERE (where the host's OPENCODE_*/CODEX_* env is
|
|
254
|
+
// still present) and thread it EXPLICITLY into the resumed episode. Resume is
|
|
255
|
+
// the operator re-entry MOST likely to run env-less (a recovery from another
|
|
256
|
+
// shell), so without this the resumed 演进智能体 EVOLVING AGENT re-spawns
|
|
257
|
+
// against the absent default 'claude' binary on an opencode/codex host — the
|
|
258
|
+
// ses_1330/1331 ENAMETOOLONG/spawn failure the harness sidecar exists to
|
|
259
|
+
// prevent. resolveHostHarnessForRepo self-persists the resolved choice, so
|
|
260
|
+
// an env-less Task subagent reads the seeded harness instead of defaulting.
|
|
261
|
+
const harness = await resolveHostHarnessForRepo(opts.repoRoot);
|
|
262
|
+
// Thread the configured per-agent headless-spawn ceiling (ms) into the
|
|
263
|
+
// resumed 演进智能体 EVOLVING AGENT. Omitted ⇒ the built-in
|
|
264
|
+
// DEFAULT_AGENT_TIMEOUT_MS applies.
|
|
265
|
+
const resumeConfig = readProjectConfig(opts.repoRoot);
|
|
266
|
+
result = await resumeEpisode({
|
|
267
|
+
repoRoot: opts.repoRoot,
|
|
268
|
+
episodeId: args.episodeId,
|
|
269
|
+
harness,
|
|
270
|
+
...(resumeConfig?.selfEvolution?.agentTimeoutMs !== undefined
|
|
271
|
+
? { agentTimeoutMs: resumeConfig.selfEvolution.agentTimeoutMs }
|
|
272
|
+
: {}),
|
|
273
|
+
});
|
|
238
274
|
}
|
|
239
275
|
catch (err) {
|
|
240
276
|
const message = err instanceof Error ? err.message : String(err);
|
|
@@ -36,6 +36,9 @@ export interface LocalPythonMetricSourceOptions {
|
|
|
36
36
|
/** Path to a slop-rules YAML for the ast-grep engine. When omitted, resolved
|
|
37
37
|
* to the `slop_rules.yaml` vendored next to the analyzer script. */
|
|
38
38
|
rulesPath?: string;
|
|
39
|
+
/** Wall-clock ceiling (ms) for one analyzer spawn before it is killed and the
|
|
40
|
+
* reading degraded to `null`. Defaults to {@link DEFAULT_ANALYZER_TIMEOUT_MS}. */
|
|
41
|
+
timeoutMs?: number;
|
|
39
42
|
}
|
|
40
43
|
/**
|
|
41
44
|
* Locate the ast-grep binary the analyzer's Python slop-rule engine should
|
|
@@ -59,6 +62,7 @@ export declare class LocalPythonMetricSource implements MetricSource {
|
|
|
59
62
|
private readonly scriptPath;
|
|
60
63
|
private readonly astGrepBin;
|
|
61
64
|
private readonly rulesPath;
|
|
65
|
+
private readonly timeoutMs;
|
|
62
66
|
constructor(options?: LocalPythonMetricSourceOptions);
|
|
63
67
|
/**
|
|
64
68
|
* Run the analyzer over `codeDir` and return its metrics, or `null` on any
|
|
@@ -84,6 +88,13 @@ export declare class LocalPythonMetricSource implements MetricSource {
|
|
|
84
88
|
* rules file exists (the analyzer's own PATH fallback still uses it even with
|
|
85
89
|
* no resolved binary). Resolves to the raw stdout string on a clean (exit 0)
|
|
86
90
|
* run, or `null` if the process cannot be spawned or exits non-zero.
|
|
91
|
+
*
|
|
92
|
+
* A {@link timeoutMs} wall-clock ceiling guards against a hung analyzer (an
|
|
93
|
+
* import deadlock, an ast-grep stall on a pathological file, an AV/junction
|
|
94
|
+
* traversal stall): on expiry the child is killed (SIGTERM, escalating to
|
|
95
|
+
* SIGKILL after {@link KILL_GRACE_MS}) and the reading degrades to `null` —
|
|
96
|
+
* the same "no signal" contract every other failure path already honours, so
|
|
97
|
+
* the awaiting episode never hangs with the in-flight lock held.
|
|
87
98
|
*/
|
|
88
99
|
private runAnalyzer;
|
|
89
100
|
}
|
|
@@ -25,6 +25,19 @@ import { createRequire } from 'node:module';
|
|
|
25
25
|
import path from 'node:path';
|
|
26
26
|
/** The exact set of numeric keys the analyzer emits. Order is irrelevant. */
|
|
27
27
|
const HEALTH_KEYS = ['structural_erosion', 'verbosity'];
|
|
28
|
+
/**
|
|
29
|
+
* Wall-clock ceiling for one analyzer spawn. The Python/ast-grep analyzer is
|
|
30
|
+
* normally sub-second; a run that exceeds this is treated as hung (e.g. a Python
|
|
31
|
+
* import deadlock, ast-grep stalling on a pathological file, or an AV/junction
|
|
32
|
+
* traversal stall on Windows) and degraded to the "no signal ⇒ null" contract.
|
|
33
|
+
*/
|
|
34
|
+
const DEFAULT_ANALYZER_TIMEOUT_MS = 120_000;
|
|
35
|
+
/**
|
|
36
|
+
* Grace window between the polite SIGTERM and the forced SIGKILL when a timed-out
|
|
37
|
+
* analyzer has not exited yet. Short on purpose: the goal is to stop holding the
|
|
38
|
+
* in-flight episode lock, not to let a wedged child linger.
|
|
39
|
+
*/
|
|
40
|
+
const KILL_GRACE_MS = 2_000;
|
|
28
41
|
/**
|
|
29
42
|
* Locate `scripts/code-health.py` relative to this module. Built output lives
|
|
30
43
|
* at `dist/core/fitness/health/local-source.js`; the script stays at the
|
|
@@ -218,12 +231,14 @@ export class LocalPythonMetricSource {
|
|
|
218
231
|
scriptPath;
|
|
219
232
|
astGrepBin;
|
|
220
233
|
rulesPath;
|
|
234
|
+
timeoutMs;
|
|
221
235
|
constructor(options = {}) {
|
|
222
236
|
this.pythonBin = options.pythonBin ?? defaultPythonBin();
|
|
223
237
|
this.spawnImpl = options.spawnImpl ?? nodeSpawn;
|
|
224
238
|
this.scriptPath = options.scriptPath ?? defaultScriptPath();
|
|
225
239
|
this.astGrepBin = options.astGrepBin ?? defaultAstGrepBin();
|
|
226
240
|
this.rulesPath = options.rulesPath ?? defaultRulesPath(this.scriptPath);
|
|
241
|
+
this.timeoutMs = options.timeoutMs ?? DEFAULT_ANALYZER_TIMEOUT_MS;
|
|
227
242
|
}
|
|
228
243
|
/**
|
|
229
244
|
* Run the analyzer over `codeDir` and return its metrics, or `null` on any
|
|
@@ -269,14 +284,29 @@ export class LocalPythonMetricSource {
|
|
|
269
284
|
* rules file exists (the analyzer's own PATH fallback still uses it even with
|
|
270
285
|
* no resolved binary). Resolves to the raw stdout string on a clean (exit 0)
|
|
271
286
|
* run, or `null` if the process cannot be spawned or exits non-zero.
|
|
287
|
+
*
|
|
288
|
+
* A {@link timeoutMs} wall-clock ceiling guards against a hung analyzer (an
|
|
289
|
+
* import deadlock, an ast-grep stall on a pathological file, an AV/junction
|
|
290
|
+
* traversal stall): on expiry the child is killed (SIGTERM, escalating to
|
|
291
|
+
* SIGKILL after {@link KILL_GRACE_MS}) and the reading degrades to `null` —
|
|
292
|
+
* the same "no signal" contract every other failure path already honours, so
|
|
293
|
+
* the awaiting episode never hangs with the in-flight lock held.
|
|
272
294
|
*/
|
|
273
295
|
runAnalyzer(codeDir) {
|
|
274
296
|
return new Promise((resolve) => {
|
|
275
297
|
let settled = false;
|
|
298
|
+
let timeoutTimer;
|
|
299
|
+
let killTimer;
|
|
276
300
|
const done = (value) => {
|
|
277
301
|
if (settled)
|
|
278
302
|
return;
|
|
279
303
|
settled = true;
|
|
304
|
+
// Stop waiting for the (now irrelevant) timeout. The SIGKILL-escalation
|
|
305
|
+
// timer is intentionally NOT cleared here: it must outlive the resolve
|
|
306
|
+
// so a child that ignored SIGTERM is still force-reaped; it self-clears
|
|
307
|
+
// when the child finally closes/errors below.
|
|
308
|
+
if (timeoutTimer !== undefined)
|
|
309
|
+
clearTimeout(timeoutTimer);
|
|
280
310
|
resolve(value);
|
|
281
311
|
};
|
|
282
312
|
const args = [this.scriptPath, codeDir];
|
|
@@ -293,18 +323,40 @@ export class LocalPythonMetricSource {
|
|
|
293
323
|
done(null);
|
|
294
324
|
return;
|
|
295
325
|
}
|
|
326
|
+
// Once the child truly exits (normally OR after a kill), no escalation is
|
|
327
|
+
// needed; drop the SIGKILL-escalation timer so the event loop can drain.
|
|
328
|
+
const dropKillTimer = () => {
|
|
329
|
+
if (killTimer !== undefined) {
|
|
330
|
+
clearTimeout(killTimer);
|
|
331
|
+
killTimer = undefined;
|
|
332
|
+
}
|
|
333
|
+
};
|
|
296
334
|
const out = [];
|
|
297
335
|
child.stdout?.on('data', (chunk) => out.push(Buffer.from(chunk)));
|
|
298
336
|
// stderr is intentionally ignored: the analyzer prints only JSON to
|
|
299
337
|
// stdout and we treat any failure uniformly as "no signal".
|
|
300
|
-
child.on('error', () =>
|
|
338
|
+
child.on('error', () => {
|
|
339
|
+
dropKillTimer();
|
|
340
|
+
done(null);
|
|
341
|
+
});
|
|
301
342
|
child.on('close', (code) => {
|
|
343
|
+
dropKillTimer();
|
|
302
344
|
if (code !== 0) {
|
|
303
345
|
done(null);
|
|
304
346
|
return;
|
|
305
347
|
}
|
|
306
348
|
done(Buffer.concat(out).toString('utf8'));
|
|
307
349
|
});
|
|
350
|
+
// Hung-analyzer guard: kill the child and degrade to null on expiry. The
|
|
351
|
+
// child's own 'close'/'error' (fired by the kill) is ignored once settled.
|
|
352
|
+
timeoutTimer = setTimeout(() => {
|
|
353
|
+
child.kill?.('SIGTERM');
|
|
354
|
+
// Escalate to SIGKILL if SIGTERM did not land in the grace window.
|
|
355
|
+
killTimer = setTimeout(() => child.kill?.('SIGKILL'), KILL_GRACE_MS);
|
|
356
|
+
killTimer.unref?.();
|
|
357
|
+
done(null);
|
|
358
|
+
}, this.timeoutMs);
|
|
359
|
+
timeoutTimer.unref?.();
|
|
308
360
|
});
|
|
309
361
|
}
|
|
310
362
|
}
|
|
@@ -27,6 +27,7 @@ export declare const ProjectConfigSchema: z.ZodObject<{
|
|
|
27
27
|
focus: z.ZodOptional<z.ZodBoolean>;
|
|
28
28
|
advantageRollbackThreshold: z.ZodOptional<z.ZodNumber>;
|
|
29
29
|
editBudget: z.ZodOptional<z.ZodNumber>;
|
|
30
|
+
agentTimeoutMs: z.ZodOptional<z.ZodNumber>;
|
|
30
31
|
reward: z.ZodOptional<z.ZodObject<{
|
|
31
32
|
samples: z.ZodOptional<z.ZodNumber>;
|
|
32
33
|
noiseFloor: z.ZodOptional<z.ZodNumber>;
|
|
@@ -37,6 +38,10 @@ export declare const ProjectConfigSchema: z.ZodObject<{
|
|
|
37
38
|
flag: "flag";
|
|
38
39
|
block: "block";
|
|
39
40
|
}>>;
|
|
41
|
+
divergenceCheck: z.ZodOptional<z.ZodEnum<{
|
|
42
|
+
flag: "flag";
|
|
43
|
+
route: "route";
|
|
44
|
+
}>>;
|
|
40
45
|
}, z.core.$strip>>;
|
|
41
46
|
critic: z.ZodOptional<z.ZodObject<{
|
|
42
47
|
baselineMode: z.ZodOptional<z.ZodEnum<{
|
|
@@ -60,6 +60,13 @@ export const ProjectConfigSchema = z.object({
|
|
|
60
60
|
// 演进智能体 EVOLVING AGENT's ONE bounded edit may total. Default 40.
|
|
61
61
|
// Optional/omitted ⇒ the agent's DEFAULT_EVOLVING_AGENT_EDIT_BUDGET applies.
|
|
62
62
|
editBudget: z.number().optional(),
|
|
63
|
+
// Loop v2: per-agent headless-spawn ceiling in MILLISECONDS, threaded into
|
|
64
|
+
// ALL THREE agents (CRITIC AGENT(基线智能体 baseline agent), 奖励智能体
|
|
65
|
+
// REWARD AGENT, 演进智能体 EVOLVING AGENT). A wedged host CLI is killed after
|
|
66
|
+
// this so it cannot hang the episode and leak the in-flight lock. Raise it
|
|
67
|
+
// for a repo whose critic re-do baseline legitimately runs long. Must be a
|
|
68
|
+
// positive integer; omitted ⇒ the built-in DEFAULT_AGENT_TIMEOUT_MS applies.
|
|
69
|
+
agentTimeoutMs: z.number().int().positive().optional(),
|
|
63
70
|
// Loop v2 — 奖励智能体 REWARD AGENT judge-quality knobs. ALL optional; omitted
|
|
64
71
|
// ⇒ the historical single-sample, flag-only behaviour (no extra LLM spawns).
|
|
65
72
|
reward: z
|
|
@@ -77,6 +84,13 @@ export const ProjectConfigSchema = z.object({
|
|
|
77
84
|
// ④ Test-tamper handling: 'off' (no check), 'flag' (annotate only,
|
|
78
85
|
// default), or 'block' (force insufficient-signal + reject-buffer).
|
|
79
86
|
tamperCheck: z.enum(['off', 'flag', 'block']).optional(),
|
|
87
|
+
// ④ Judge⇄verifier divergence handling: 'flag' (record the number +
|
|
88
|
+
// annotate a correctness contradiction, informational only) or 'route'
|
|
89
|
+
// (default) — ALSO demote such a duel to insufficient-signal so the loop
|
|
90
|
+
// abstains instead of evolving on it. Routing fires ONLY when the judge
|
|
91
|
+
// confidently prefers the worse-pass-rate arm (the complement to
|
|
92
|
+
// gate-not-blend), never on a legitimate health/verbosity override.
|
|
93
|
+
divergenceCheck: z.enum(['flag', 'route']).optional(),
|
|
80
94
|
})
|
|
81
95
|
.optional(),
|
|
82
96
|
// Loop v2 — CRITIC AGENT(基线智能体 baseline agent)baseline construction.
|
|
@@ -282,6 +296,13 @@ export function readProjectConfig(projectRoot) {
|
|
|
282
296
|
else if (rawSE.editBudget !== undefined) {
|
|
283
297
|
console.warn(`Invalid 'selfEvolution.editBudget' in config (must be a number), ignoring`);
|
|
284
298
|
}
|
|
299
|
+
const agentTimeoutResult = z.number().int().positive().safeParse(rawSE.agentTimeoutMs);
|
|
300
|
+
if (agentTimeoutResult.success) {
|
|
301
|
+
selfEvolution.agentTimeoutMs = agentTimeoutResult.data;
|
|
302
|
+
}
|
|
303
|
+
else if (rawSE.agentTimeoutMs !== undefined) {
|
|
304
|
+
console.warn(`Invalid 'selfEvolution.agentTimeoutMs' in config (must be a positive integer of milliseconds), ignoring`);
|
|
305
|
+
}
|
|
285
306
|
// Loop v2 — 奖励智能体 REWARD AGENT knobs. Resilient: each sub-field is
|
|
286
307
|
// validated independently; a bad value is dropped with a warning (the
|
|
287
308
|
// judge/aggregator default applies). Omitted ⇒ undefined (single-sample,
|
|
@@ -297,7 +318,8 @@ export function readProjectConfig(projectRoot) {
|
|
|
297
318
|
}
|
|
298
319
|
else if (rawSE.reward !== undefined) {
|
|
299
320
|
console.warn(`Invalid 'selfEvolution.reward' in config (samples/noiseFloor numbers, ` +
|
|
300
|
-
`orderSwap/requireCorrectnessGate booleans, tamperCheck off|flag|block
|
|
321
|
+
`orderSwap/requireCorrectnessGate booleans, tamperCheck off|flag|block, ` +
|
|
322
|
+
`divergenceCheck flag|route), ignoring`);
|
|
301
323
|
}
|
|
302
324
|
// Loop v2 — CRITIC AGENT knobs. Resilient: a bad value is dropped with a
|
|
303
325
|
// warning (the critic default 're-do' then applies). Omitted ⇒ undefined
|
|
@@ -40,6 +40,7 @@
|
|
|
40
40
|
*/
|
|
41
41
|
import { spawn as nodeSpawn } from 'node:child_process';
|
|
42
42
|
import type { ObservedTestFailure } from '../trajectory/facts.js';
|
|
43
|
+
import { type AgentHarness } from './host-harness.js';
|
|
43
44
|
/** Error thrown when the worktree could not be created (git AND copy fallback failed). */
|
|
44
45
|
export declare class CriticWorktreeError extends Error {
|
|
45
46
|
constructor(message: string);
|
|
@@ -160,12 +161,26 @@ export interface RunCriticAgentOptions {
|
|
|
160
161
|
baselineMode?: CriticBaselineMode;
|
|
161
162
|
/** Injectable spawn seam for tests; defaults to node's spawn. */
|
|
162
163
|
spawn?: typeof nodeSpawn;
|
|
163
|
-
/** Hard timeout per agent run (ms). Default
|
|
164
|
+
/** Hard timeout per agent run (ms). Default {@link DEFAULT_AGENT_TIMEOUT_MS} (10 min). */
|
|
164
165
|
timeoutMs?: number;
|
|
166
|
+
/**
|
|
167
|
+
* Which host harness to spawn (claude|codex|opencode). When omitted,
|
|
168
|
+
* {@link runHeadlessAgent} resolves it from the ambient env. Threaded so an
|
|
169
|
+
* env-less subagent run can pass the recovered harness explicitly.
|
|
170
|
+
*/
|
|
171
|
+
harness?: AgentHarness;
|
|
165
172
|
/** Override `os.homedir()` for tests (claude transcript discovery). */
|
|
166
173
|
homeDir?: string;
|
|
167
174
|
/** TEST seam: inject the worktree root instead of git/copy, skipping setup teardown of git. */
|
|
168
175
|
now?: Date;
|
|
176
|
+
/**
|
|
177
|
+
* Hard ceiling (ms) for each git worktree subcommand (create/remove/prune).
|
|
178
|
+
* Default {@link GIT_TIMEOUT_MS} (60s). A git hang past this is SIGTERM→SIGKILLed
|
|
179
|
+
* and rejected, which the worktree create/teardown paths absorb gracefully.
|
|
180
|
+
* Exposed mainly as a TEST seam (small value ⇒ a never-closing git fake settles
|
|
181
|
+
* fast instead of wedging the suite).
|
|
182
|
+
*/
|
|
183
|
+
gitTimeoutMs?: number;
|
|
169
184
|
}
|
|
170
185
|
export interface RunCriticAgentResult {
|
|
171
186
|
/** Absolute path of the `baseline-arm/` dir the capture landed in. */
|
|
@@ -47,7 +47,7 @@ import { readProjectConfig } from '../project-config.js';
|
|
|
47
47
|
import { claudeProjectsDir } from '../learn/trajectory-discovery.js';
|
|
48
48
|
import { claudeSourceFactory } from '../trajectory/adapters/claude.js';
|
|
49
49
|
import { toActionSkeleton } from '../trajectory/skeleton.js';
|
|
50
|
-
import { runHeadlessAgent } from './host-harness.js';
|
|
50
|
+
import { runHeadlessAgent, DEFAULT_AGENT_TIMEOUT_MS } from './host-harness.js';
|
|
51
51
|
import { currentPolicyVersion, readPolicyLedger, readPolicySnapshotFiles, } from './policy/index.js';
|
|
52
52
|
import { advanceEpisodeStage, writeArmCapture } from './episode-store.js';
|
|
53
53
|
/** Error thrown when the worktree could not be created (git AND copy fallback failed). */
|
|
@@ -188,6 +188,20 @@ async function resetChangeArtifactsForRedo(changeDir) {
|
|
|
188
188
|
const NODE_MODULES = 'node_modules';
|
|
189
189
|
const CONFIG_DIR = '.synergyspec-selfevolving';
|
|
190
190
|
const SCHEMAS_REL = path.join('synergyspec-selfevolving', 'schemas');
|
|
191
|
+
/**
|
|
192
|
+
* Hard ceiling (ms) for a single git worktree subcommand. Local worktree
|
|
193
|
+
* create/remove/prune ops are fast (sub-second), so a generous 60s ceiling only
|
|
194
|
+
* trips on a genuine HANG — a credential/GPG prompt, an `index.lock` held by a
|
|
195
|
+
* concurrent git, a stalled network FS, or a wedged hook. Without it `runGit`
|
|
196
|
+
* settles ONLY on the child's 'close'/'error', so such a hang would wedge the
|
|
197
|
+
* critic inside the in-flight-lock window (the same orphan/leak class the agent
|
|
198
|
+
* spawn already guards; the agent-spawn timeout cannot help here — the stall is
|
|
199
|
+
* in worktree setup/teardown, OUTSIDE {@link runHeadlessAgent}). On timeout the
|
|
200
|
+
* child is SIGTERM→SIGKILLed and the promise REJECTS, which the callers absorb
|
|
201
|
+
* gracefully: {@link createIsolatedWorktree} falls back to the copy path, and
|
|
202
|
+
* {@link teardownWorktree}'s git calls are best-effort (`.catch(() => {})`).
|
|
203
|
+
*/
|
|
204
|
+
const GIT_TIMEOUT_MS = 60_000;
|
|
191
205
|
/**
|
|
192
206
|
* Run the CRITIC AGENT(基线智能体 baseline agent)'s full baseline arm and
|
|
193
207
|
* persist its capture. ALWAYS tears the worktree down (产物即弃). On success it
|
|
@@ -198,7 +212,8 @@ const SCHEMAS_REL = path.join('synergyspec-selfevolving', 'schemas');
|
|
|
198
212
|
export async function runCriticAgent(opts) {
|
|
199
213
|
const repoRoot = path.resolve(opts.repoRoot);
|
|
200
214
|
const spawnImpl = opts.spawn ?? nodeSpawn;
|
|
201
|
-
const timeoutMs = opts.timeoutMs ??
|
|
215
|
+
const timeoutMs = opts.timeoutMs ?? DEFAULT_AGENT_TIMEOUT_MS;
|
|
216
|
+
const gitTimeoutMs = opts.gitTimeoutMs ?? GIT_TIMEOUT_MS;
|
|
202
217
|
const homeDir = opts.homeDir ?? os.homedir();
|
|
203
218
|
const baselineMode = opts.baselineMode ?? 're-do';
|
|
204
219
|
if (!Number.isInteger(opts.baselineVersion) || opts.baselineVersion < 0) {
|
|
@@ -212,7 +227,7 @@ export async function runCriticAgent(opts) {
|
|
|
212
227
|
let worktreeMode = 'git-worktree';
|
|
213
228
|
try {
|
|
214
229
|
// 1) Isolated worktree OUTSIDE the repo (git worktree --detach, else copy).
|
|
215
|
-
worktreeMode = await createIsolatedWorktree(repoRoot, worktreePath, spawnImpl);
|
|
230
|
+
worktreeMode = await createIsolatedWorktree(repoRoot, worktreePath, spawnImpl, gitTimeoutMs);
|
|
216
231
|
// 're-do' fidelity needs the detached-HEAD tree (pre-change code). The copy
|
|
217
232
|
// fallback (non-git repo) brings the LIVE tree — including the change's
|
|
218
233
|
// uncommitted implementation — so it cannot reach the pre-change state and
|
|
@@ -242,6 +257,7 @@ export async function runCriticAgent(opts) {
|
|
|
242
257
|
cwd: worktreePath,
|
|
243
258
|
spawn: spawnImpl,
|
|
244
259
|
timeoutMs,
|
|
260
|
+
...(opts.harness ? { harness: opts.harness } : {}),
|
|
245
261
|
});
|
|
246
262
|
// 5) Build + persist the baseline arm.
|
|
247
263
|
const measuredAt = new Date().toISOString();
|
|
@@ -341,7 +357,7 @@ export async function runCriticAgent(opts) {
|
|
|
341
357
|
}
|
|
342
358
|
finally {
|
|
343
359
|
// 6) 产物即弃: ALWAYS tear the worktree down — even when a step above threw.
|
|
344
|
-
await teardownWorktree(repoRoot, worktreePath, worktreeMode, spawnImpl);
|
|
360
|
+
await teardownWorktree(repoRoot, worktreePath, worktreeMode, spawnImpl, gitTimeoutMs);
|
|
345
361
|
}
|
|
346
362
|
}
|
|
347
363
|
// ---------------------------------------------------------------------------
|
|
@@ -353,12 +369,12 @@ export async function runCriticAgent(opts) {
|
|
|
353
369
|
* (not a repo, git missing, etc.) falls back to a recursive file copy of the
|
|
354
370
|
* repo excluding `node_modules` and `.git`. Returns which mode succeeded.
|
|
355
371
|
*/
|
|
356
|
-
async function createIsolatedWorktree(repoRoot, worktreePath, spawnImpl) {
|
|
372
|
+
async function createIsolatedWorktree(repoRoot, worktreePath, spawnImpl, gitTimeoutMs = GIT_TIMEOUT_MS) {
|
|
357
373
|
// Best-effort: a stale worktree dir from an interrupted run would make both
|
|
358
374
|
// git-add and copy fail; clear it first (产物即弃 — nothing here is durable).
|
|
359
375
|
await fs.rm(worktreePath, { recursive: true, force: true }).catch(() => { });
|
|
360
376
|
try {
|
|
361
|
-
await runGit(repoRoot, ['worktree', 'add', '--detach', worktreePath, 'HEAD'], spawnImpl);
|
|
377
|
+
await runGit(repoRoot, ['worktree', 'add', '--detach', worktreePath, 'HEAD'], spawnImpl, gitTimeoutMs);
|
|
362
378
|
return 'git-worktree';
|
|
363
379
|
}
|
|
364
380
|
catch {
|
|
@@ -378,27 +394,81 @@ async function createIsolatedWorktree(repoRoot, worktreePath, spawnImpl) {
|
|
|
378
394
|
* For the copy fallback: recursive rmdir. Never throws — teardown failures must
|
|
379
395
|
* not mask a real error from the run.
|
|
380
396
|
*/
|
|
381
|
-
async function teardownWorktree(repoRoot, worktreePath, mode, spawnImpl) {
|
|
397
|
+
async function teardownWorktree(repoRoot, worktreePath, mode, spawnImpl, gitTimeoutMs = GIT_TIMEOUT_MS) {
|
|
382
398
|
if (mode === 'git-worktree') {
|
|
383
|
-
await runGit(repoRoot, ['worktree', 'remove', '--force', worktreePath], spawnImpl).catch(() => { });
|
|
384
|
-
await runGit(repoRoot, ['worktree', 'prune'], spawnImpl).catch(() => { });
|
|
399
|
+
await runGit(repoRoot, ['worktree', 'remove', '--force', worktreePath], spawnImpl, gitTimeoutMs).catch(() => { });
|
|
400
|
+
await runGit(repoRoot, ['worktree', 'prune'], spawnImpl, gitTimeoutMs).catch(() => { });
|
|
385
401
|
}
|
|
386
402
|
// The node_modules entry is a junction/symlink; `rm -rf` removes the link, not
|
|
387
403
|
// the real tree behind it. Belt-and-suspenders rmdir for both modes.
|
|
388
404
|
await fs.rm(worktreePath, { recursive: true, force: true }).catch(() => { });
|
|
389
405
|
}
|
|
390
|
-
/**
|
|
391
|
-
|
|
406
|
+
/**
|
|
407
|
+
* Run a git subcommand in `repoRoot`; rejects on a non-zero exit, a spawn error,
|
|
408
|
+
* OR a hang past `timeoutMs` (SIGTERM, then SIGKILL ~2s later — mirrors
|
|
409
|
+
* {@link runHeadlessAgent}'s escalation). Spawned with a NON-INTERACTIVE env so a
|
|
410
|
+
* credential/GPG prompt fails fast instead of blocking forever:
|
|
411
|
+
* - `GIT_TERMINAL_PROMPT=0` / `GIT_ASKPASS=''` / `GCM_INTERACTIVE='never'` —
|
|
412
|
+
* no auth prompt is ever opened (it errors out instead), and
|
|
413
|
+
* - `GIT_OPTIONAL_LOCKS=0` — git skips the optional index-lock acquisition that
|
|
414
|
+
* a concurrent git could otherwise block on.
|
|
415
|
+
* Both guards keep `runGit`'s existing resolve/reject contract: a hang becomes a
|
|
416
|
+
* rejection the callers already absorb (copy fallback / best-effort teardown),
|
|
417
|
+
* so the critic degrades gracefully rather than wedging.
|
|
418
|
+
*/
|
|
419
|
+
async function runGit(repoRoot, args, spawnImpl, timeoutMs = GIT_TIMEOUT_MS) {
|
|
392
420
|
await new Promise((resolve, reject) => {
|
|
393
|
-
const child = spawnImpl('git', args, {
|
|
421
|
+
const child = spawnImpl('git', args, {
|
|
422
|
+
cwd: repoRoot,
|
|
423
|
+
shell: false,
|
|
424
|
+
env: {
|
|
425
|
+
...process.env,
|
|
426
|
+
GIT_TERMINAL_PROMPT: '0',
|
|
427
|
+
GIT_OPTIONAL_LOCKS: '0',
|
|
428
|
+
GIT_ASKPASS: '',
|
|
429
|
+
GCM_INTERACTIVE: 'never',
|
|
430
|
+
},
|
|
431
|
+
});
|
|
394
432
|
const err = [];
|
|
433
|
+
let settled = false;
|
|
434
|
+
let timer;
|
|
435
|
+
const finish = (fn) => {
|
|
436
|
+
if (settled)
|
|
437
|
+
return;
|
|
438
|
+
settled = true;
|
|
439
|
+
if (timer)
|
|
440
|
+
clearTimeout(timer);
|
|
441
|
+
fn();
|
|
442
|
+
};
|
|
443
|
+
timer = setTimeout(() => {
|
|
444
|
+
try {
|
|
445
|
+
child.kill(); // SIGTERM
|
|
446
|
+
// Escalate to SIGKILL shortly after in case git ignores SIGTERM, so a
|
|
447
|
+
// wedged child cannot orphan. unref so this timer never keeps the event
|
|
448
|
+
// loop alive on its own.
|
|
449
|
+
setTimeout(() => {
|
|
450
|
+
try {
|
|
451
|
+
child.kill('SIGKILL');
|
|
452
|
+
}
|
|
453
|
+
catch {
|
|
454
|
+
// ignore
|
|
455
|
+
}
|
|
456
|
+
}, 2000).unref?.();
|
|
457
|
+
}
|
|
458
|
+
catch {
|
|
459
|
+
// ignore
|
|
460
|
+
}
|
|
461
|
+
finish(() => reject(new Error(`git ${args[0]} timed out after ${timeoutMs}ms: ${Buffer.concat(err).toString('utf8')}`)));
|
|
462
|
+
}, timeoutMs);
|
|
395
463
|
child.stderr?.on('data', (c) => err.push(Buffer.from(c)));
|
|
396
|
-
child.on('error', (e) => reject(e));
|
|
464
|
+
child.on('error', (e) => finish(() => reject(e)));
|
|
397
465
|
child.on('close', (code) => {
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
466
|
+
finish(() => {
|
|
467
|
+
if (code === 0)
|
|
468
|
+
resolve();
|
|
469
|
+
else
|
|
470
|
+
reject(new Error(`git ${args[0]} exited ${code}: ${Buffer.concat(err).toString('utf8')}`));
|
|
471
|
+
});
|
|
402
472
|
});
|
|
403
473
|
});
|
|
404
474
|
}
|
|
@@ -52,6 +52,7 @@ import { type EpisodeStage } from './episode-store.js';
|
|
|
52
52
|
import { type ArmObjective, type CriticBaselineMode } from './critic-agent.js';
|
|
53
53
|
import { type RewardConfig } from './reward-aggregator.js';
|
|
54
54
|
import { type RunEvolvingAgentResult } from './evolving-agent.js';
|
|
55
|
+
import type { AgentHarness } from './host-harness.js';
|
|
55
56
|
/** The 主智能体 MAIN AGENT (policy vN+1) capture the orchestrator records. */
|
|
56
57
|
export interface MainArmCapture {
|
|
57
58
|
/** Raw session transcript text, when provided; persisted as `transcript.jsonl`. */
|
|
@@ -162,6 +163,19 @@ export interface RunEpisodeOptions {
|
|
|
162
163
|
};
|
|
163
164
|
/** Injectable spawn seam — threaded to ALL THREE agents. Defaults to node's spawn. */
|
|
164
165
|
spawn?: typeof nodeSpawn;
|
|
166
|
+
/**
|
|
167
|
+
* Hard timeout per agent run (ms), threaded into ALL THREE agents (critic,
|
|
168
|
+
* reward, evolving). Omitted ⇒ each agent defaults internally
|
|
169
|
+
* (DEFAULT_AGENT_TIMEOUT_MS, 10 min), so a wedged host CLI cannot hang the loop
|
|
170
|
+
* forever and leak the in-flight lock (ses_1330/1331).
|
|
171
|
+
*/
|
|
172
|
+
agentTimeoutMs?: number;
|
|
173
|
+
/**
|
|
174
|
+
* Host harness override, threaded into ALL THREE agents so a subagent with an
|
|
175
|
+
* env-less ambient (resolveHostHarness ⇒ wrong default binary) still spawns the
|
|
176
|
+
* right CLI (ses_1331). Omitted ⇒ each agent resolves the harness itself.
|
|
177
|
+
*/
|
|
178
|
+
harness?: AgentHarness;
|
|
165
179
|
/** Injectable clock for the lock + episode id; defaults to `new Date()`. */
|
|
166
180
|
now?: Date;
|
|
167
181
|
/**
|
|
@@ -205,6 +219,10 @@ export interface ResumeEpisodeOptions {
|
|
|
205
219
|
advantageRollbackThreshold?: number;
|
|
206
220
|
/** Edit budget L (default 40). */
|
|
207
221
|
editBudget?: number;
|
|
222
|
+
/** Hard timeout per agent run (ms); threaded into the resumed evolving agent. */
|
|
223
|
+
agentTimeoutMs?: number;
|
|
224
|
+
/** Host harness override; threaded into the resumed evolving agent. */
|
|
225
|
+
harness?: AgentHarness;
|
|
208
226
|
}
|
|
209
227
|
export interface ResumeEpisodeResult {
|
|
210
228
|
episodeId: string;
|
|
@@ -223,6 +241,16 @@ export interface ResumeEpisodeResult {
|
|
|
223
241
|
* - 'scored' → run the decision (f) then the 演进智能体 (g).
|
|
224
242
|
* - 'rolled-back' / 'kept' → run the 演进智能体 EVOLVING AGENT (g) then close.
|
|
225
243
|
* - 'evolved'/'evolution-refused'/'abstained' → close.
|
|
244
|
+
* - 'errored' → RE-DRIVE from the last GOOD pre-error stage
|
|
245
|
+
* (an episode may have errored on a TRANSIENT
|
|
246
|
+
* cause — a one-off git/analyzer/agent timeout).
|
|
247
|
+
* The pre-error stage is the last `stageHistory`
|
|
248
|
+
* entry that is NOT 'errored'; when it is one of
|
|
249
|
+
* {'scored','rolled-back','kept'} (the
|
|
250
|
+
* resume-entry stages) we advance errored → that
|
|
251
|
+
* stage and fall through to the normal dispatch.
|
|
252
|
+
* Otherwise the pre-error stage is not
|
|
253
|
+
* auto-resumable and the episode is reported as-is.
|
|
226
254
|
* - earlier stages → not auto-resumable here (the arms / reward
|
|
227
255
|
* agent need their own re-entry); reported as-is.
|
|
228
256
|
*
|