synergyspec-selfevolving 2.1.4 → 2.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/dist/commands/config.js +4 -0
  2. package/dist/commands/learn.js +80 -24
  3. package/dist/commands/self-evolution-dream.d.ts +54 -0
  4. package/dist/commands/self-evolution-dream.js +265 -0
  5. package/dist/commands/self-evolution-episode.d.ts +5 -0
  6. package/dist/commands/self-evolution-episode.js +160 -107
  7. package/dist/commands/self-evolution.js +127 -4
  8. package/dist/commands/workflow/status.js +38 -7
  9. package/dist/core/archive.js +27 -9
  10. package/dist/core/change-readiness.d.ts +63 -6
  11. package/dist/core/change-readiness.js +912 -23
  12. package/dist/core/completions/command-registry.js +1 -1
  13. package/dist/core/fitness/loss.d.ts +10 -5
  14. package/dist/core/fitness/loss.js +11 -4
  15. package/dist/core/fitness/test-metrics.d.ts +3 -0
  16. package/dist/core/fitness/test-metrics.js +78 -1
  17. package/dist/core/learn/trajectory-discovery.js +5 -0
  18. package/dist/core/learn.js +131 -13
  19. package/dist/core/migration.d.ts +6 -14
  20. package/dist/core/migration.js +63 -21
  21. package/dist/core/profiles.d.ts +1 -1
  22. package/dist/core/profiles.js +1 -0
  23. package/dist/core/runner-evidence.d.ts +53 -0
  24. package/dist/core/runner-evidence.js +613 -0
  25. package/dist/core/self-evolution/candidates.d.ts +1 -1
  26. package/dist/core/self-evolution/candidates.js +1 -2
  27. package/dist/core/self-evolution/canonical-targets.js +1 -0
  28. package/dist/core/self-evolution/dream.d.ts +132 -0
  29. package/dist/core/self-evolution/dream.js +1093 -0
  30. package/dist/core/self-evolution/episode-orchestrator.d.ts +7 -0
  31. package/dist/core/self-evolution/episode-orchestrator.js +162 -12
  32. package/dist/core/self-evolution/episode-store.d.ts +21 -0
  33. package/dist/core/self-evolution/episode-store.js +16 -3
  34. package/dist/core/self-evolution/evolving-agent.js +8 -0
  35. package/dist/core/self-evolution/host-harness.d.ts +46 -12
  36. package/dist/core/self-evolution/host-harness.js +198 -55
  37. package/dist/core/self-evolution/index.d.ts +1 -0
  38. package/dist/core/self-evolution/index.js +1 -0
  39. package/dist/core/self-evolution/policy/policy-store.d.ts +19 -2
  40. package/dist/core/self-evolution/policy/policy-store.js +85 -0
  41. package/dist/core/self-evolution/promote.d.ts +7 -5
  42. package/dist/core/self-evolution/promote.js +111 -19
  43. package/dist/core/self-evolution/reward-agent.js +11 -9
  44. package/dist/core/self-evolution/reward-aggregator.js +2 -2
  45. package/dist/core/shared/skill-generation.d.ts +37 -0
  46. package/dist/core/shared/skill-generation.js +91 -0
  47. package/dist/core/templates/skill-templates.d.ts +1 -0
  48. package/dist/core/templates/skill-templates.js +1 -0
  49. package/dist/core/templates/workflow-manifest.js +2 -0
  50. package/dist/core/templates/workflows/archive-change.js +76 -39
  51. package/dist/core/templates/workflows/ci.js +47 -1
  52. package/dist/core/templates/workflows/dream.d.ts +10 -0
  53. package/dist/core/templates/workflows/dream.js +123 -0
  54. package/dist/core/templates/workflows/gen-tests.js +9 -3
  55. package/dist/core/templates/workflows/learn.js +11 -7
  56. package/dist/core/templates/workflows/run-tests.js +99 -4
  57. package/dist/core/templates/workflows/self-evolving.js +118 -115
  58. package/dist/core/templates/workflows/verify-change.js +130 -22
  59. package/dist/core/trajectory/adapters/codex.js +87 -29
  60. package/dist/core/trajectory/adapters/opencode.js +69 -23
  61. package/dist/core/trajectory/facts.d.ts +1 -1
  62. package/dist/core/trajectory/facts.js +23 -5
  63. package/dist/core/trajectory/registry.d.ts +16 -2
  64. package/dist/core/trajectory/registry.js +104 -29
  65. package/dist/core/trajectory/source.d.ts +27 -4
  66. package/dist/dashboard/react-client.js +4 -4
  67. package/dist/utils/change-utils.d.ts +2 -0
  68. package/dist/utils/change-utils.js +53 -2
  69. package/package.json +99 -99
  70. package/schemas/spec-driven/templates/design.md +6 -0
  71. package/scripts/nl2repo_synergyspec-selfevolving_wrapper.py +170 -0
@@ -34,17 +34,18 @@ const HARNESSES = ['claude', 'codex', 'opencode'];
34
34
  export const DEFAULT_AGENT_TIMEOUT_MS = 600_000;
35
35
  /**
36
36
  * Per-host absolute-timeout defaults. claude/codex keep the 10-min
37
- * {@link DEFAULT_AGENT_TIMEOUT_MS}; opencode is given a lower wall because — in
38
- * the v2.1.2 smoke run an opencode/GPT-5.5 print-mode spawn emitted ZERO
39
- * output and burned the full 10 minutes before the wall fired (the host CLI is
40
- * empirically slow-to-emit / occasionally non-terminating in `run` print mode).
37
+ * {@link DEFAULT_AGENT_TIMEOUT_MS}; opencode gets a longer wall because the
38
+ * v2.1.5 Windows/OpenCode smoke run reached reward/scoring, then killed the
39
+ * evolving agent at the previous 5-min wall while it was still producing a
40
+ * bounded candidate. The idle watchdog remains the earlier trip wire for silent
41
+ * wedges, so the absolute wall should be large enough for a live edit attempt.
41
42
  * The wall is still overridable per-host via
42
43
  * `SYNERGYSPEC_SELFEVOLVING_AGENT_TIMEOUT_MS` ({@link resolveAgentTimeoutMs}).
43
44
  */
44
45
  const HARNESS_TIMEOUT_DEFAULTS_MS = {
45
46
  claude: DEFAULT_AGENT_TIMEOUT_MS,
46
47
  codex: DEFAULT_AGENT_TIMEOUT_MS,
47
- opencode: 300_000,
48
+ opencode: 900_000,
48
49
  };
49
50
  /**
50
51
  * Default STDOUT/STDERR-idle watchdog window (2 min). If a spawned host CLI
@@ -62,11 +63,13 @@ export const DEFAULT_AGENT_IDLE_TIMEOUT_MS = 120_000;
62
63
  * emit ZERO bytes for well over 2 min while it reasons, so claude/codex get a
63
64
  * 5-min idle leash. opencode keeps the tighter 2-min window — it is the
64
65
  * empirically-wedging host (the v2.1.2 hang emitted no output at all) and a
65
- * faster idle kill is what we want there.
66
+ * faster idle kill is what we want there. opencode's absolute wall is longer
67
+ * than claude/codex because its live edit attempts can be slower even when they
68
+ * are not silent.
66
69
  *
67
70
  * INVARIANT: every harness's idle default is strictly LESS than its absolute
68
71
  * default ({@link HARNESS_TIMEOUT_DEFAULTS_MS}) so the idle watchdog stays the
69
- * earlier trip wire (claude 300<600, codex 300<600, opencode 120<300).
72
+ * earlier trip wire (claude 300<600, codex 300<600, opencode 120<900).
70
73
  * Overridable per host via `SYNERGYSPEC_SELFEVOLVING_AGENT_IDLE_TIMEOUT_MS`
71
74
  * ({@link resolveIdleTimeoutMs}).
72
75
  */
@@ -125,8 +128,8 @@ const AGENT_TIMEOUT_ENV = 'SYNERGYSPEC_SELFEVOLVING_AGENT_TIMEOUT_MS';
125
128
  * (1) `SYNERGYSPEC_SELFEVOLVING_AGENT_TIMEOUT_MS` when it parses to a positive
126
129
  * finite integer — a host-wide tunable that overrides every harness.
127
130
  * (2) the per-harness default ({@link HARNESS_TIMEOUT_DEFAULTS_MS}): the 10-min
128
- * {@link DEFAULT_AGENT_TIMEOUT_MS} for claude/codex, a lower wall for the
129
- * empirically slow-to-emit opencode.
131
+ * {@link DEFAULT_AGENT_TIMEOUT_MS} for claude/codex, and a longer wall for
132
+ * opencode live edit attempts.
130
133
  *
131
134
  * `harness` omitted ⇒ {@link resolveHostHarness} is consulted so the default is
132
135
  * host-appropriate.
@@ -150,25 +153,25 @@ function isAgentHarness(value) {
150
153
  * Precedence:
151
154
  * (a) `SYNERGYSPEC_SELFEVOLVING_HOST_HARNESS` when it equals claude|codex|opencode.
152
155
  * (b) Heuristic on the ambient environment:
153
- * - `CODEX_HOME` or any `CODEX_*` var set → 'codex'.
154
156
  * - `OPENCODE_DATA_DIR` or any `OPENCODE_*` var set → 'opencode'.
157
+ * - `CODEX_HOME` or any `CODEX_*` var set → 'codex'.
155
158
  * (c) Default 'claude'.
156
159
  *
157
- * Codex is checked before opencode so that, in the unlikely event both families
158
- * of env vars are present, the explicit override remains the only way to force a
159
- * choice; the heuristic is best-effort.
160
+ * OpenCode is checked before Codex because Codex can be the meta-runner that is
161
+ * invoking an OpenCode smoke test; in that mixed environment OPENCODE_* is the
162
+ * stronger signal for the observed run whose trajectory we must grade.
160
163
  */
161
164
  export function resolveHostHarness() {
162
165
  const override = process.env.SYNERGYSPEC_SELFEVOLVING_HOST_HARNESS;
163
166
  if (isAgentHarness(override))
164
167
  return override;
165
168
  const envKeys = Object.keys(process.env);
166
- const hasCodex = process.env.CODEX_HOME !== undefined || envKeys.some((k) => k.startsWith('CODEX_'));
167
- if (hasCodex)
168
- return 'codex';
169
169
  const hasOpencode = process.env.OPENCODE_DATA_DIR !== undefined || envKeys.some((k) => k.startsWith('OPENCODE_'));
170
170
  if (hasOpencode)
171
171
  return 'opencode';
172
+ const hasCodex = process.env.CODEX_HOME !== undefined || envKeys.some((k) => k.startsWith('CODEX_'));
173
+ if (hasCodex)
174
+ return 'codex';
172
175
  return 'claude';
173
176
  }
174
177
  // ---------------------------------------------------------------------------
@@ -194,14 +197,23 @@ function hostHarnessPath(repoRoot) {
194
197
  * spawns, never a precondition for the current run.
195
198
  */
196
199
  export async function persistHostHarness(repoRoot, harness) {
200
+ let tmpFile = null;
197
201
  try {
198
202
  const file = hostHarnessPath(repoRoot);
199
203
  await fs.mkdir(path.dirname(file), { recursive: true });
200
- await fs.writeFile(file, `${JSON.stringify({ harness }, null, 2)}\n`, 'utf8');
204
+ tmpFile = path.join(path.dirname(file), `${HOST_HARNESS_FILE}.${process.pid}.${Date.now()}.${Math.random().toString(36).slice(2)}.tmp`);
205
+ await fs.writeFile(tmpFile, `${JSON.stringify({ harness }, null, 2)}\n`, 'utf8');
206
+ await fs.rename(tmpFile, file);
207
+ tmpFile = null;
201
208
  }
202
209
  catch {
203
210
  // Swallow: a read-only or transient FS must not break the loop.
204
211
  }
212
+ finally {
213
+ if (tmpFile) {
214
+ await fs.unlink(tmpFile).catch(() => undefined);
215
+ }
216
+ }
205
217
  }
206
218
  /**
207
219
  * Read + parse + validate the persisted-harness sidecar. Returns the
@@ -253,18 +265,16 @@ function binaryResolvable(binary) {
253
265
  if (binary.trim().length === 0)
254
266
  return false;
255
267
  const isWindows = process.platform === 'win32';
256
- // Windows PATHEXT (e.g. `.COM;.EXE;.BAT;.CMD`); also try the bare name (a
257
- // binary may already carry its extension).
258
- const exts = isWindows
259
- ? ['', ...(process.env.PATHEXT ?? '.COM;.EXE;.BAT;.CMD').split(';').filter(Boolean)]
260
- : [''];
268
+ // Windows PATHEXT (e.g. `.COM;.EXE;.BAT;.CMD`). A bare extensionless npm
269
+ // shim is not a CreateProcess target; prefer the PATHEXT-resolved .cmd/.exe.
270
+ const exts = executableExtensions(binary, isWindows, process.env.PATHEXT);
261
271
  const isExecutableFile = (candidate) => {
262
272
  try {
263
273
  const st = statSync(candidate);
264
274
  if (!st.isFile())
265
275
  return false;
266
276
  if (isWindows)
267
- return true; // Windows has no executable bit; existence + ext suffices.
277
+ return isWindowsSpawnCompatibleExecutable(candidate);
268
278
  // POSIX: any execute bit (owner/group/other) marks it runnable.
269
279
  return (st.mode & 0o111) !== 0;
270
280
  }
@@ -314,34 +324,34 @@ function persistedBinary(harness) {
314
324
  * wrong binary,
315
325
  * (4) 'claude'.
316
326
  *
317
- * When (1) or (2) resolve CONFIDENTLY from a real env signal, the result is
318
- * persisted best-effort (fire-and-forget) so a later env-less call recovers it.
327
+ * This resolver is read-only. Command entry points that need to seed an
328
+ * env-less subagent call `seedHostHarnessForRepo`; keeping this function pure
329
+ * matters because learn preview/report generation uses it during trajectory
330
+ * lookup and must not write sidecar files.
331
+ *
319
332
  * The env checks are replicated inline (rather than only calling the sync
320
333
  * {@link resolveHostHarness}) precisely so we can tell "env gave a real signal"
321
334
  * apart from "defaulted to claude with no signal" — the sync resolver collapses
322
335
  * both onto 'claude'.
323
336
  */
324
- export async function resolveHostHarnessForRepo(repoRoot) {
337
export async function resolveHostHarnessDetailsForRepo(repoRoot) {
    // (1) A valid explicit override always wins.
    const explicit = process.env.SYNERGYSPEC_SELFEVOLVING_HOST_HARNESS;
    if (isAgentHarness(explicit)) {
        return { harness: explicit, source: 'override' };
    }
    // (2) Env heuristic, OPENCODE_* before CODEX_*. Only a positive hit is
    //     reported as source 'env'; the 'claude' fall-through is not.
    const names = Object.keys(process.env);
    const anyWithPrefix = (prefix) => names.some((name) => name.startsWith(prefix));
    if (process.env.OPENCODE_DATA_DIR !== undefined || anyWithPrefix('OPENCODE_')) {
        return { harness: 'opencode', source: 'env' };
    }
    if (process.env.CODEX_HOME !== undefined || anyWithPrefix('CODEX_')) {
        return { harness: 'codex', source: 'env' };
    }
    // (3) Persisted sidecar: honored only when a value was read back AND
    //     binaryResolvable() accepts the binary mapped from it.
    // (4) Otherwise fall through to the 'claude' default.
    const sidecarHarness = await readPersistedHostHarness(repoRoot);
    if (!sidecarHarness || !binaryResolvable(persistedBinary(sidecarHarness))) {
        return { harness: 'claude', source: 'default' };
    }
    return { harness: sidecarHarness, source: 'persisted' };
}
368
/**
 * Convenience wrapper: resolve the host harness for `repoRoot` and return only
 * the harness name, dropping the `source` that
 * {@link resolveHostHarnessDetailsForRepo} reports alongside it.
 */
export async function resolveHostHarnessForRepo(repoRoot) {
    const { harness } = await resolveHostHarnessDetailsForRepo(repoRoot);
    return harness;
}
371
/**
 * Side-effecting counterpart of {@link resolveHostHarnessDetailsForRepo}.
 *
 * Resolves the host harness and, when the answer came from a confident signal
 * (source 'override' or 'env'), persists it via {@link persistHostHarness}.
 * Resolutions with source 'persisted' or 'default' are returned without
 * writing anything.
 *
 * Intended for command handlers that are about to spawn env-less subagents;
 * report and trajectory readers should call the read-only resolver instead.
 *
 * @returns the full `{ harness, source }` resolution.
 */
export async function seedHostHarnessForRepo(repoRoot) {
    const resolution = await resolveHostHarnessDetailsForRepo(repoRoot);
    const isConfidentSignal = ['override', 'env'].includes(resolution.source);
    if (isConfidentSignal) {
        await persistHostHarness(repoRoot, resolution.harness);
    }
    return resolution;
}
357
384
  /**
358
385
  * Build the concrete `{binary, args, useStdin}` invocation for a headless run.
359
386
  *
360
387
  * Full escape hatch: if `SYNERGYSPEC_CODE_AGENT_COMMAND` is set, it is parsed as a
361
- * JSON `string[]` template. The literal tokens `{prompt}` and `{cwd}` are
362
- * substituted in each element; `binary = template[0]`, `args = template.slice(1)`.
363
- * `useStdin` is inferred true iff the template does NOT contain a `{prompt}`
364
- * token anywhere (so the caller streams the prompt to stdin instead).
388
+ * JSON `string[]` template. The literal token `{cwd}` is substituted in each
389
+ * element; `binary = template[0]`, `args = template.slice(1)`. `{prompt}` is
390
+ * deliberately rejected: loop-v2 prompts are too large for argv and must flow
391
+ * through stdin for every harness and override.
365
392
  *
366
393
  * Otherwise the command is derived from the harness (default
367
394
  * {@link resolveHostHarness}). Every harness streams the prompt over stdin
@@ -381,12 +408,14 @@ export function buildHeadlessCommand(prompt, opts) {
381
408
  throw new Error('SYNERGYSPEC_CODE_AGENT_COMMAND must be a non-empty JSON array of strings');
382
409
  }
383
410
  const rawTemplate = parsed;
384
- const useStdin = !rawTemplate.some((e) => e.includes('{prompt}'));
385
- const substituted = rawTemplate.map((e) => e.split('{prompt}').join(prompt).split('{cwd}').join(opts.cwd));
411
+ if (rawTemplate.some((e) => e.includes('{prompt}'))) {
412
+ throw new Error('SYNERGYSPEC_CODE_AGENT_COMMAND must not contain {prompt}; prompts are always streamed over stdin');
413
+ }
414
+ const substituted = rawTemplate.map((e) => e.split('{cwd}').join(opts.cwd));
386
415
  return {
387
416
  binary: substituted[0],
388
417
  args: substituted.slice(1),
389
- useStdin,
418
+ useStdin: true,
390
419
  };
391
420
  }
392
421
  const harness = opts.harness ?? resolveHostHarness();
@@ -418,6 +447,110 @@ export function buildHeadlessCommand(prompt, opts) {
418
447
  }
419
448
  }
420
449
  }
450
/**
 * Turn a `{binary, args, useStdin}` command into the shape handed to spawn.
 *
 * - Off Windows the command is returned unchanged apart from `shell: false`.
 * - On Windows the binary is first resolved with `resolveWindowsExecutable`
 *   (falling back to the original name when nothing is found). A binary whose
 *   extension is not spawn-compatible is rejected. A `.cmd`/`.bat` target is
 *   wrapped by `wrapWindowsShellScript`. Anything else is returned with the
 *   resolved binary. `shell` is always `false`.
 *
 * @param command the command to adapt; its other fields are carried over.
 * @param opts optional `platform`, `env` and `isExecutableFile` overrides;
 *   defaults are `process.platform`, `process.env` and a `statSync().isFile()`
 *   probe that treats any stat error as "not a file".
 * @throws Error when the Windows binary has an unsupported extension (also
 *   propagates whatever `resolveWindowsExecutable` throws).
 */
export function resolveHeadlessCommandForSpawn(command, opts = {}) {
    const platform = opts.platform ?? process.platform;
    if (platform !== 'win32') {
        return { ...command, shell: false };
    }
    const statBackedProbe = (candidate) => {
        try {
            return statSync(candidate).isFile();
        }
        catch {
            return false;
        }
    };
    const located = resolveWindowsExecutable(command.binary, {
        env: opts.env ?? process.env,
        isExecutableFile: opts.isExecutableFile ?? statBackedProbe,
    });
    const binary = located ?? command.binary;
    if (isUnsupportedWindowsExecutable(binary)) {
        const badExt = path.win32.extname(binary).toLowerCase();
        throw new Error(`Windows headless agent binary '${binary}' has unsupported extension '${badExt}'; use a .cmd, .bat, .exe, or .com shim, or invoke the interpreter explicitly via SYNERGYSPEC_CODE_AGENT_COMMAND.`);
    }
    if (!isWindowsShellScript(binary)) {
        return { ...command, binary, shell: false };
    }
    // Batch scripts are launched through the command interpreter wrapper.
    const wrapped = wrapWindowsShellScript(binary, command.args, opts.env ?? process.env);
    return {
        ...command,
        binary: wrapped.binary,
        args: wrapped.args,
        shell: false,
    };
}
488
/**
 * List the filename suffixes to try when probing for `binary`.
 *
 * - Non-Windows: `['']` (probe the name as given).
 * - Windows, name already carries an extension: `['']`.
 * - Windows, extensionless name: the `PATHEXT` entries, split on `;`, trimmed,
 *   blanks dropped. The built-in `.COM;.EXE;.BAT;.CMD` list is used when
 *   `pathext` is undefined OR parses to nothing (empty / whitespace /
 *   separators only); otherwise a blank `PATHEXT` would yield zero candidates
 *   and make every extensionless binary unresolvable.
 *
 * @param binary binary name or path to probe.
 * @param isWindows whether Windows resolution rules apply.
 * @param pathext raw `PATHEXT` value, if any.
 * @returns suffixes to append to `binary`, never empty.
 */
function executableExtensions(binary, isWindows, pathext) {
    if (!isWindows)
        return [''];
    if (path.win32.extname(binary))
        return [''];
    const parse = (raw) => raw
        .split(';')
        .map((ext) => ext.trim())
        .filter(Boolean);
    const fallback = '.COM;.EXE;.BAT;.CMD';
    const configured = parse(pathext ?? fallback);
    return configured.length > 0 ? configured : parse(fallback);
}
498
/**
 * Find a spawnable Windows path for `binary`.
 *
 * Candidates are built from `binary` plus each suffix from
 * `executableExtensions`: directly when `binary` contains a path separator,
 * otherwise once per `;`-separated PATH directory. The first candidate that
 * `opts.isExecutableFile` accepts and whose extension is spawn-compatible is
 * returned.
 *
 * PATH and PATHEXT are looked up through `firstNonBlankEnv` with the common
 * casings. `process.env` is case-insensitive on Windows, but an injected or
 * copied env object is a plain object and usually carries `Path`; reading only
 * `env.PATH` there would silently search nothing. This matches how the
 * ComSpec lookup in `wrapWindowsShellScript` already tolerates case variants.
 *
 * @param binary binary name or path; blank input yields `null`.
 * @param opts `{ env, isExecutableFile }` — environment to read and the file probe.
 * @returns the resolved candidate path, or `null` when nothing matched.
 * @throws Error when files were found but none has a spawn-compatible extension.
 */
function resolveWindowsExecutable(binary, opts) {
    if (!binary || binary.trim().length === 0)
        return null;
    const pathext = firstNonBlankEnv(opts.env, 'PATHEXT', 'PathExt', 'Pathext', 'pathext');
    const exts = executableExtensions(binary, true, pathext);
    const candidates = [];
    const hasPathSeparator = binary.includes('/') || binary.includes('\\');
    if (hasPathSeparator) {
        candidates.push(...exts.map((ext) => binary + ext));
    }
    else {
        const searchPath = firstNonBlankEnv(opts.env, 'PATH', 'Path', 'path') ?? '';
        const entries = searchPath.split(';').filter(Boolean);
        for (const dir of entries) {
            for (const ext of exts)
                candidates.push(path.win32.join(dir, binary + ext));
        }
    }
    // Remember the first existing-but-unspawnable hit so the error can name it.
    let firstUnsupported = null;
    for (const candidate of candidates) {
        if (!opts.isExecutableFile(candidate, true))
            continue;
        if (isWindowsSpawnCompatibleExecutable(candidate))
            return candidate;
        firstUnsupported ??= candidate;
    }
    if (firstUnsupported) {
        throw new Error(`Windows headless agent binary resolved to '${firstUnsupported}', but that extension cannot be spawned with shell:false; use a .cmd, .bat, .exe, or .com shim, or invoke the interpreter explicitly via SYNERGYSPEC_CODE_AGENT_COMMAND.`);
    }
    return null;
}
527
/**
 * True when `binary` names a batch script, i.e. its Windows-style extension is
 * `.cmd` or `.bat` (compared case-insensitively).
 */
function isWindowsShellScript(binary) {
    const suffix = path.win32.extname(binary).toLowerCase();
    return ['.cmd', '.bat'].includes(suffix);
}
531
/**
 * True when the Windows-style extension of `binary` is one this module treats
 * as spawnable: none at all, `.com`, `.exe`, `.bat` or `.cmd`
 * (case-insensitive).
 */
function isWindowsSpawnCompatibleExecutable(binary) {
    const suffix = path.win32.extname(binary).toLowerCase();
    const accepted = ['', '.com', '.exe', '.bat', '.cmd'];
    return accepted.includes(suffix);
}
535
/**
 * True when `binary` has an explicit extension that is NOT spawn-compatible.
 * An extensionless name is never reported as unsupported.
 */
function isUnsupportedWindowsExecutable(binary) {
    const hasExtension = path.win32.extname(binary).length > 0;
    if (!hasExtension) {
        return false;
    }
    return !isWindowsSpawnCompatibleExecutable(binary);
}
539
/**
 * Build the command-interpreter invocation used to launch a batch script.
 *
 * The interpreter is the first non-blank of `env.ComSpec` / `env.COMSPEC`,
 * else `'cmd.exe'`. The script and its arguments are appended after
 * `/d /s /c call`.
 *
 * NOTE(review): `args` are forwarded as-is. Whether cmd metacharacters such as
 * `&`, `|`, `^` or `%` inside an argument are neutralised depends on how the
 * caller's spawn quotes argv — confirm against the spawn call site.
 *
 * @returns `{ binary, args }` for the wrapper process.
 */
function wrapWindowsShellScript(binary, args, env) {
    const interpreter = firstNonBlankEnv(env, 'ComSpec', 'COMSPEC') ?? 'cmd.exe';
    const interpreterFlags = ['/d', '/s', '/c', 'call'];
    return {
        binary: interpreter,
        args: [...interpreterFlags, binary, ...args],
    };
}
546
/**
 * Return the value of the first key in `keys` whose entry in `env` is a string
 * containing at least one non-whitespace character. The value is returned
 * untrimmed. Yields `undefined` when no key qualifies.
 */
function firstNonBlankEnv(env, ...keys) {
    const isNonBlank = (value) => typeof value === 'string' && value.trim().length > 0;
    const hitIndex = keys.findIndex((key) => isNonBlank(env[key]));
    return hitIndex === -1 ? undefined : env[keys[hitIndex]];
}
421
554
  /**
422
555
  * The claude-default binary fallback: `SYNERGYSPEC_SELFEVOLVING_CLAUDE_BIN` when
423
556
  * non-empty, else `'claude'`. Kept here so {@link buildHeadlessCommand} is the
@@ -453,16 +586,27 @@ function claudeDefaultBinary() {
453
586
  */
454
587
  export async function runHeadlessAgent(prompt, opts) {
455
588
  const spawnImpl = opts.spawn ?? nodeSpawn;
456
- const command = buildHeadlessCommand(prompt, {
457
- cwd: opts.cwd,
458
- harness: opts.harness,
459
- binaryOverride: opts.binaryOverride,
460
- });
589
+ let spawnCommand;
590
+ try {
591
+ const command = buildHeadlessCommand(prompt, {
592
+ cwd: opts.cwd,
593
+ harness: opts.harness,
594
+ binaryOverride: opts.binaryOverride,
595
+ });
596
+ spawnCommand = resolveHeadlessCommandForSpawn(command);
597
+ }
598
+ catch (e) {
599
+ return {
600
+ exitCode: -1,
601
+ stdout: '',
602
+ stderr: e instanceof Error ? e.message : String(e),
603
+ };
604
+ }
461
605
  return await new Promise((resolve) => {
462
606
  let child;
463
607
  try {
464
- child = spawnImpl(command.binary, command.args, {
465
- shell: false,
608
+ child = spawnImpl(spawnCommand.binary, spawnCommand.args, {
609
+ shell: spawnCommand.shell,
466
610
  cwd: opts.cwd,
467
611
  });
468
612
  }
@@ -520,7 +664,7 @@ export async function runHeadlessAgent(prompt, opts) {
520
664
  // ignore
521
665
  }
522
666
  };
523
- if (command.useStdin) {
667
+ if (spawnCommand.useStdin) {
524
668
  // Swallow stdin stream errors (e.g. EPIPE when the child exits before it
525
669
  // has read the whole — possibly 100KB+ — prompt). The real failure is
526
670
  // reported via the child's own 'error'/'close' handlers below; an
@@ -581,7 +725,6 @@ export async function runHeadlessAgent(prompt, opts) {
581
725
  if (settled)
582
726
  return;
583
727
  const elapsedS = Math.round((Date.now() - startedAt) / 1000);
584
- // eslint-disable-next-line no-console
585
728
  console.error(`[self-evolution] headless agent running: ${elapsedS}s elapsed, ${bytesReceived} bytes received`);
586
729
  }, HEARTBEAT_INTERVAL_MS);
587
730
  heartbeatTimer.unref?.();
@@ -22,6 +22,7 @@ export * from './eval-report.js';
22
22
  export * from './edits-contract.js';
23
23
  export * from './proposer-slice.js';
24
24
  export * from './promotion.js';
25
+ export * from './dream.js';
25
26
  export * from './policy/index.js';
26
27
  export * from './episode-store.js';
27
28
  export * from './line-diff.js';
@@ -22,6 +22,7 @@ export * from './eval-report.js';
22
22
  export * from './edits-contract.js';
23
23
  export * from './proposer-slice.js';
24
24
  export * from './promotion.js';
25
+ export * from './dream.js';
25
26
  // ── Loop v2 (self-evolution as in-context RL) ────────────────────────────────
26
27
  // Policy (ledger + reject-buffer + fs-safe), the disk episode store, the three
27
28
  // agents (critic / reward / evolving), the line-diff + scope gate, and the
@@ -7,7 +7,7 @@ export declare const POLICY_SNAPSHOT_DELTA_FILE = "delta.patch";
7
7
  /** An in-flight episode older than this is stale and its slot reclaimable. */
8
8
  export declare const IN_FLIGHT_STALE_MS: number;
9
9
  /** What a ledger entry records happened to the lineage. */
10
- export type PolicyLedgerAction = 'init' | 'evolve' | 'rollback' | 'refused';
10
+ export type PolicyLedgerAction = 'init' | 'evolve' | 'promote' | 'rollback' | 'refused';
11
11
  /** One file the policy version covers, content-addressed for verification. */
12
12
  export interface PolicyLedgerFileEntry {
13
13
  /** Repo-relative POSIX path of the live policy file. */
@@ -25,7 +25,7 @@ export interface PolicyPrediction {
25
25
  direction: 'down' | 'up';
26
26
  checkBy: string;
27
27
  }
28
- /** Size of an 'evolve'/'rollback' step, derived from its `delta.patch`. */
28
+ /** Size of an 'evolve'/'promote'/'rollback' step, derived from its `delta.patch`. */
29
29
  export interface PolicyDeltaStats {
30
30
  filesChanged: number;
31
31
  linesAdded: number;
@@ -156,6 +156,23 @@ export interface AdvancePolicyVersionOptions {
156
156
  * and `deltaStats`) is appended only after every live write succeeded.
157
157
  */
158
158
  export declare function advancePolicyVersion(opts: AdvancePolicyVersionOptions): Promise<PolicyLedgerEntry>;
159
+ export interface AdoptPromotedPolicyVersionOptions {
160
+ repoRoot: string;
161
+ targetId: string;
162
+ candidateId: string;
163
+ reason?: string;
164
+ }
165
+ /**
166
+ * Adopt an already-applied candidate/human promotion into an existing policy
167
+ * lineage. This is deliberately separate from {@link advancePolicyVersion}: a
168
+ * human promotion is not an optimizer step and carries no falsifiable prediction,
169
+ * but it still must bump the snapshot/ledger head when it changes a versioned
170
+ * policy target.
171
+ *
172
+ * Returns `null` when the target has no initialized lineage or when the live
173
+ * files are already byte-identical to the head snapshot.
174
+ */
175
+ export declare function adoptPromotedPolicyVersion(opts: AdoptPromotedPolicyVersionOptions): Promise<PolicyLedgerEntry | null>;
159
176
  export interface RollbackPolicyVersionOptions {
160
177
  repoRoot: string;
161
178
  targetId: string;
@@ -31,6 +31,8 @@
31
31
  * CRITIC AGENT(基线智能体 baseline agent)'s skip condition
32
32
  * reads: the policy did not change, so there is no new arm
33
33
  * to compare against the baseline.
34
+ * - 'promote' → vN+1: an approved human/candidate promotion that already
35
+ * wrote the live file is adopted into the same lineage.
34
36
  * - 'rollback' → vN+1 whose files are byte-identical to snapshot
35
37
  * v<toVersion>. Rolling FORWARD to old content (git-revert
36
38
  * style) keeps the 单一血统 single lineage monotonic: the
@@ -110,6 +112,7 @@ function isValidLedgerEntry(value) {
110
112
  return false;
111
113
  if (e.action !== 'init' &&
112
114
  e.action !== 'evolve' &&
115
+ e.action !== 'promote' &&
113
116
  e.action !== 'rollback' &&
114
117
  e.action !== 'refused') {
115
118
  return false;
@@ -534,6 +537,88 @@ export async function advancePolicyVersion(opts) {
534
537
  throw err;
535
538
  }
536
539
  }
540
/**
 * Record an already-applied candidate/human promotion as a new head of an
 * existing policy lineage.
 *
 * Kept apart from {@link advancePolicyVersion}: the entry written here has
 * action 'promote', a null `episodeId` and no prediction.
 *
 * Flow: read the ledger; read the head snapshot; read the live copy of every
 * file in that snapshot; diff live against head; write snapshot vN+1 with the
 * live contents; append the ledger entry. If the append fails, the freshly
 * written snapshot directory is removed (best-effort) and the append error is
 * rethrown.
 *
 * @returns the appended ledger entry, or `null` when the ledger is empty or
 *   every live file equals its head-snapshot content.
 * @throws Error when a file from the head snapshot is missing on disk; other
 *   read, snapshot and append errors propagate unchanged.
 */
export async function adoptPromotedPolicyVersion(opts) {
    assertNonEmptyString(opts.targetId, 'targetId');
    assertNonEmptyString(opts.candidateId, 'candidateId');
    const repoRoot = path.resolve(opts.repoRoot);
    const layout = resolvePolicyLayout(repoRoot);
    const ledger = await readPolicyLedger(repoRoot, opts.targetId);
    if (ledger.length === 0) {
        // No initialized lineage for this target.
        return null;
    }
    const headVersion = ledger[ledger.length - 1].version;
    const snapshotFiles = await readPolicySnapshotFiles(repoRoot, opts.targetId, headVersion);
    const snapshotContent = new Map();
    for (const snap of snapshotFiles) {
        snapshotContent.set(snap.relPath, snap.content);
    }
    // Read the live counterpart of each snapshot file, one at a time.
    const liveFiles = [];
    for (const snap of snapshotFiles) {
        const absPath = path.resolve(repoRoot, ...snap.relPath.split('/'));
        assertWithinRepo(repoRoot, absPath);
        let content;
        try {
            content = await fs.readFile(absPath, 'utf8');
        }
        catch (err) {
            if (err.code !== 'ENOENT') {
                throw err;
            }
            throw new Error(`Refusing to adopt promotion for ${opts.targetId}: live file ${snap.relPath} is missing.`);
        }
        liveFiles.push({ relPath: snap.relPath, content });
    }
    const changed = liveFiles.filter((live) => snapshotContent.get(live.relPath) !== live.content);
    if (changed.length === 0) {
        // Live files already match the head snapshot.
        return null;
    }
    const perFileDeltas = changed.map((live) => renderFileDelta(live.relPath, snapshotContent.get(live.relPath), live.content));
    const deltaPatch = perFileDeltas.join('\n');
    const deltaStats = countDeltaStats(deltaPatch, changed.length);
    const newVersion = headVersion + 1;
    const at = new Date().toISOString();
    const manifestFiles = await writeSnapshot(layout, {
        targetId: opts.targetId,
        version: newVersion,
        at,
        files: liveFiles,
        deltaPatch,
    });
    const defaultReason = `candidate promotion ${opts.candidateId} adopted into policy lineage`;
    const entry = {
        schemaVersion: 1,
        version: newVersion,
        targetId: opts.targetId,
        at,
        action: 'promote',
        episodeId: null,
        files: manifestFiles,
        reason: opts.reason ?? defaultReason,
        deltaStats,
    };
    try {
        await appendLedgerEntry(layout, entry);
    }
    catch (appendErr) {
        // Undo the snapshot written above; the append error is what gets surfaced.
        const orphanDir = policySnapshotDir(layout, opts.targetId, newVersion);
        await fs.rm(orphanDir, { recursive: true, force: true }).catch(() => undefined);
        throw appendErr;
    }
    return entry;
}
537
622
  /**
538
623
  * Restore the live policy files byte-for-byte from snapshot v<toVersion> and
539
624
  * append a 'rollback' ledger entry. The restore is recorded as a NEW head
@@ -1,3 +1,4 @@
1
+ import { type PolicyLedgerEntry } from './policy/policy-store.js';
1
2
  import { type TargetEvolutionPolicy } from './target-evolution.js';
2
3
  import { updateCandidateStatus, type CandidateRepoLayout, type CanonicalCandidateStatus } from './candidates.js';
3
4
  export interface AppliedFile {
@@ -15,6 +16,8 @@ export interface PromotionApplyResult {
15
16
  /** Absolute path of the pre-image snapshot dir (for rollback). */
16
17
  rollbackDir: string;
17
18
  targetIds: string[];
19
+ /** Policy ledger rows appended for file-backed policy targets, if any. */
20
+ policyLedgerEntries: PolicyLedgerEntry[];
18
21
  }
19
22
  export interface RollbackResult {
20
23
  candidateId: string;
@@ -45,9 +48,9 @@ export interface ApplyCandidateOptions {
45
48
  * + status promoted), or files byte-unchanged". A status-write failure leaves
46
49
  * the candidate at its prior status (re-runnable), never half-applied.
47
50
  *
48
- * Preconditions: the candidate must carry structured `edits.json` (i.e. it was
49
- * proposed with `--agent`) and be in status `ready-for-eval` or `eval-passed`
50
- * (i.e. the static gate passed).
51
+ * Preconditions: the candidate must carry structured `edits.json` from a
52
+ * host-authored or externally packaged proposal and be in status
53
+ * `ready-for-eval` or `eval-passed` (i.e. the static gate passed).
51
54
  */
52
55
  export declare function applyCandidatePromotion(layout: CandidateRepoLayout, candidateId: string, opts: ApplyCandidateOptions): Promise<PromotionApplyResult>;
53
56
  /**
@@ -174,8 +177,7 @@ export interface EvidenceCheck {
174
177
  * 1. `fitnessSample.loss !== null` (a functional loss was actually computed), and
175
178
  * 2. `fitnessSample.trajectoryFacts.verified === true` (a real test-runner was
176
179
  * observed in the agent trajectory, not just a hand-authored test-report), and
177
- * 3. the observed run was GREEN: `observedStatus === 'success'` OR
178
- * `observedPassRate >= 1`, and
180
+ * 3. the observed run was GREEN: `observedStatus === 'success'`, and
179
181
  * 4. NO learn observation with code `trajectory-report-conflict` (report claims
180
182
  * green but the observed run failed) or `trajectory-unverified-pass` (report
181
183
  * claims green but no run was observed).