synergyspec-selfevolving 1.4.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. package/README.md +31 -18
  2. package/dist/commands/learn.d.ts +12 -1
  3. package/dist/commands/learn.js +151 -11
  4. package/dist/commands/self-evolution-episode.d.ts +177 -0
  5. package/dist/commands/self-evolution-episode.js +423 -0
  6. package/dist/commands/self-evolution.d.ts +12 -190
  7. package/dist/commands/self-evolution.js +114 -866
  8. package/dist/core/archive.d.ts +0 -1
  9. package/dist/core/archive.js +0 -58
  10. package/dist/core/artifact-graph/instruction-loader.d.ts +2 -4
  11. package/dist/core/artifact-graph/instruction-loader.js +3 -31
  12. package/dist/core/fitness/loss.d.ts +5 -5
  13. package/dist/core/fitness/loss.js +4 -4
  14. package/dist/core/project-config.d.ts +2 -0
  15. package/dist/core/project-config.js +28 -0
  16. package/dist/core/self-evolution/candidate-fitness.d.ts +23 -1
  17. package/dist/core/self-evolution/candidate-fitness.js +31 -5
  18. package/dist/core/self-evolution/candidates.d.ts +0 -9
  19. package/dist/core/self-evolution/critic-agent.d.ts +150 -0
  20. package/dist/core/self-evolution/critic-agent.js +487 -0
  21. package/dist/core/self-evolution/edits-contract.d.ts +53 -0
  22. package/dist/core/self-evolution/edits-contract.js +89 -0
  23. package/dist/core/self-evolution/episode-orchestrator.d.ts +197 -0
  24. package/dist/core/self-evolution/episode-orchestrator.js +534 -0
  25. package/dist/core/self-evolution/episode-store.d.ts +266 -0
  26. package/dist/core/self-evolution/episode-store.js +573 -0
  27. package/dist/core/self-evolution/evolution-switches.d.ts +1 -1
  28. package/dist/core/self-evolution/evolution-switches.js +5 -10
  29. package/dist/core/self-evolution/evolving-agent.d.ts +162 -0
  30. package/dist/core/self-evolution/evolving-agent.js +449 -0
  31. package/dist/core/self-evolution/host-harness.d.ts +1 -2
  32. package/dist/core/self-evolution/host-harness.js +1 -2
  33. package/dist/core/self-evolution/index.d.ts +9 -6
  34. package/dist/core/self-evolution/index.js +18 -6
  35. package/dist/core/self-evolution/line-diff.d.ts +60 -0
  36. package/dist/core/self-evolution/line-diff.js +130 -0
  37. package/dist/core/self-evolution/policy/fs-safe.d.ts +19 -0
  38. package/dist/core/self-evolution/policy/fs-safe.js +89 -0
  39. package/dist/core/self-evolution/policy/index.d.ts +13 -0
  40. package/dist/core/self-evolution/policy/index.js +13 -0
  41. package/dist/core/self-evolution/policy/policy-store.d.ts +217 -0
  42. package/dist/core/self-evolution/policy/policy-store.js +774 -0
  43. package/dist/core/self-evolution/policy/reject-buffer.d.ts +48 -0
  44. package/dist/core/self-evolution/policy/reject-buffer.js +168 -0
  45. package/dist/core/self-evolution/promote.d.ts +1 -1
  46. package/dist/core/self-evolution/promote.js +6 -33
  47. package/dist/core/self-evolution/promotion.js +1 -2
  48. package/dist/core/self-evolution/reward-agent.d.ts +234 -0
  49. package/dist/core/self-evolution/reward-agent.js +564 -0
  50. package/dist/core/self-evolution/scope-gate.d.ts +66 -0
  51. package/dist/core/self-evolution/scope-gate.js +107 -0
  52. package/dist/core/self-evolution/success-channel.js +2 -2
  53. package/dist/core/self-evolution/tool-evolution.js +2 -13
  54. package/dist/core/self-evolution/verdict.d.ts +8 -5
  55. package/dist/core/self-evolution/verdict.js +4 -7
  56. package/dist/core/templates/workflows/learn.d.ts +3 -2
  57. package/dist/core/templates/workflows/learn.js +18 -16
  58. package/dist/core/templates/workflows/self-evolving.d.ts +6 -4
  59. package/dist/core/templates/workflows/self-evolving.js +62 -172
  60. package/dist/dashboard/data.d.ts +25 -51
  61. package/dist/dashboard/data.js +68 -180
  62. package/dist/dashboard/react-client.js +458 -503
  63. package/dist/dashboard/react-styles.js +3 -3
  64. package/dist/dashboard/server.js +23 -17
  65. package/dist/ui/ascii-patterns.d.ts +7 -15
  66. package/dist/ui/ascii-patterns.js +123 -54
  67. package/dist/ui/welcome-screen.d.ts +0 -14
  68. package/dist/ui/welcome-screen.js +16 -35
  69. package/package.json +1 -1
@@ -1,18 +1,19 @@
1
1
  import * as fs from 'node:fs';
2
2
  import * as path from 'node:path';
3
- import * as crypto from 'node:crypto';
4
3
  import fastGlob from 'fast-glob';
5
- import { aggregateLearnEvolutionHints, applyCandidatePromotion, rollbackCandidatePromotion, shouldAutoPromote, isEvidenceComplete, generateEvolutionHints, persistLearnHints, readCandidateFitness, readHealthBaseline, writeHealthBaseline, readCandidatePackage, resolveTargetLocalFiles, CANONICAL_CANDIDATE_SOURCES, CANONICAL_TARGETS, collectArchiveExperiences, EVOLVABLE_PART_DESCRIPTIONS, EVOLVABLE_PARTS, evaluateTaskDecompositionForChange, evaluateToolEvolutionCandidate, generateCandidateId, generatePromotionReport, groupCandidatesByTarget, listCandidates, rankCandidatesForTarget, readPromotedBaselineLoss, buildOptimizationTrajectory, renderTrajectoryBlock, checkLossRegression, recordVerdictBestEffort, updateCandidateStatus, VARIANT_ANGLES, makeReplayRunChange, scoreCandidatesByReplay, isEvolutionPartEnabled, findSimilarArchiveExperiences, listCanonicalTargets, lookupCanonicalTarget, runCanonicalProposerAgentWithRepair, buildProposerSlice, validateCandidateEdits, renderUnifiedDiff, CanonicalProposerNoOp, resolveTargetEvolutionPolicy, resolveKindOnlyPinTarget, detectUnbindableHintObservations, isCanonicalTargetEvolvable, parseEvolutionSwitchOptions, readTemplateVariantManifest, renderAlignmentReport, renderArchiveExperienceBlock, renderStaticGateSummary, renderToolEvolutionGuardReport, renderEvolutionSwitches, requireCanonicalTarget, resolveCandidateRepo, runStaticCandidateGate, selectTemplateVariant, shouldTriggerCandidate, validateLearnEvolutionHint, writeCandidatePackage, verifySpecCodeAlignmentForChange, } from '../core/self-evolution/index.js';
4
+ import { aggregateLearnEvolutionHints, applyCandidatePromotion, rollbackCandidatePromotion, shouldAutoPromote, isEvidenceComplete, readCandidateFitness, readHealthBaseline, writeHealthBaseline, readCandidatePackage, resolveTargetLocalFiles, CANONICAL_CANDIDATE_SOURCES, CANONICAL_TARGETS, collectArchiveExperiences, EVOLVABLE_PART_DESCRIPTIONS, EVOLVABLE_PARTS, evaluateTaskDecompositionForChange, evaluateToolEvolutionCandidate, generateCandidateId, generatePromotionReport, readPromotedBaselineLoss, checkLossRegression, recordVerdictBestEffort, updateCandidateStatus, isEvolutionPartEnabled, findSimilarArchiveExperiences, listCanonicalTargets, lookupCanonicalTarget, validateCandidateEdits, renderUnifiedDiff, CanonicalProposerNoOp, resolveTargetEvolutionPolicy, resolveKindOnlyPinTarget, detectUnbindableHintObservations, isCanonicalTargetEvolvable, parseEvolutionSwitchOptions, renderAlignmentReport, renderArchiveExperienceBlock, renderStaticGateSummary, renderToolEvolutionGuardReport, renderEvolutionSwitches, requireCanonicalTarget, resolveCandidateRepo, runStaticCandidateGate, shouldTriggerCandidate, validateLearnEvolutionHint, writeCandidatePackage, verifySpecCodeAlignmentForChange, } from '../core/self-evolution/index.js';
6
5
  import { generateLearnReport } from '../core/learn.js';
7
- import { listExemplarFiles, mineSuccessSignals, readProtections, renderDoNotPruneBlock, } from '../core/self-evolution/success-channel.js';
8
6
  import { validateExplicitTrajectoryHandle } from '../core/learn/trajectory-discovery.js';
9
- import { resolveMetricSource } from '../core/fitness/index.js';
10
- import { validateChangeExists, validateSchemaExists } from './workflow/shared.js';
7
+ import { validateChangeExists } from './workflow/shared.js';
11
8
  import { readProjectConfig } from '../core/project-config.js';
9
+ import { attachSelfEvolutionEpisodeCommands } from './self-evolution-episode.js';
12
10
  export function registerSelfEvolutionCommand(program) {
13
11
  const cmd = program
14
12
  .command('self-evolution')
15
- .description('Inspect SynergySpec-SelfEvolving Evaluation Lab self-evolution signals for templates, tasks, verification, memory, and tool changes');
13
+ .description('Inspect SynergySpec-SelfEvolving self-evolution signals for templates, tasks, verification, memory, and tool changes');
14
+ // Loop-v2 (self-evolution as in-context RL): the `episode` + `policy`
15
+ // subcommands.
16
+ attachSelfEvolutionEpisodeCommands(cmd);
16
17
  cmd
17
18
  .command('parts')
18
19
  .description('List evolvable parts that can be enabled or disabled per run')
@@ -38,41 +39,6 @@ export function registerSelfEvolutionCommand(program) {
38
39
  console.log(`- ${id}: ${mark} - ${EVOLVABLE_PART_DESCRIPTIONS[id]}`);
39
40
  }
40
41
  });
41
- cmd
42
- .command('templates')
43
- .description('Show selected evolving template variant for an artifact')
44
- .option('--schema <name>', 'schema name', 'spec-driven')
45
- .requiredOption('--artifact <id>', 'artifact id such as proposal, specs, design, or tasks')
46
- .option('--evolve <parts>', 'comma-separated evolvable parts allowed for this run, or all/none')
47
- .option('--evolve-deny <parts>', 'comma-separated evolvable parts disabled for this run')
48
- .option('--json', 'output JSON')
49
- .action(async (options) => {
50
- const projectRoot = process.cwd();
51
- const schemaName = validateSchemaExists(options.schema ?? 'spec-driven', projectRoot);
52
- const switches = parseEvolutionSwitchOptions(options);
53
- if (!isEvolutionPartEnabled(switches, 'template-variants')) {
54
- printDisabled('template-variants', switches, options.json);
55
- return;
56
- }
57
- const selection = selectTemplateVariant({
58
- projectRoot,
59
- schemaName,
60
- artifactId: options.artifact,
61
- });
62
- const manifest = readTemplateVariantManifest(projectRoot);
63
- if (options.json) {
64
- console.log(JSON.stringify({ selection, manifest, switches }, null, 2));
65
- return;
66
- }
67
- if (!selection) {
68
- console.log(`No active project template variant for ${schemaName}/${options.artifact}.`);
69
- return;
70
- }
71
- console.log(`${selection.schema}/${selection.artifact}: ${selection.id}`);
72
- console.log(`Score: ${selection.score.toFixed(2)}`);
73
- console.log(`Path: ${selection.templatePath}`);
74
- console.log(`Reason: ${selection.reason}`);
75
- });
76
42
  cmd
77
43
  .command('memory [query]')
78
44
  .description('Retrieve similar archived change experience')
@@ -220,19 +186,6 @@ export function registerSelfEvolutionCommand(program) {
220
186
  }
221
187
  printCanonicalTargetTable(targets);
222
188
  });
223
- cmd
224
- .command('trajectory <targetId>')
225
- .description('Print the OPTIMIZATION TRAJECTORY block for a canonical target — the scored history of prior candidates (loss, verdict, approach) + the promoted baseline to beat — exactly as the headless proposer sees it. Read-only; the host learn skill calls this before authoring an --from-edits improvement.')
226
- .option('--max-entries <n>', 'cap the number of prior candidates shown (default 6)')
227
- .option('--json', 'output { targetId, baselineLoss, baselineCandidateId, entries } JSON')
228
- .action(async (targetId, options) => {
229
- const result = await runTrajectoryCommand({
230
- targetId,
231
- maxEntries: options.maxEntries !== undefined ? Number(options.maxEntries) : undefined,
232
- json: options.json,
233
- }, { repoRoot: process.cwd() });
234
- process.exitCode = result.exitCode;
235
- });
236
189
  cmd
237
190
  .command('hints <jsonFile...>')
238
191
  .description('Aggregate LearnEvolutionHint JSON files into scored candidate-request groups (no canonical files are written)')
@@ -281,30 +234,6 @@ export function registerSelfEvolutionCommand(program) {
281
234
  const result = await runPromotionReportCommand({ candidateId, write: options.write, json: options.json }, { repoRoot: process.cwd() });
282
235
  process.exitCode = result.exitCode;
283
236
  });
284
- cmd
285
- .command('evolve')
286
- .description('Run the GA outer loop: group candidates by target, score by accumulated fitness (or --replay a corpus), rank/select the best, and generate a human-gated promotion report. Never auto-promotes.')
287
- .option('--target <targetId>', 'restrict the loop to a single canonical target id')
288
- .option('--replay', 'score candidates by replaying a change corpus through baseline + candidate (requires --change); default uses already-accumulated fitness records')
289
- .option('--change <ids...>', 'change ids forming the replay corpus (only used with --replay)')
290
- .option('--write', 'write a promotion-report.md for each selected best candidate (still human-gated; never auto-promotes)')
291
- .option('--mark-outcompeted', 'mark proven sibling variants that lost the ranking to the best (same variant cohort, higher loss) with verdict `outcompeted` so future proposals see them as negative examples (never changes status)')
292
- .option('--evolve-target <ids>', 'comma-separated canonical target ids allowed to evolve (supports all/none)')
293
- .option('--freeze-target <ids>', 'comma-separated canonical target ids frozen (supports all/none)')
294
- .option('--json', 'output the full RunEvolveOuterLoopResult JSON')
295
- .action(async (options) => {
296
- const result = await runEvolveOuterLoopCommand({
297
- target: options.target,
298
- replay: options.replay,
299
- changeIds: options.change,
300
- write: options.write,
301
- markOutcompeted: options.markOutcompeted,
302
- evolveTarget: options.evolveTarget,
303
- freezeTarget: options.freezeTarget,
304
- json: options.json,
305
- }, { repoRoot: process.cwd() });
306
- process.exitCode = result.exitCode;
307
- });
308
237
  cmd
309
238
  .command('propose-canonical')
310
239
  .description('Convert aggregated learn-stage hints into candidate package directories (proposal only; no canonical file is modified)')
@@ -315,9 +244,7 @@ export function registerSelfEvolutionCommand(program) {
315
244
  .option('--source <source>', `attribution source recorded on each candidate (one of: ${CANONICAL_CANDIDATE_SOURCES.join(', ')}); defaults to 'learn-threshold'`)
316
245
  .option('--dry-run', 'build candidate package contents in memory but do not write to disk')
317
246
  .option('--force', 'no effect in v1; the writer never overwrites existing candidates')
318
- .option('--from-edits <file>', "validate + package candidate edits the HOST agent already wrote (JSON { targetId?, rationale?, edits: [{relPath, content}] }; '-' reads stdin). Preferred when running inside a host code agent; requires exactly one surviving group")
319
- .option('--agent', 'HEADLESS FALLBACK ONLY (cron/CI with no host agent): spawn the proposer agent to draft the diff. Prefer --from-edits when running inside a host code agent')
320
- .option('--variants <n>', 'draft N competing variant candidates (1-5; default 1) for the surviving group, each on a distinct improvement angle, so the GA outer loop can select the best. Requires --agent.')
247
+ .option('--from-edits <file>', "validate + package candidate edits the HOST agent already wrote (JSON { targetId?, rationale?, edits: [{relPath, content}] }; '-' reads stdin). The host code agent authors the diff; requires exactly one surviving group")
321
248
  .option('--evolve-target <ids>', 'comma-separated canonical target ids allowed to evolve this run (supports all/none)')
322
249
  .option('--freeze-target <ids>', 'comma-separated canonical target ids frozen this run (supports all/none)')
323
250
  .option('--json', 'output JSON summary')
@@ -338,11 +265,6 @@ export function registerSelfEvolutionCommand(program) {
338
265
  // here we only resolve the file/stdin and shape-validate the payload.
339
266
  let editsInput;
340
267
  if (options.fromEdits) {
341
- if (options.agent) {
342
- process.stderr.write('error: --from-edits and --agent are mutually exclusive\n');
343
- process.exitCode = 2;
344
- return;
345
- }
346
268
  let raw;
347
269
  try {
348
270
  raw =
@@ -371,8 +293,6 @@ export function registerSelfEvolutionCommand(program) {
371
293
  source: options.source,
372
294
  dryRun: options.dryRun,
373
295
  force: options.force,
374
- agent: options.agent,
375
- variants: options.variants !== undefined ? Number(options.variants) : undefined,
376
296
  editsInput,
377
297
  evolveTarget: options.evolveTarget,
378
298
  freezeTarget: options.freezeTarget,
@@ -408,30 +328,6 @@ export function registerSelfEvolutionCommand(program) {
408
328
  const result = await runRejectCommand({ candidateId, reason: options.reason, json: options.json }, { repoRoot: process.cwd() });
409
329
  process.exitCode = result.exitCode;
410
330
  });
411
- cmd
412
- .command('auto-evolve')
413
- .description('ONE-BUTTON self-evolution: run learn -> hints -> propose(--agent) -> gate -> promote for a change, auto-applying the gate-passing winner onto the canonical template (no per-change approval). Honors the per-target switch + oracle freeze; snapshots every change for rollback.')
414
- .requiredOption('--change <names...>', 'one or more completed changes to learn from and evolve (a SINGLE one is enough)')
415
- .option('--no-auto', 'run the full pipeline but stop BEFORE applying (gate + select only)')
416
- .option('--require-proven', 'only promote on a MEASURED fitness improvement (refuse unproven candidates)')
417
- .option('--min-occurrences <n>', 'min occurrences a signal must reach to evolve (default 1; raise + pass several --change to require recurrence across them)')
418
- .option('--threshold-key <key>', 'force-propose only this aggregated signal (bypasses the recurrence trigger)')
419
- .option('--evolve-target <ids>', 'comma-separated canonical target ids allowed to evolve (supports all/none)')
420
- .option('--freeze-target <ids>', 'comma-separated canonical target ids frozen (supports all/none)')
421
- .option('--json', 'output the full AutoEvolveReport JSON')
422
- .action(async (options) => {
423
- const result = await runAutoEvolve({
424
- changeNames: options.change,
425
- auto: options.auto,
426
- requireProven: options.requireProven,
427
- minOccurrences: options.minOccurrences !== undefined ? Number(options.minOccurrences) : undefined,
428
- thresholdKey: options.thresholdKey,
429
- evolveTarget: options.evolveTarget,
430
- freezeTarget: options.freezeTarget,
431
- json: options.json,
432
- }, { repoRoot: process.cwd() });
433
- process.exitCode = result.exitCode;
434
- });
435
331
  cmd
436
332
  .command('evolve-from-edits')
437
333
  .description('HOST-AUTHORED one-button evolve: package edits the host code agent already wrote (--from-edits) for ONE learn signal, run the static gate, and auto-promote ONLY when the change\'s learn report carries an OBSERVED-VERIFIED green signal (a real test run was seen) onto the canonical LOCAL file. Never spawns the proposer; --agent is refused.')
@@ -602,37 +498,13 @@ export function parseHostEditsInput(raw) {
602
498
  *
603
499
  * SAFETY:
604
500
  * - Never writes outside `<repoRoot>/.synergyspec-selfevolving/self-evolution/candidates/`.
605
- * - Generation is EITHER the host-agent `--from-edits` path (validate + package)
606
- * OR the headless `--agent` proposer fallback; without either, diff.patch is
607
- * the placeholder for a human to complete.
501
+ * - Generation is the host-agent `--from-edits` path (validate + package); when
502
+ * absent, diff.patch is the placeholder for a human to complete.
608
503
  */
609
504
  export async function runProposeCanonical(args, opts) {
610
505
  const stdout = opts.stdout ?? ((line) => console.log(line));
611
506
  const stderr = opts.stderr ?? ((line) => console.error(line));
612
507
  const now = opts.now ?? (() => new Date());
613
- // Host-authored edits and the headless proposer are mutually exclusive. The
614
- // CLI action also rejects the combo, but mirror it here so both entry points
615
- // share one contract (a programmatic caller can't silently get host-precedence).
616
- if (args.editsInput && args.agent) {
617
- stderr('--from-edits and --agent are mutually exclusive');
618
- return {
619
- exitCode: 2,
620
- proposed: [],
621
- skipped: [],
622
- errors: ['--from-edits and --agent are mutually exclusive'],
623
- };
624
- }
625
- // Population-based generation: clamp variants to 1-5. N>1 requires the headless
626
- // proposer (divergence is prompt-side, one prompt per variant); it is invalid
627
- // with --from-edits (one host payload = one candidate) and pointless for the
628
- // placeholder path (N identical placeholders).
629
- const requestedVariants = Number.isFinite(args.variants) ? Math.trunc(args.variants) : 1;
630
- const variantCount = Math.max(1, Math.min(5, requestedVariants || 1));
631
- if (variantCount > 1 && !args.agent) {
632
- const msg = '--variants > 1 requires --agent (variant divergence is prompt-side; --from-edits carries one payload).';
633
- stderr(msg);
634
- return { exitCode: 2, proposed: [], skipped: [], errors: [msg] };
635
- }
636
508
  const filePaths = args.fromLearn ?? [];
637
509
  // 1) Resolve & validate --target up front (deterministic throw on unknown).
638
510
  if (args.target !== undefined) {
@@ -704,7 +576,7 @@ export async function runProposeCanonical(args, opts) {
704
576
  ? { ...hint, affectedTargetId: pinId, thresholdKey: `${pinId}:${hint.proposedChangeType}` }
705
577
  : hint;
706
578
  });
707
- // 4) Aggregate. `aggregationOptions` lets auto-evolve act on a single change
579
+ // 4) Aggregate. `aggregationOptions` lets the host evolve-from-edits path act on a single change
708
580
  // (one forward pass = one loss); omitted = conservative cross-change defaults.
709
581
  const allGroups = aggregateLearnEvolutionHints(scopedHints, args.aggregationOptions);
710
582
  // 5) Filter.
@@ -845,221 +717,112 @@ export async function runProposeCanonical(args, opts) {
845
717
  const evalPlanSummary = evalPlanMd;
846
718
  const rationaleSummary = rationaleMd;
847
719
  const riskSummary = riskReportMd;
848
- // Population-based generation: draft `variantCount` competing candidates from
849
- // this one group, each on a distinct improvement angle, sharing a variantGroup
850
- // id so the GA outer loop can later mark the losers `outcompeted`. The default
851
- // (variantCount === 1) sets no variantGroup/angle — byte-identical to the
852
- // single-candidate path.
853
- const variantGroup = variantCount > 1
854
- ? `vg-${crypto
855
- .createHash('sha256')
856
- .update(group.thresholdKey)
857
- .digest('hex')
858
- .slice(0, 8)}-${now().toISOString().replace(/[^0-9]/g, '').slice(0, 14)}`
859
- : undefined;
860
- for (let variantIndex = 0; variantIndex < variantCount; variantIndex++) {
861
- const candidateId = generateCandidateId();
862
- const ts = now().toISOString();
863
- const variantAngle = variantCount > 1 ? VARIANT_ANGLES[variantIndex % VARIANT_ANGLES.length] : undefined;
864
- const candidate = {
865
- id: candidateId,
866
- createdAt: ts,
867
- updatedAt: ts,
868
- source,
869
- sourceHints: [...group.hintIds],
870
- sourceAggregatedKey: group.thresholdKey,
871
- targetIds,
872
- changedFiles: [],
873
- status: 'draft',
874
- expectedBenefit,
875
- riskLevel: group.dominantRisk,
876
- rollbackPlan: 'Delete candidate directory or transition candidate to rejected. Canonical files are unchanged because no diff is applied at proposal time.',
877
- proposalSummary,
878
- evalPlanSummary,
879
- rationaleSummary,
880
- riskSummary,
881
- ...(variantGroup ? { variantGroup } : {}),
882
- ...(variantAngle ? { variantAngle } : {}),
883
- };
884
- let diffPatch = '# No diff yet. Apply candidate change manually before invoking static gate.\n';
885
- let rationaleForPkg = rationaleMd;
886
- // Structured full-file edits (persisted as edits.json so the promote/apply
887
- // step can write the new content deterministically). Sourced EITHER from the
888
- // host code agent (`--from-edits`, preferred) OR the headless proposer agent.
889
- let agentEdits;
890
- // Generate a REAL diff for this group's target (proposal-only). Two sources,
891
- // one validation + packaging contract:
892
- // • HOST path (`--from-edits`): the host agent already wrote the new file;
893
- // the CLI only validates (frozen + target scope) and packages it.
894
- // • HEADLESS FALLBACK (`--agent`): spawn the proposer agent (cron/CI with
895
- // no host agent).
896
- // Both are skipped under --dry-run so a dry run never touches disk (resolving
897
- // local files can MATERIALIZE a template default). Edits are validated against
898
- // the target's resolved LOCAL files and rejected if they touch a frozen/gate-
899
- // defining file. Any failure (no-op, invalid output, missing binary) falls
900
- // back to the placeholder so the candidate is still written for a human.
901
- const useHostEdits = !!args.editsInput && targetIds.length > 0 && !args.dryRun;
902
- const useAgentFallback = !args.editsInput && !!args.agent && targetIds.length > 0 && !args.dryRun;
903
- if (useHostEdits || useAgentFallback) {
904
- const agentTarget = lookupCanonicalTarget(targetIds[0]);
905
- if (agentTarget) {
906
- try {
907
- // Resolve to the user's LOCAL editable files (installed SKILL.md, or a
908
- // materialized project-local template/schema) so evolution writes to
909
- // THEIR repo — no rebuild/republish. Dev repo resolves to source.
910
- const resolved = await resolveTargetLocalFiles(targetIds[0], opts.repoRoot);
911
- if (resolved.files.length === 0) {
912
- throw new Error(`no local editable file for target ${targetIds[0]} in this repo`);
913
- }
914
- const allowedRel = resolved.files.map((f) => f.relPath);
915
- const currentFiles = resolved.files.map((f) => ({
916
- relPath: f.relPath,
917
- content: f.content,
918
- }));
919
- // Close the feedback loop into the proposer (OPRO/AlphaEvolve-style):
920
- // feed the current promoted-baseline loss + a scored history of prior
921
- // candidates for this target so the agent can beat the best prior
922
- // attempt and avoid rejected/high-loss approaches. Headless-agent path
923
- // only (the host --from-edits path has no prompt). Best-effort: any
924
- // failure degrades to no trajectory and never blocks proposing. For
925
- // N>1 variants this is what makes siblings diverge from prior attempts.
926
- let trajectoryContext;
927
- let baselineLoss = null;
928
- // SUCCESS-CHANNEL constraints (R4): the DO-NOT-PRUNE block + real
929
- // exemplar excerpts from verified-green runs, so the headless
930
- // proposer cannot "improve" a target by hollowing out the sections
931
- // that demonstrably produce passing runs. Best-effort like the
932
- // trajectory block; populated only when non-empty so prompts on
933
- // repos with no green history stay byte-identical.
934
- let doNotPrune;
935
- let exemplarsContext;
936
- // CREDIT-PATH SLICE (R6): the real artifact text along the failing
937
- // paths, built from the group hints' creditPaths. Deterministic and
938
- // bounded; '' when no hint carries a path (legacy corpora).
939
- let sliceContext;
940
- if (useAgentFallback) {
941
- const slice = buildProposerSlice(groupHints);
942
- if (slice.length > 0)
943
- sliceContext = slice;
944
- try {
945
- const baseline = await readPromotedBaselineLoss(layout, targetIds[0]);
946
- baselineLoss = baseline?.meanLoss ?? null;
947
- const traj = await buildOptimizationTrajectory(layout, targetIds[0]);
948
- trajectoryContext = renderTrajectoryBlock(traj, {
949
- baselineLoss,
950
- baselineCandidateId: baseline?.candidateId,
951
- });
952
- }
953
- catch {
954
- // best-effort scored history; never block proposing.
955
- }
956
- try {
957
- const protections = await readProtections(opts.repoRoot, targetIds[0]);
958
- const exemplarPaths = await listExemplarFiles(opts.repoRoot, targetIds[0]);
959
- const block = renderDoNotPruneBlock(protections, exemplarPaths);
960
- if (block.length > 0)
961
- doNotPrune = block;
962
- // Up to 3 exemplar files, each capped at 1500 chars, 4000 total.
963
- const chunks = [];
964
- let totalChars = 0;
965
- for (const exemplarPath of exemplarPaths.slice(0, 3)) {
966
- let content;
967
- try {
968
- content = await fs.promises.readFile(exemplarPath, 'utf8');
969
- }
970
- catch {
971
- continue;
972
- }
973
- const capped = content.length > 1500 ? `${content.slice(0, 1499)}…` : content;
974
- if (totalChars + capped.length > 4000)
975
- break;
976
- chunks.push(capped.trimEnd());
977
- totalChars += capped.length;
978
- }
979
- if (chunks.length > 0)
980
- exemplarsContext = chunks.join('\n\n---\n\n');
981
- }
982
- catch {
983
- // best-effort success-channel context; never block proposing.
984
- }
985
- }
986
- const out = useHostEdits
987
- ? packageHostEdits(args.editsInput, allowedRel, currentFiles, group, targetIds[0])
988
- : // Repair-loop variant (R6): on a VALIDATION failure the agent is
989
- // re-prompted with the concrete error (the unidecode large-file
990
- // JSON-fragility defect), bounded at 2 repairs; exhaustion still
991
- // falls to the placeholder via the catch below.
992
- await runCanonicalProposerAgentWithRepair({
993
- group,
994
- groupHints,
995
- // Bind the editable surface to the resolved LOCAL files, not the
996
- // registry's package-source paths.
997
- target: { ...agentTarget, files: allowedRel },
998
- currentFiles,
999
- trajectoryContext,
1000
- baselineLoss,
1001
- variantAngle,
1002
- doNotPrune,
1003
- exemplarsContext,
1004
- sliceContext,
1005
- spawn: opts.proposerSpawn,
1006
- binary: opts.proposerBinary,
1007
- });
1008
- diffPatch = out.diffPatch + '\n';
1009
- candidate.changedFiles = out.changedFiles;
1010
- agentEdits = out.edits;
1011
- rationaleForPkg = `${out.rationale}\n\n---\n\n${rationaleMd}`;
1012
- }
1013
- catch (err) {
1014
- stderr(`${useHostEdits ? 'host edits' : 'proposer agent'} did not produce a diff for ${candidateId}; keeping placeholder: ${err instanceof Error ? err.message : String(err)}`);
720
+ const candidateId = generateCandidateId();
721
+ const ts = now().toISOString();
722
+ const candidate = {
723
+ id: candidateId,
724
+ createdAt: ts,
725
+ updatedAt: ts,
726
+ source,
727
+ sourceHints: [...group.hintIds],
728
+ sourceAggregatedKey: group.thresholdKey,
729
+ targetIds,
730
+ changedFiles: [],
731
+ status: 'draft',
732
+ expectedBenefit,
733
+ riskLevel: group.dominantRisk,
734
+ rollbackPlan: 'Delete candidate directory or transition candidate to rejected. Canonical files are unchanged because no diff is applied at proposal time.',
735
+ proposalSummary,
736
+ evalPlanSummary,
737
+ rationaleSummary,
738
+ riskSummary,
739
+ };
740
+ let diffPatch = '# No diff yet. Apply candidate change manually before invoking static gate.\n';
741
+ let rationaleForPkg = rationaleMd;
742
+ // Structured full-file edits (persisted as edits.json so the promote/apply
743
+ // step can write the new content deterministically). Sourced from the host
744
+ // code agent (`--from-edits`), the host-authored channel.
745
+ let agentEdits;
746
+ // Generate a REAL diff for this group's target (proposal-only) via the HOST
747
+ // path (`--from-edits`): the host code agent already wrote the new file; the
748
+ // CLI only validates (frozen + target scope) and packages it. Skipped under
749
+ // --dry-run so a dry run never touches disk (resolving local files can
750
+ // MATERIALIZE a template default). Edits are validated against the target's
751
+ // resolved LOCAL files and rejected if they touch a frozen/gate-defining
752
+ // file. Any failure (no-op, invalid edits) falls back to the placeholder so
753
+ // the candidate is still written for a human.
754
+ const useHostEdits = !!args.editsInput && targetIds.length > 0 && !args.dryRun;
755
+ if (useHostEdits) {
756
+ const agentTarget = lookupCanonicalTarget(targetIds[0]);
757
+ if (agentTarget) {
758
+ try {
759
+ // Resolve to the user's LOCAL editable files (installed SKILL.md, or a
760
+ // materialized project-local template/schema) so evolution writes to
761
+ // THEIR repo — no rebuild/republish. Dev repo resolves to source.
762
+ const resolved = await resolveTargetLocalFiles(targetIds[0], opts.repoRoot);
763
+ if (resolved.files.length === 0) {
764
+ throw new Error(`no local editable file for target ${targetIds[0]} in this repo`);
1015
765
  }
766
+ const allowedRel = resolved.files.map((f) => f.relPath);
767
+ const currentFiles = resolved.files.map((f) => ({
768
+ relPath: f.relPath,
769
+ content: f.content,
770
+ }));
771
+ const out = packageHostEdits(args.editsInput, allowedRel, currentFiles, group, targetIds[0]);
772
+ diffPatch = out.diffPatch + '\n';
773
+ candidate.changedFiles = out.changedFiles;
774
+ agentEdits = out.edits;
775
+ rationaleForPkg = `${out.rationale}\n\n---\n\n${rationaleMd}`;
1016
776
  }
1017
- }
1018
- // Tell the operator when a supplied --from-edits payload was NOT applied to
1019
- // this candidate (so a placeholder is silently written instead of their edit).
1020
- if (args.editsInput && !useHostEdits) {
1021
- stderr(`--from-edits payload not applied to ${candidateId}: ${args.dryRun
1022
- ? 'host edits are not validated under --dry-run'
1023
- : 'surviving group has no resolved canonical target'}.`);
1024
- }
1025
- const pkg = {
1026
- candidate,
1027
- proposalMd,
1028
- diffPatch,
1029
- rationaleMd: rationaleForPkg,
1030
- evalPlanMd,
1031
- riskReportMd,
1032
- ...(agentEdits && agentEdits.length > 0 ? { edits: agentEdits } : {}),
1033
- };
1034
- if (args.dryRun) {
1035
- const dryRunPath = path.join(layout.baseDir, candidateId);
1036
- proposed.push({
1037
- candidateId,
1038
- path: dryRunPath,
1039
- targetIds,
1040
- riskLevel: candidate.riskLevel,
1041
- source,
1042
- });
1043
- if (!args.json) {
1044
- stdout(`[dry-run] would propose ${candidateId} (target=${targetIds.join(',') || '(none)'}, risk=${candidate.riskLevel})`);
777
+ catch (err) {
778
+ stderr(`host edits did not produce a diff for ${candidateId}; keeping placeholder: ${err instanceof Error ? err.message : String(err)}`);
1045
779
  }
1046
- continue;
1047
- }
1048
- try {
1049
- const written = await writeCandidatePackage(layout, pkg);
1050
- proposed.push({
1051
- candidateId,
1052
- path: written.candidateDir,
1053
- targetIds,
1054
- riskLevel: candidate.riskLevel,
1055
- source,
1056
- });
1057
780
  }
1058
- catch (err) {
1059
- const message = err instanceof Error ? err.message : String(err);
1060
- errors.push(`failed to write candidate ${candidateId}: ${message}`);
1061
- stderr(`Failed to write candidate ${candidateId}: ${message}`);
781
+ }
782
+ // Tell the operator when a supplied --from-edits payload was NOT applied to
783
+ // this candidate (so a placeholder is silently written instead of their edit).
784
+ if (args.editsInput && !useHostEdits) {
785
+ stderr(`--from-edits payload not applied to ${candidateId}: ${args.dryRun
786
+ ? 'host edits are not validated under --dry-run'
787
+ : 'surviving group has no resolved canonical target'}.`);
788
+ }
789
+ const pkg = {
790
+ candidate,
791
+ proposalMd,
792
+ diffPatch,
793
+ rationaleMd: rationaleForPkg,
794
+ evalPlanMd,
795
+ riskReportMd,
796
+ ...(agentEdits && agentEdits.length > 0 ? { edits: agentEdits } : {}),
797
+ };
798
+ if (args.dryRun) {
799
+ const dryRunPath = path.join(layout.baseDir, candidateId);
800
+ proposed.push({
801
+ candidateId,
802
+ path: dryRunPath,
803
+ targetIds,
804
+ riskLevel: candidate.riskLevel,
805
+ source,
806
+ });
807
+ if (!args.json) {
808
+ stdout(`[dry-run] would propose ${candidateId} (target=${targetIds.join(',') || '(none)'}, risk=${candidate.riskLevel})`);
1062
809
  }
810
+ continue;
811
+ }
812
+ try {
813
+ const written = await writeCandidatePackage(layout, pkg);
814
+ proposed.push({
815
+ candidateId,
816
+ path: written.candidateDir,
817
+ targetIds,
818
+ riskLevel: candidate.riskLevel,
819
+ source,
820
+ });
821
+ }
822
+ catch (err) {
823
+ const message = err instanceof Error ? err.message : String(err);
824
+ errors.push(`failed to write candidate ${candidateId}: ${message}`);
825
+ stderr(`Failed to write candidate ${candidateId}: ${message}`);
1063
826
  }
1064
827
  }
1065
828
  // 7) Build result + output.
@@ -1093,7 +856,7 @@ export async function runProposeCanonical(args, opts) {
1093
856
  }
1094
857
  /**
1095
858
  * Programmatic entrypoint for `self-evolution promote <id>` — the close-the-loop
1096
- * apply/rollback. Exported so tests + auto-evolve can drive it directly.
859
+ * apply/rollback. Exported so tests + the host evolve-from-edits path can drive it directly.
1097
860
  */
1098
861
  export async function runPromoteCommand(args, opts) {
1099
862
  const stdout = opts.stdout ?? ((l) => console.log(l));
@@ -1240,354 +1003,6 @@ export async function runRejectCommand(args, opts) {
1240
1003
  return { exitCode: 1, error: message };
1241
1004
  }
1242
1005
  }
1243
- /**
1244
- * Programmatic entrypoint for `self-evolution trajectory <targetId>` — a
1245
- * READ-ONLY view of the scored optimization-trajectory block the headless
1246
- * proposer receives, so a HOST code agent (which authors edits via
1247
- * `--from-edits` and never sees that prompt) can read the same prior-candidate
1248
- * loss/verdict history before authoring. Reuses the exact builder/renderer the
1249
- * proposer uses. Never mutates anything.
1250
- */
1251
- export async function runTrajectoryCommand(args, opts) {
1252
- const stdout = opts.stdout ?? ((l) => console.log(l));
1253
- const stderr = opts.stderr ?? ((l) => console.error(l));
1254
- if (!lookupCanonicalTarget(args.targetId)) {
1255
- const message = `Unknown canonical target: ${args.targetId}`;
1256
- if (args.json)
1257
- stdout(JSON.stringify({ error: message }, null, 2));
1258
- else
1259
- stderr(message);
1260
- return { exitCode: 1, error: message };
1261
- }
1262
- const layout = resolveCandidateRepo(opts.repoRoot);
1263
- try {
1264
- const baseline = await readPromotedBaselineLoss(layout, args.targetId);
1265
- const maxEntries = args.maxEntries !== undefined && Number.isFinite(args.maxEntries) && args.maxEntries > 0
1266
- ? Math.trunc(args.maxEntries)
1267
- : undefined;
1268
- const entries = await buildOptimizationTrajectory(layout, args.targetId, maxEntries !== undefined ? { maxEntries } : undefined);
1269
- // SUCCESS-CHANNEL view (R4): the host agent authoring a --from-edits
1270
- // improvement never sees the headless proposer's prompt, so this READ-ONLY
1271
- // command is where it learns which sections are load-bearing. Best-effort:
1272
- // a missing/corrupt protections file degrades to no section (and the
1273
- // baseline output stays byte-identical — fields/sections omit-when-empty).
1274
- let doNotPrune = '';
1275
- let exemplarPaths = [];
1276
- try {
1277
- const protections = await readProtections(opts.repoRoot, args.targetId);
1278
- exemplarPaths = await listExemplarFiles(opts.repoRoot, args.targetId);
1279
- doNotPrune = renderDoNotPruneBlock(protections, exemplarPaths);
1280
- }
1281
- catch {
1282
- // read-only convenience view; never fail the command over it.
1283
- }
1284
- if (args.json) {
1285
- stdout(JSON.stringify({
1286
- targetId: args.targetId,
1287
- baselineLoss: baseline?.meanLoss ?? null,
1288
- baselineCandidateId: baseline?.candidateId ?? null,
1289
- entries,
1290
- ...(doNotPrune.length > 0 ? { doNotPrune } : {}),
1291
- ...(exemplarPaths.length > 0 ? { exemplarPaths } : {}),
1292
- }, null, 2));
1293
- return { exitCode: 0 };
1294
- }
1295
- const block = renderTrajectoryBlock(entries, {
1296
- baselineLoss: baseline?.meanLoss ?? null,
1297
- baselineCandidateId: baseline?.candidateId,
1298
- });
1299
- if (block.trim().length === 0) {
1300
- stdout(`No prior candidates with signal for ${args.targetId} yet.`);
1301
- }
1302
- else {
1303
- stdout(block);
1304
- }
1305
- if (doNotPrune.length > 0) {
1306
- stdout('');
1307
- stdout('# DO-NOT-PRUNE (load-bearing — implicated in passing runs)');
1308
- stdout(doNotPrune);
1309
- }
1310
- return { exitCode: 0 };
1311
- }
1312
- catch (err) {
1313
- const message = err instanceof Error ? err.message : String(err);
1314
- if (args.json)
1315
- stdout(JSON.stringify({ error: message }, null, 2));
1316
- else
1317
- stderr(`trajectory failed: ${message}`);
1318
- return { exitCode: 1, error: message };
1319
- }
1320
- }
1321
- /**
1322
- * ONE-BUTTON auto-evolve: learn → hints → propose(--agent) → gate → promote, in
1323
- * one motion. Auto-applies the gate-passing winner per target onto the canonical
1324
- * template (no per-change human approval), honoring the per-target switch + the
1325
- * oracle/gate freeze, and snapshotting every write for rollback.
1326
- *
1327
- * Exported + fully injectable (proposer spawn, clock, io) so it is unit-testable
1328
- * without a real `claude` binary.
1329
- */
1330
- export async function runAutoEvolve(args, opts) {
1331
- const stdout = opts.stdout ?? ((l) => console.log(l));
1332
- const stderr = opts.stderr ?? ((l) => console.error(l));
1333
- const now = opts.now ?? (() => new Date());
1334
- const autoPromote = args.auto !== false; // default true
1335
- const layout = resolveCandidateRepo(opts.repoRoot);
1336
- const policy = resolveTargetEvolutionPolicy({
1337
- config: readProjectConfig(opts.repoRoot),
1338
- evolveTarget: args.evolveTarget,
1339
- freezeTarget: args.freezeTarget,
1340
- });
1341
- const report = {
1342
- exitCode: 0,
1343
- changeNames: args.changeNames,
1344
- loss: null,
1345
- hintCount: 0,
1346
- hintsPaths: [],
1347
- proposed: [],
1348
- gated: [],
1349
- promoted: [],
1350
- skipped: [],
1351
- };
1352
- // Stage 1 — LEARN (backward pass) over EACH change: a single change is enough,
1353
- // and several aggregate a recurring signal across them. Each change's hints are
1354
- // persisted; a failed change is skipped, not fatal.
1355
- const losses = [];
1356
- const healthSignals = [];
1357
- const hintsPaths = [];
1358
- let totalHints = 0;
1359
- for (const changeName of args.changeNames) {
1360
- let learnReport;
1361
- try {
1362
- learnReport = await generateLearnReport({ projectRoot: opts.repoRoot, changeName });
1363
- }
1364
- catch (err) {
1365
- report.skipped.push({
1366
- candidateId: `learn:${changeName}`,
1367
- reason: `learn failed: ${err instanceof Error ? err.message : String(err)}`,
1368
- });
1369
- continue;
1370
- }
1371
- const l = learnReport.fitnessSample?.loss?.loss;
1372
- if (typeof l === 'number')
1373
- losses.push(l);
1374
- const h = learnReport.fitnessSample?.healthSignal;
1375
- if (typeof h === 'number')
1376
- healthSignals.push(h);
1377
- // SUCCESS CHANNEL (R4), cron parity with `learn --apply`: a verified-GREEN
1378
- // change mines load-bearing protections + exemplars (side-writes only —
1379
- // never a candidate; a clean run still proposes nothing). No-op when the
1380
- // run is not observed-green, and best-effort so a side-write failure never
1381
- // fails the evolve run.
1382
- try {
1383
- const minedSignals = await mineSuccessSignals({
1384
- projectRoot: opts.repoRoot,
1385
- report: learnReport,
1386
- now,
1387
- });
1388
- if (minedSignals.protectionsWritten > 0 && args.json !== true) {
1389
- stdout(`Success channel: recorded ${minedSignals.protectionsWritten} protection(s) for ${minedSignals.protectedTargets.join(', ')} from "${changeName}".`);
1390
- }
1391
- }
1392
- catch {
1393
- // side-write only; the evolve pipeline continues regardless.
1394
- }
1395
- const hints = generateEvolutionHints(learnReport, policy);
1396
- totalHints += hints.length;
1397
- if (hints.length === 0)
1398
- continue;
1399
- hintsPaths.push(await persistLearnHints({ projectRoot: opts.repoRoot, changeName, hints, now }));
1400
- }
1401
- report.loss = losses.length > 0 ? losses.reduce((a, b) => a + b, 0) / losses.length : null;
1402
- // Mean RAW health signal across the change(s); null when none were measured
1403
- // (stub source / no signal) ⇒ the health gate below cannot fire.
1404
- const meanHealth = healthSignals.length > 0
1405
- ? healthSignals.reduce((a, b) => a + b, 0) / healthSignals.length
1406
- : null;
1407
- report.healthPenalty = meanHealth;
1408
- const healthBaseline = await readHealthBaseline(opts.repoRoot);
1409
- report.hintCount = totalHints;
1410
- report.hintsPaths = hintsPaths;
1411
- if (hintsPaths.length === 0) {
1412
- finishAutoEvolve(report, args.json, stdout, stderr, 'No evolution hints from the change(s) — nothing to evolve.');
1413
- return report;
1414
- }
1415
- // Stage 2 — PROPOSE real candidate diffs via the proposer agent.
1416
- // A single change is enough (minOccurrences defaults to 1). Raise it to require
1417
- // a signal to RECUR across the provided changes before it evolves — neither
1418
- // single-change nor multi-change is forced.
1419
- const minOcc = args.minOccurrences && args.minOccurrences > 0 ? args.minOccurrences : 1;
1420
- const proposeResult = await runProposeCanonical({
1421
- fromLearn: hintsPaths,
1422
- agent: true,
1423
- thresholdKey: args.thresholdKey,
1424
- aggregationOptions: {
1425
- criticalMinOccurrences: minOcc,
1426
- highMinOccurrences: minOcc,
1427
- mediumOrLowMinOccurrences: minOcc,
1428
- minDiversityForLowSeverity: minOcc,
1429
- },
1430
- evolveTarget: args.evolveTarget,
1431
- freezeTarget: args.freezeTarget,
1432
- }, {
1433
- repoRoot: opts.repoRoot,
1434
- now,
1435
- proposerSpawn: opts.proposerSpawn,
1436
- proposerBinary: opts.proposerBinary,
1437
- stdout: () => { },
1438
- stderr: (l) => stderr(l),
1439
- });
1440
- report.proposed = proposeResult.proposed.map((p) => p.candidateId);
1441
- // Stage 4 + 5 — GATE then SELECT/PROMOTE, one target at a time.
1442
- const handledTargets = new Set();
1443
- for (const candidateId of report.proposed) {
1444
- let gate;
1445
- try {
1446
- gate = await runStaticCandidateGate(layout, candidateId, {
1447
- applyTransition: true,
1448
- targetPolicy: policy,
1449
- });
1450
- }
1451
- catch (err) {
1452
- report.skipped.push({
1453
- candidateId,
1454
- reason: `gate error: ${err instanceof Error ? err.message : String(err)}`,
1455
- });
1456
- continue;
1457
- }
1458
- report.gated.push({ candidateId, passed: gate.passed });
1459
- if (!gate.passed) {
1460
- report.skipped.push({
1461
- candidateId,
1462
- reason: 'static gate failed (placeholder/no-op diff or frozen target)',
1463
- });
1464
- await recordVerdictBestEffort(layout, candidateId, {
1465
- verdict: 'gate-failed',
1466
- at: now().toISOString(),
1467
- reason: 'static gate failed (placeholder/no-op diff or frozen target)',
1468
- lossAtDecision: (await readCandidateFitness(layout, candidateId)).meanLoss,
1469
- decidedBy: 'static-gate',
1470
- });
1471
- continue;
1472
- }
1473
- let pkg;
1474
- try {
1475
- pkg = await readCandidatePackage(layout, candidateId);
1476
- }
1477
- catch (err) {
1478
- report.skipped.push({
1479
- candidateId,
1480
- reason: `read failed: ${err instanceof Error ? err.message : String(err)}`,
1481
- });
1482
- continue;
1483
- }
1484
- const targetId = pkg.candidate.targetIds[0];
1485
- if (!targetId) {
1486
- report.skipped.push({ candidateId, reason: 'candidate has no canonical target id' });
1487
- continue;
1488
- }
1489
- if (handledTargets.has(targetId)) {
1490
- report.skipped.push({ candidateId, reason: `target ${targetId} already handled this run` });
1491
- continue;
1492
- }
1493
- handledTargets.add(targetId);
1494
- const fitness = await readCandidateFitness(layout, candidateId);
1495
- const decision = shouldAutoPromote({
1496
- gatePassed: true,
1497
- targetEvolvable: isCanonicalTargetEvolvable(targetId, policy),
1498
- accumulatedCount: fitness.count,
1499
- meanLoss: fitness.meanLoss,
1500
- baselineLoss: report.loss,
1501
- requireProvenImprovement: args.requireProven === true,
1502
- healthPenalty: meanHealth,
1503
- baselineHealthPenalty: healthBaseline?.healthPenalty ?? null,
1504
- });
1505
- if (!autoPromote) {
1506
- report.skipped.push({
1507
- candidateId,
1508
- reason: `--no-auto: gated & ready (would promote: ${decision.promote ? 'yes' : `no — ${decision.reason}`})`,
1509
- });
1510
- continue;
1511
- }
1512
- if (!decision.promote) {
1513
- report.skipped.push({ candidateId, reason: decision.reason });
1514
- await recordVerdictBestEffort(layout, candidateId, {
1515
- verdict: 'declined',
1516
- at: now().toISOString(),
1517
- reason: decision.reason,
1518
- lossAtDecision: fitness.meanLoss,
1519
- baselineLoss: report.loss,
1520
- decidedBy: 'auto-evolve',
1521
- });
1522
- continue;
1523
- }
1524
- try {
1525
- const applied = await applyCandidatePromotion(layout, candidateId, {
1526
- repoRoot: opts.repoRoot,
1527
- policy,
1528
- });
1529
- report.promoted.push({
1530
- candidateId,
1531
- targetIds: applied.targetIds,
1532
- files: applied.appliedFiles.map((f) => f.file),
1533
- });
1534
- await recordVerdictBestEffort(layout, candidateId, {
1535
- verdict: 'promoted',
1536
- at: now().toISOString(),
1537
- reason: `auto-evolve: ${decision.reason}`,
1538
- lossAtDecision: fitness.meanLoss,
1539
- baselineLoss: report.loss,
1540
- decidedBy: 'auto-evolve',
1541
- });
1542
- }
1543
- catch (err) {
1544
- report.skipped.push({
1545
- candidateId,
1546
- reason: `promote failed: ${err instanceof Error ? err.message : String(err)}`,
1547
- });
1548
- }
1549
- }
1550
- // Record the accepted health as the new per-repo baseline (best-effort) when
1551
- // this run promoted something and had a real health signal. The next run's
1552
- // health gate compares against this value.
1553
- if (report.promoted.length > 0 && meanHealth != null) {
1554
- await writeHealthBaseline(opts.repoRoot, {
1555
- healthPenalty: meanHealth,
1556
- updatedAt: now().toISOString(),
1557
- sourceChange: args.changeNames.join(','),
1558
- });
1559
- }
1560
- finishAutoEvolve(report, args.json, stdout, stderr);
1561
- return report;
1562
- }
1563
- function finishAutoEvolve(report, json, stdout, stderr, note) {
1564
- if (json) {
1565
- stdout(JSON.stringify(report, null, 2));
1566
- return;
1567
- }
1568
- if (report.error) {
1569
- stderr(`auto-evolve failed: ${report.error}`);
1570
- return;
1571
- }
1572
- stdout(`Auto-evolve: ${report.changeNames.join(', ')}`);
1573
- stdout(` loss (functional⊕health): ${report.loss === null ? 'n/a' : report.loss.toFixed(3)}`);
1574
- stdout(` hints: ${report.hintCount}`);
1575
- if (note) {
1576
- stdout(` ${note}`);
1577
- return;
1578
- }
1579
- stdout(` proposed: ${report.proposed.length} | gate-pass: ${report.gated.filter((g) => g.passed).length} | PROMOTED: ${report.promoted.length} | skipped: ${report.skipped.length}`);
1580
- for (const p of report.promoted) {
1581
- stdout(` ✓ PROMOTED ${p.candidateId} -> [${p.targetIds.join(', ')}]: ${p.files.join(', ')}`);
1582
- }
1583
- for (const s of report.skipped) {
1584
- stdout(` · skipped ${s.candidateId}: ${s.reason}`);
1585
- }
1586
- if (report.promoted.length > 0) {
1587
- stdout(' Rebuild + republish to ship the evolved template(s).');
1588
- stdout(' Revert any promotion with: self-evolution promote <id> --rollback');
1589
- }
1590
- }
1591
1006
  /**
1592
1007
  * HOST-AUTHORED one-button evolve. The single non-interactive
1593
1008
  * host-authored-edit → gate → observed-verified promote command.
@@ -1649,7 +1064,7 @@ export async function runEvolveFromEdits(args, opts) {
1649
1064
  }
1650
1065
  return report;
1651
1066
  };
1652
- // Non-interactive contract: --yes is required (mirrors auto-evolve's one-button
1067
+ // Non-interactive contract: --yes is required (one-button host-authored
1653
1068
  // confirmation), and --agent is REFUSED (this path is host-authored, never spawns).
1654
1069
  if (args.agent) {
1655
1070
  return fail(2, 'error-bad-input', '--agent is not allowed: evolve-from-edits is host-authored and never spawns the proposer.', false);
@@ -2267,173 +1682,6 @@ export async function runPromotionReportCommand(args, opts) {
2267
1682
  }
2268
1683
  return { exitCode: 0, report };
2269
1684
  }
2270
- /**
2271
- * Programmatic entrypoint for `self-evolution evolve` — the GA outer loop.
2272
- *
2273
- * Chains the previously-inert pieces into one live pass:
2274
- * groupCandidatesByTarget → (optional replay scoring that APPENDS fitness)
2275
- * → rankCandidatesForTarget → select best → human-gated promotion report.
2276
- *
2277
- * Invariants: frozen targets (per the resolved policy) are skipped; promotion
2278
- * is NEVER applied here (the report keeps its human-review gate); the oracle is
2279
- * never touched (replay only runs tests).
2280
- */
2281
- export async function runEvolveOuterLoopCommand(args, opts) {
2282
- const stdout = opts.stdout ?? ((line) => console.log(line));
2283
- const stderr = opts.stderr ?? ((line) => console.error(line));
2284
- const layout = resolveCandidateRepo(opts.repoRoot);
2285
- const config = readProjectConfig(opts.repoRoot);
2286
- const policy = resolveTargetEvolutionPolicy({
2287
- config,
2288
- evolveTarget: args.evolveTarget,
2289
- freezeTarget: args.freezeTarget,
2290
- });
2291
- const at = args.at ?? new Date().toISOString();
2292
- let byTarget;
2293
- try {
2294
- byTarget = await groupCandidatesByTarget(layout);
2295
- }
2296
- catch (err) {
2297
- const message = err instanceof Error ? err.message : String(err);
2298
- if (args.json)
2299
- stdout(JSON.stringify({ error: message }, null, 2));
2300
- else
2301
- stderr(message);
2302
- return { exitCode: 1, targets: [], error: message };
2303
- }
2304
- let targetIds = [...byTarget.keys()].sort();
2305
- if (args.target)
2306
- targetIds = targetIds.filter((t) => t === args.target);
2307
- const replayMode = args.replay === true && (args.changeIds?.length ?? 0) > 0;
2308
- if (args.replay === true && !replayMode) {
2309
- stderr('--replay requires at least one --change <id>; falling back to accumulated fitness');
2310
- }
2311
- const runChange = args.runChange ??
2312
- makeReplayRunChange({ repoRoot: opts.repoRoot, healthSource: resolveMetricSource(config) });
2313
- const summaries = [];
2314
- for (const targetId of targetIds) {
2315
- const candidateIds = byTarget.get(targetId) ?? [];
2316
- if (!isCanonicalTargetEvolvable(targetId, policy)) {
2317
- stdout(`target ${targetId}: frozen by policy — skipped (${candidateIds.length} candidate(s))`);
2318
- summaries.push({ targetId, candidateIds, frozen: true, ranked: [], best: null });
2319
- continue;
2320
- }
2321
- let scored;
2322
- if (replayMode) {
2323
- scored = await scoreCandidatesByReplay({
2324
- layout,
2325
- candidateIds,
2326
- changeIds: args.changeIds,
2327
- runChange,
2328
- at,
2329
- log: (line) => stdout(` ${line}`),
2330
- });
2331
- }
2332
- const ranked = await rankCandidatesForTarget(layout, candidateIds, targetId);
2333
- const best = ranked.length > 0 ? ranked[0].candidateId : null;
2334
- let promotionReportPath;
2335
- if (best && args.write) {
2336
- // Human-gated: writes the report + records its path; NEVER transitions
2337
- // status / promotes. The report retains its "human review required" gate.
2338
- await generatePromotionReport(layout, best, { write: true });
2339
- promotionReportPath = path.join(layout.baseDir, best, 'promotion-report.md');
2340
- }
2341
- // Mark sibling-variant losers `outcompeted` (advisory metadata; status is
2342
- // never changed). A loser is a PROVEN candidate (meanLoss !== null) in the
2343
- // SAME variantGroup as `best` with a strictly higher meanLoss. The
2344
- // optimization-trajectory block then renders them as negative examples.
2345
- let outcompeted;
2346
- if (args.markOutcompeted && best) {
2347
- const bestRow = ranked.find((r) => r.candidateId === best);
2348
- if (bestRow && bestRow.meanLoss !== null) {
2349
- const metas = await listCandidates(layout, { targetId });
2350
- const vgById = new Map(metas.map((c) => [c.id, c.variantGroup]));
2351
- const bestVg = vgById.get(best);
2352
- if (bestVg) {
2353
- outcompeted = [];
2354
- for (const r of ranked) {
2355
- if (r.candidateId === best || r.meanLoss === null)
2356
- continue;
2357
- if (vgById.get(r.candidateId) !== bestVg)
2358
- continue;
2359
- if (r.meanLoss > bestRow.meanLoss) {
2360
- await recordVerdictBestEffort(layout, r.candidateId, {
2361
- verdict: 'outcompeted',
2362
- at,
2363
- reason: `lost GA ranking to ${best} (meanLoss ${r.meanLoss.toFixed(3)} vs ${bestRow.meanLoss.toFixed(3)})`,
2364
- lossAtDecision: r.meanLoss,
2365
- baselineLoss: bestRow.meanLoss,
2366
- decidedBy: 'evolve-outer-loop',
2367
- });
2368
- outcompeted.push(r.candidateId);
2369
- }
2370
- }
2371
- }
2372
- }
2373
- }
2374
- summaries.push({
2375
- targetId,
2376
- candidateIds,
2377
- frozen: false,
2378
- scored,
2379
- ranked,
2380
- best,
2381
- promotionReportPath,
2382
- ...(outcompeted ? { outcompeted } : {}),
2383
- });
2384
- }
2385
- if (args.json) {
2386
- stdout(JSON.stringify({ exitCode: 0, targets: summaries }, null, 2));
2387
- }
2388
- else {
2389
- stdout(renderEvolveOuterLoopSummary(summaries, { replayMode, write: args.write === true }));
2390
- }
2391
- return { exitCode: 0, targets: summaries };
2392
- }
2393
- function renderEvolveOuterLoopSummary(targets, ctx) {
2394
- const lines = [];
2395
- lines.push('# GA outer loop');
2396
- lines.push(`mode: ${ctx.replayMode ? 'replay (corpus scoring)' : 'accumulated fitness'} · promotion: human-gated (never auto-promoted)`);
2397
- if (targets.length === 0) {
2398
- lines.push('', 'No candidates found.');
2399
- return lines.join('\n');
2400
- }
2401
- for (const t of targets) {
2402
- lines.push('', `## ${t.targetId} (${t.candidateIds.length} candidate(s))`);
2403
- if (t.frozen) {
2404
- lines.push('- frozen by policy — skipped');
2405
- continue;
2406
- }
2407
- if (t.scored) {
2408
- const ok = t.scored.filter((s) => !s.error).length;
2409
- const skipped = t.scored.length - ok;
2410
- lines.push(`- replay: scored ${ok}, skipped ${skipped}`);
2411
- for (const s of t.scored.filter((x) => x.error)) {
2412
- lines.push(` - skipped ${s.candidateId}: ${s.error}`);
2413
- }
2414
- }
2415
- for (const r of t.ranked) {
2416
- const proven = r.meanLoss !== null;
2417
- const detail = proven
2418
- ? `meanLoss=${r.meanLoss.toFixed(4)} meanPass=${(r.meanPassRate ?? 0).toFixed(4)} n=${r.count} trend=${r.trend}`
2419
- : 'unproven (no fitness records yet)';
2420
- lines.push(`- ${r.candidateId === t.best ? '★' : ' '} ${r.candidateId}: ${detail}`);
2421
- }
2422
- if (t.best) {
2423
- lines.push(`- best: ${t.best}`);
2424
- if (ctx.write && t.promotionReportPath) {
2425
- lines.push(`- promotion report (human review required): ${t.promotionReportPath}`);
2426
- }
2427
- if (t.outcompeted && t.outcompeted.length > 0) {
2428
- lines.push(`- marked outcompeted: ${t.outcompeted.join(', ')}`);
2429
- }
2430
- }
2431
- else {
2432
- lines.push('- best: (none)');
2433
- }
2434
- }
2435
- return lines.join('\n');
2436
- }
2437
1685
  function parseRequireDiff(value) {
2438
1686
  if (value === undefined)
2439
1687
  return true;