synergyspec-selfevolving 1.3.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113):
  1. package/README.md +50 -19
  2. package/dist/commands/learn.d.ts +12 -1
  3. package/dist/commands/learn.js +373 -31
  4. package/dist/commands/self-evolution-episode.d.ts +177 -0
  5. package/dist/commands/self-evolution-episode.js +423 -0
  6. package/dist/commands/self-evolution.d.ts +12 -190
  7. package/dist/commands/self-evolution.js +179 -786
  8. package/dist/commands/workflow/status.js +3 -1
  9. package/dist/core/archive.d.ts +0 -1
  10. package/dist/core/archive.js +0 -58
  11. package/dist/core/artifact-graph/instruction-loader.d.ts +2 -4
  12. package/dist/core/artifact-graph/instruction-loader.js +3 -31
  13. package/dist/core/config-prompts.js +4 -0
  14. package/dist/core/fitness/health/health-metrics.d.ts +26 -56
  15. package/dist/core/fitness/health/health-metrics.js +19 -58
  16. package/dist/core/fitness/health/index.d.ts +15 -2
  17. package/dist/core/fitness/health/index.js +25 -1
  18. package/dist/core/fitness/health/local-source.d.ts +43 -4
  19. package/dist/core/fitness/health/local-source.js +181 -25
  20. package/dist/core/fitness/health/metric-source.d.ts +48 -19
  21. package/dist/core/fitness/health/metric-source.js +8 -18
  22. package/dist/core/fitness/health/resolve-source.js +4 -1
  23. package/dist/core/fitness/loss.d.ts +7 -7
  24. package/dist/core/fitness/loss.js +6 -6
  25. package/dist/core/fitness/sample.d.ts +10 -0
  26. package/dist/core/fitness/test-failures.d.ts +30 -0
  27. package/dist/core/fitness/test-failures.js +123 -0
  28. package/dist/core/learn/credit-path.d.ts +36 -0
  29. package/dist/core/learn/credit-path.js +198 -0
  30. package/dist/core/learn/trajectory-discovery.d.ts +39 -0
  31. package/dist/core/learn/trajectory-discovery.js +140 -0
  32. package/dist/core/learn.d.ts +39 -5
  33. package/dist/core/learn.js +131 -14
  34. package/dist/core/project-config.d.ts +4 -0
  35. package/dist/core/project-config.js +52 -1
  36. package/dist/core/self-evolution/candidate-fitness.d.ts +23 -1
  37. package/dist/core/self-evolution/candidate-fitness.js +31 -5
  38. package/dist/core/self-evolution/candidates.d.ts +0 -9
  39. package/dist/core/self-evolution/canonical-targets.d.ts +8 -4
  40. package/dist/core/self-evolution/canonical-targets.js +8 -4
  41. package/dist/core/self-evolution/critic-agent.d.ts +150 -0
  42. package/dist/core/self-evolution/critic-agent.js +487 -0
  43. package/dist/core/self-evolution/edits-contract.d.ts +53 -0
  44. package/dist/core/self-evolution/edits-contract.js +89 -0
  45. package/dist/core/self-evolution/episode-orchestrator.d.ts +197 -0
  46. package/dist/core/self-evolution/episode-orchestrator.js +534 -0
  47. package/dist/core/self-evolution/episode-store.d.ts +266 -0
  48. package/dist/core/self-evolution/episode-store.js +573 -0
  49. package/dist/core/self-evolution/evolution-switches.d.ts +1 -1
  50. package/dist/core/self-evolution/evolution-switches.js +5 -10
  51. package/dist/core/self-evolution/evolving-agent.d.ts +162 -0
  52. package/dist/core/self-evolution/evolving-agent.js +449 -0
  53. package/dist/core/self-evolution/health-baseline.d.ts +25 -6
  54. package/dist/core/self-evolution/health-baseline.js +30 -6
  55. package/dist/core/self-evolution/host-harness.d.ts +1 -2
  56. package/dist/core/self-evolution/host-harness.js +1 -2
  57. package/dist/core/self-evolution/index.d.ts +10 -6
  58. package/dist/core/self-evolution/index.js +19 -6
  59. package/dist/core/self-evolution/learn-hints.d.ts +31 -0
  60. package/dist/core/self-evolution/learn-hints.js +16 -0
  61. package/dist/core/self-evolution/learn-observation-adapter.d.ts +35 -0
  62. package/dist/core/self-evolution/learn-observation-adapter.js +285 -10
  63. package/dist/core/self-evolution/line-diff.d.ts +60 -0
  64. package/dist/core/self-evolution/line-diff.js +130 -0
  65. package/dist/core/self-evolution/policy/fs-safe.d.ts +19 -0
  66. package/dist/core/self-evolution/policy/fs-safe.js +89 -0
  67. package/dist/core/self-evolution/policy/index.d.ts +13 -0
  68. package/dist/core/self-evolution/policy/index.js +13 -0
  69. package/dist/core/self-evolution/policy/policy-store.d.ts +217 -0
  70. package/dist/core/self-evolution/policy/policy-store.js +774 -0
  71. package/dist/core/self-evolution/policy/reject-buffer.d.ts +48 -0
  72. package/dist/core/self-evolution/policy/reject-buffer.js +168 -0
  73. package/dist/core/self-evolution/promote.d.ts +1 -1
  74. package/dist/core/self-evolution/promote.js +6 -33
  75. package/dist/core/self-evolution/promotion.js +1 -2
  76. package/dist/core/self-evolution/proposer-agent.d.ts +41 -0
  77. package/dist/core/self-evolution/proposer-agent.js +94 -13
  78. package/dist/core/self-evolution/proposer-slice.d.ts +26 -0
  79. package/dist/core/self-evolution/proposer-slice.js +54 -0
  80. package/dist/core/self-evolution/reward-agent.d.ts +234 -0
  81. package/dist/core/self-evolution/reward-agent.js +564 -0
  82. package/dist/core/self-evolution/scope-gate.d.ts +66 -0
  83. package/dist/core/self-evolution/scope-gate.js +107 -0
  84. package/dist/core/self-evolution/success-channel.d.ts +79 -0
  85. package/dist/core/self-evolution/success-channel.js +361 -0
  86. package/dist/core/self-evolution/target-evolution.d.ts +11 -0
  87. package/dist/core/self-evolution/target-evolution.js +2 -0
  88. package/dist/core/self-evolution/tool-evolution.js +2 -13
  89. package/dist/core/self-evolution/verdict.d.ts +8 -5
  90. package/dist/core/self-evolution/verdict.js +4 -7
  91. package/dist/core/templates/skill-templates.d.ts +1 -0
  92. package/dist/core/templates/skill-templates.js +1 -0
  93. package/dist/core/templates/workflow-manifest.js +2 -0
  94. package/dist/core/templates/workflows/learn.d.ts +4 -2
  95. package/dist/core/templates/workflows/learn.js +25 -166
  96. package/dist/core/templates/workflows/self-evolving.d.ts +13 -0
  97. package/dist/core/templates/workflows/self-evolving.js +127 -0
  98. package/dist/core/trajectory/facts.d.ts +16 -0
  99. package/dist/core/trajectory/facts.js +12 -4
  100. package/dist/core/trajectory/skeleton.d.ts +43 -0
  101. package/dist/core/trajectory/skeleton.js +239 -0
  102. package/dist/dashboard/data.d.ts +25 -51
  103. package/dist/dashboard/data.js +68 -180
  104. package/dist/dashboard/react-client.js +458 -503
  105. package/dist/dashboard/react-styles.js +3 -3
  106. package/dist/dashboard/server.js +23 -17
  107. package/dist/ui/ascii-patterns.d.ts +7 -15
  108. package/dist/ui/ascii-patterns.js +123 -54
  109. package/dist/ui/welcome-screen.d.ts +0 -14
  110. package/dist/ui/welcome-screen.js +16 -35
  111. package/package.json +3 -1
  112. package/scripts/code-health.py +1066 -638
  113. package/scripts/slop_rules.yaml +2151 -0
@@ -1,16 +1,19 @@
1
1
  import * as fs from 'node:fs';
2
2
  import * as path from 'node:path';
3
- import * as crypto from 'node:crypto';
4
3
  import fastGlob from 'fast-glob';
5
- import { aggregateLearnEvolutionHints, applyCandidatePromotion, rollbackCandidatePromotion, shouldAutoPromote, isEvidenceComplete, generateEvolutionHints, persistLearnHints, readCandidateFitness, readHealthBaseline, writeHealthBaseline, readCandidatePackage, resolveTargetLocalFiles, CANONICAL_CANDIDATE_SOURCES, CANONICAL_TARGETS, collectArchiveExperiences, EVOLVABLE_PART_DESCRIPTIONS, EVOLVABLE_PARTS, evaluateTaskDecompositionForChange, evaluateToolEvolutionCandidate, generateCandidateId, generatePromotionReport, groupCandidatesByTarget, listCandidates, rankCandidatesForTarget, readPromotedBaselineLoss, buildOptimizationTrajectory, renderTrajectoryBlock, checkLossRegression, recordVerdictBestEffort, updateCandidateStatus, VARIANT_ANGLES, makeReplayRunChange, scoreCandidatesByReplay, isEvolutionPartEnabled, findSimilarArchiveExperiences, listCanonicalTargets, lookupCanonicalTarget, runCanonicalProposerAgent, validateCandidateEdits, renderUnifiedDiff, CanonicalProposerNoOp, resolveTargetEvolutionPolicy, resolveKindOnlyPinTarget, detectUnbindableHintObservations, isCanonicalTargetEvolvable, parseEvolutionSwitchOptions, readTemplateVariantManifest, renderAlignmentReport, renderArchiveExperienceBlock, renderStaticGateSummary, renderToolEvolutionGuardReport, renderEvolutionSwitches, requireCanonicalTarget, resolveCandidateRepo, runStaticCandidateGate, selectTemplateVariant, shouldTriggerCandidate, validateLearnEvolutionHint, writeCandidatePackage, verifySpecCodeAlignmentForChange, } from '../core/self-evolution/index.js';
4
+ import { aggregateLearnEvolutionHints, applyCandidatePromotion, rollbackCandidatePromotion, shouldAutoPromote, isEvidenceComplete, readCandidateFitness, readHealthBaseline, writeHealthBaseline, readCandidatePackage, resolveTargetLocalFiles, CANONICAL_CANDIDATE_SOURCES, CANONICAL_TARGETS, collectArchiveExperiences, EVOLVABLE_PART_DESCRIPTIONS, EVOLVABLE_PARTS, evaluateTaskDecompositionForChange, evaluateToolEvolutionCandidate, generateCandidateId, generatePromotionReport, readPromotedBaselineLoss, checkLossRegression, recordVerdictBestEffort, updateCandidateStatus, isEvolutionPartEnabled, findSimilarArchiveExperiences, listCanonicalTargets, lookupCanonicalTarget, validateCandidateEdits, renderUnifiedDiff, CanonicalProposerNoOp, resolveTargetEvolutionPolicy, resolveKindOnlyPinTarget, detectUnbindableHintObservations, isCanonicalTargetEvolvable, parseEvolutionSwitchOptions, renderAlignmentReport, renderArchiveExperienceBlock, renderStaticGateSummary, renderToolEvolutionGuardReport, renderEvolutionSwitches, requireCanonicalTarget, resolveCandidateRepo, runStaticCandidateGate, shouldTriggerCandidate, validateLearnEvolutionHint, writeCandidatePackage, verifySpecCodeAlignmentForChange, } from '../core/self-evolution/index.js';
6
5
  import { generateLearnReport } from '../core/learn.js';
7
- import { resolveMetricSource } from '../core/fitness/index.js';
8
- import { validateChangeExists, validateSchemaExists } from './workflow/shared.js';
6
+ import { validateExplicitTrajectoryHandle } from '../core/learn/trajectory-discovery.js';
7
+ import { validateChangeExists } from './workflow/shared.js';
9
8
  import { readProjectConfig } from '../core/project-config.js';
9
+ import { attachSelfEvolutionEpisodeCommands } from './self-evolution-episode.js';
10
10
  export function registerSelfEvolutionCommand(program) {
11
11
  const cmd = program
12
12
  .command('self-evolution')
13
- .description('Inspect SynergySpec-SelfEvolving Evaluation Lab self-evolution signals for templates, tasks, verification, memory, and tool changes');
13
+ .description('Inspect SynergySpec-SelfEvolving self-evolution signals for templates, tasks, verification, memory, and tool changes');
14
+ // Loop-v2 (self-evolution as in-context RL): the `episode` + `policy`
15
+ // subcommands.
16
+ attachSelfEvolutionEpisodeCommands(cmd);
14
17
  cmd
15
18
  .command('parts')
16
19
  .description('List evolvable parts that can be enabled or disabled per run')
@@ -36,41 +39,6 @@ export function registerSelfEvolutionCommand(program) {
36
39
  console.log(`- ${id}: ${mark} - ${EVOLVABLE_PART_DESCRIPTIONS[id]}`);
37
40
  }
38
41
  });
39
- cmd
40
- .command('templates')
41
- .description('Show selected evolving template variant for an artifact')
42
- .option('--schema <name>', 'schema name', 'spec-driven')
43
- .requiredOption('--artifact <id>', 'artifact id such as proposal, specs, design, or tasks')
44
- .option('--evolve <parts>', 'comma-separated evolvable parts allowed for this run, or all/none')
45
- .option('--evolve-deny <parts>', 'comma-separated evolvable parts disabled for this run')
46
- .option('--json', 'output JSON')
47
- .action(async (options) => {
48
- const projectRoot = process.cwd();
49
- const schemaName = validateSchemaExists(options.schema ?? 'spec-driven', projectRoot);
50
- const switches = parseEvolutionSwitchOptions(options);
51
- if (!isEvolutionPartEnabled(switches, 'template-variants')) {
52
- printDisabled('template-variants', switches, options.json);
53
- return;
54
- }
55
- const selection = selectTemplateVariant({
56
- projectRoot,
57
- schemaName,
58
- artifactId: options.artifact,
59
- });
60
- const manifest = readTemplateVariantManifest(projectRoot);
61
- if (options.json) {
62
- console.log(JSON.stringify({ selection, manifest, switches }, null, 2));
63
- return;
64
- }
65
- if (!selection) {
66
- console.log(`No active project template variant for ${schemaName}/${options.artifact}.`);
67
- return;
68
- }
69
- console.log(`${selection.schema}/${selection.artifact}: ${selection.id}`);
70
- console.log(`Score: ${selection.score.toFixed(2)}`);
71
- console.log(`Path: ${selection.templatePath}`);
72
- console.log(`Reason: ${selection.reason}`);
73
- });
74
42
  cmd
75
43
  .command('memory [query]')
76
44
  .description('Retrieve similar archived change experience')
@@ -218,19 +186,6 @@ export function registerSelfEvolutionCommand(program) {
218
186
  }
219
187
  printCanonicalTargetTable(targets);
220
188
  });
221
- cmd
222
- .command('trajectory <targetId>')
223
- .description('Print the OPTIMIZATION TRAJECTORY block for a canonical target — the scored history of prior candidates (loss, verdict, approach) + the promoted baseline to beat — exactly as the headless proposer sees it. Read-only; the host learn skill calls this before authoring an --from-edits improvement.')
224
- .option('--max-entries <n>', 'cap the number of prior candidates shown (default 6)')
225
- .option('--json', 'output { targetId, baselineLoss, baselineCandidateId, entries } JSON')
226
- .action(async (targetId, options) => {
227
- const result = await runTrajectoryCommand({
228
- targetId,
229
- maxEntries: options.maxEntries !== undefined ? Number(options.maxEntries) : undefined,
230
- json: options.json,
231
- }, { repoRoot: process.cwd() });
232
- process.exitCode = result.exitCode;
233
- });
234
189
  cmd
235
190
  .command('hints <jsonFile...>')
236
191
  .description('Aggregate LearnEvolutionHint JSON files into scored candidate-request groups (no canonical files are written)')
@@ -279,30 +234,6 @@ export function registerSelfEvolutionCommand(program) {
279
234
  const result = await runPromotionReportCommand({ candidateId, write: options.write, json: options.json }, { repoRoot: process.cwd() });
280
235
  process.exitCode = result.exitCode;
281
236
  });
282
- cmd
283
- .command('evolve')
284
- .description('Run the GA outer loop: group candidates by target, score by accumulated fitness (or --replay a corpus), rank/select the best, and generate a human-gated promotion report. Never auto-promotes.')
285
- .option('--target <targetId>', 'restrict the loop to a single canonical target id')
286
- .option('--replay', 'score candidates by replaying a change corpus through baseline + candidate (requires --change); default uses already-accumulated fitness records')
287
- .option('--change <ids...>', 'change ids forming the replay corpus (only used with --replay)')
288
- .option('--write', 'write a promotion-report.md for each selected best candidate (still human-gated; never auto-promotes)')
289
- .option('--mark-outcompeted', 'mark proven sibling variants that lost the ranking to the best (same variant cohort, higher loss) with verdict `outcompeted` so future proposals see them as negative examples (never changes status)')
290
- .option('--evolve-target <ids>', 'comma-separated canonical target ids allowed to evolve (supports all/none)')
291
- .option('--freeze-target <ids>', 'comma-separated canonical target ids frozen (supports all/none)')
292
- .option('--json', 'output the full RunEvolveOuterLoopResult JSON')
293
- .action(async (options) => {
294
- const result = await runEvolveOuterLoopCommand({
295
- target: options.target,
296
- replay: options.replay,
297
- changeIds: options.change,
298
- write: options.write,
299
- markOutcompeted: options.markOutcompeted,
300
- evolveTarget: options.evolveTarget,
301
- freezeTarget: options.freezeTarget,
302
- json: options.json,
303
- }, { repoRoot: process.cwd() });
304
- process.exitCode = result.exitCode;
305
- });
306
237
  cmd
307
238
  .command('propose-canonical')
308
239
  .description('Convert aggregated learn-stage hints into candidate package directories (proposal only; no canonical file is modified)')
@@ -313,9 +244,7 @@ export function registerSelfEvolutionCommand(program) {
313
244
  .option('--source <source>', `attribution source recorded on each candidate (one of: ${CANONICAL_CANDIDATE_SOURCES.join(', ')}); defaults to 'learn-threshold'`)
314
245
  .option('--dry-run', 'build candidate package contents in memory but do not write to disk')
315
246
  .option('--force', 'no effect in v1; the writer never overwrites existing candidates')
316
- .option('--from-edits <file>', "validate + package candidate edits the HOST agent already wrote (JSON { targetId?, rationale?, edits: [{relPath, content}] }; '-' reads stdin). Preferred when running inside a host code agent; requires exactly one surviving group")
317
- .option('--agent', 'HEADLESS FALLBACK ONLY (cron/CI with no host agent): spawn the proposer agent to draft the diff. Prefer --from-edits when running inside a host code agent')
318
- .option('--variants <n>', 'draft N competing variant candidates (1-5; default 1) for the surviving group, each on a distinct improvement angle, so the GA outer loop can select the best. Requires --agent.')
247
+ .option('--from-edits <file>', "validate + package candidate edits the HOST agent already wrote (JSON { targetId?, rationale?, edits: [{relPath, content}] }; '-' reads stdin). The host code agent authors the diff; requires exactly one surviving group")
319
248
  .option('--evolve-target <ids>', 'comma-separated canonical target ids allowed to evolve this run (supports all/none)')
320
249
  .option('--freeze-target <ids>', 'comma-separated canonical target ids frozen this run (supports all/none)')
321
250
  .option('--json', 'output JSON summary')
@@ -336,11 +265,6 @@ export function registerSelfEvolutionCommand(program) {
336
265
  // here we only resolve the file/stdin and shape-validate the payload.
337
266
  let editsInput;
338
267
  if (options.fromEdits) {
339
- if (options.agent) {
340
- process.stderr.write('error: --from-edits and --agent are mutually exclusive\n');
341
- process.exitCode = 2;
342
- return;
343
- }
344
268
  let raw;
345
269
  try {
346
270
  raw =
@@ -369,8 +293,6 @@ export function registerSelfEvolutionCommand(program) {
369
293
  source: options.source,
370
294
  dryRun: options.dryRun,
371
295
  force: options.force,
372
- agent: options.agent,
373
- variants: options.variants !== undefined ? Number(options.variants) : undefined,
374
296
  editsInput,
375
297
  evolveTarget: options.evolveTarget,
376
298
  freezeTarget: options.freezeTarget,
@@ -406,30 +328,6 @@ export function registerSelfEvolutionCommand(program) {
406
328
  const result = await runRejectCommand({ candidateId, reason: options.reason, json: options.json }, { repoRoot: process.cwd() });
407
329
  process.exitCode = result.exitCode;
408
330
  });
409
- cmd
410
- .command('auto-evolve')
411
- .description('ONE-BUTTON self-evolution: run learn -> hints -> propose(--agent) -> gate -> promote for a change, auto-applying the gate-passing winner onto the canonical template (no per-change approval). Honors the per-target switch + oracle freeze; snapshots every change for rollback.')
412
- .requiredOption('--change <names...>', 'one or more completed changes to learn from and evolve (a SINGLE one is enough)')
413
- .option('--no-auto', 'run the full pipeline but stop BEFORE applying (gate + select only)')
414
- .option('--require-proven', 'only promote on a MEASURED fitness improvement (refuse unproven candidates)')
415
- .option('--min-occurrences <n>', 'min occurrences a signal must reach to evolve (default 1; raise + pass several --change to require recurrence across them)')
416
- .option('--threshold-key <key>', 'force-propose only this aggregated signal (bypasses the recurrence trigger)')
417
- .option('--evolve-target <ids>', 'comma-separated canonical target ids allowed to evolve (supports all/none)')
418
- .option('--freeze-target <ids>', 'comma-separated canonical target ids frozen (supports all/none)')
419
- .option('--json', 'output the full AutoEvolveReport JSON')
420
- .action(async (options) => {
421
- const result = await runAutoEvolve({
422
- changeNames: options.change,
423
- auto: options.auto,
424
- requireProven: options.requireProven,
425
- minOccurrences: options.minOccurrences !== undefined ? Number(options.minOccurrences) : undefined,
426
- thresholdKey: options.thresholdKey,
427
- evolveTarget: options.evolveTarget,
428
- freezeTarget: options.freezeTarget,
429
- json: options.json,
430
- }, { repoRoot: process.cwd() });
431
- process.exitCode = result.exitCode;
432
- });
433
331
  cmd
434
332
  .command('evolve-from-edits')
435
333
  .description('HOST-AUTHORED one-button evolve: package edits the host code agent already wrote (--from-edits) for ONE learn signal, run the static gate, and auto-promote ONLY when the change\'s learn report carries an OBSERVED-VERIFIED green signal (a real test run was seen) onto the canonical LOCAL file. Never spawns the proposer; --agent is refused.')
@@ -439,22 +337,74 @@ export function registerSelfEvolutionCommand(program) {
439
337
  .option('--freeze-target <ids>', 'comma-separated canonical target ids frozen (supports all/none)')
440
338
  .option('--threshold-key <key>', 'when the learn handoff aggregates into several change-type groups for the target, force only this aggregated signal (copy one key from the refusal list)')
441
339
  .option('--require-proven', 'only promote on a MEASURED fitness improvement (refuse unproven candidates)')
340
+ .option('--transcript <path>', 'Explicit transcript .jsonl to grade (bypasses change-window discovery; Claude transcript store only)')
341
+ .option('--session-id <id>', 'Explicit Claude session id to grade (bypasses change-window discovery; Claude transcript store only)')
442
342
  .option('--agent', 'REFUSED: this path is host-authored and never spawns the proposer')
443
343
  .option('--yes', 'required: confirm the non-interactive auto-promote')
444
344
  .option('--json', 'output the full EvolveFromEditsReport JSON')
445
345
  .action(async (options) => {
446
- const result = await runEvolveFromEdits({
447
- fromLearn: options.fromLearn,
448
- evolveTarget: options.evolveTarget,
449
- fromEdits: options.fromEdits,
450
- thresholdKey: options.thresholdKey,
451
- freezeTarget: options.freezeTarget,
452
- requireProven: options.requireProven,
453
- agent: options.agent,
454
- yes: options.yes,
455
- json: options.json,
456
- }, { repoRoot: process.cwd() });
457
- process.exitCode = result.exitCode;
346
+ // USER-TYPED handle flags are validated up front and fail LOUD
347
+ // (exit 1) on a miss — unlike the env-var channel, which keeps the
348
+ // fail-closed refusal semantics inside discovery (empty result, the
349
+ // observed-verified gate refuses). Validated BEFORE the env is mutated
350
+ // below so a bad flag never leaks into the environment.
351
+ const handleError = await validateExplicitTrajectoryHandle({
352
+ projectRoot: process.cwd(),
353
+ transcriptPath: options.transcript,
354
+ sessionId: options.sessionId,
355
+ });
356
+ if (handleError) {
357
+ if (options.json) {
358
+ console.log(JSON.stringify({ error: handleError }, null, 2));
359
+ }
360
+ else {
361
+ console.error(`Error: ${handleError}`);
362
+ }
363
+ process.exitCode = 1;
364
+ return;
365
+ }
366
+ // Explicit trajectory handle: surfaced to the discovery layer via env
367
+ // (the observed-verified gate re-grades the change inside
368
+ // runEvolveFromEdits), kept strictly in the action layer so the
369
+ // injected-generateReport test seam stays byte-identical.
370
+ const prevTranscriptEnv = process.env.SYNERGYSPEC_SELFEVOLVING_TRANSCRIPT;
371
+ const prevSessionEnv = process.env.SYNERGYSPEC_SELFEVOLVING_SESSION_ID;
372
+ if (options.transcript)
373
+ process.env.SYNERGYSPEC_SELFEVOLVING_TRANSCRIPT = options.transcript;
374
+ if (options.sessionId)
375
+ process.env.SYNERGYSPEC_SELFEVOLVING_SESSION_ID = options.sessionId;
376
+ try {
377
+ const result = await runEvolveFromEdits({
378
+ fromLearn: options.fromLearn,
379
+ evolveTarget: options.evolveTarget,
380
+ fromEdits: options.fromEdits,
381
+ thresholdKey: options.thresholdKey,
382
+ freezeTarget: options.freezeTarget,
383
+ requireProven: options.requireProven,
384
+ agent: options.agent,
385
+ yes: options.yes,
386
+ json: options.json,
387
+ }, { repoRoot: process.cwd() });
388
+ process.exitCode = result.exitCode;
389
+ }
390
+ finally {
391
+ if (options.transcript) {
392
+ if (prevTranscriptEnv === undefined) {
393
+ delete process.env.SYNERGYSPEC_SELFEVOLVING_TRANSCRIPT;
394
+ }
395
+ else {
396
+ process.env.SYNERGYSPEC_SELFEVOLVING_TRANSCRIPT = prevTranscriptEnv;
397
+ }
398
+ }
399
+ if (options.sessionId) {
400
+ if (prevSessionEnv === undefined) {
401
+ delete process.env.SYNERGYSPEC_SELFEVOLVING_SESSION_ID;
402
+ }
403
+ else {
404
+ process.env.SYNERGYSPEC_SELFEVOLVING_SESSION_ID = prevSessionEnv;
405
+ }
406
+ }
407
+ }
458
408
  });
459
409
  }
460
410
  /**
@@ -548,37 +498,13 @@ export function parseHostEditsInput(raw) {
548
498
  *
549
499
  * SAFETY:
550
500
  * - Never writes outside `<repoRoot>/.synergyspec-selfevolving/self-evolution/candidates/`.
551
- * - Generation is EITHER the host-agent `--from-edits` path (validate + package)
552
- * OR the headless `--agent` proposer fallback; without either, diff.patch is
553
- * the placeholder for a human to complete.
501
+ * - Generation is the host-agent `--from-edits` path (validate + package); when
502
+ * absent, diff.patch is the placeholder for a human to complete.
554
503
  */
555
504
  export async function runProposeCanonical(args, opts) {
556
505
  const stdout = opts.stdout ?? ((line) => console.log(line));
557
506
  const stderr = opts.stderr ?? ((line) => console.error(line));
558
507
  const now = opts.now ?? (() => new Date());
559
- // Host-authored edits and the headless proposer are mutually exclusive. The
560
- // CLI action also rejects the combo, but mirror it here so both entry points
561
- // share one contract (a programmatic caller can't silently get host-precedence).
562
- if (args.editsInput && args.agent) {
563
- stderr('--from-edits and --agent are mutually exclusive');
564
- return {
565
- exitCode: 2,
566
- proposed: [],
567
- skipped: [],
568
- errors: ['--from-edits and --agent are mutually exclusive'],
569
- };
570
- }
571
- // Population-based generation: clamp variants to 1-5. N>1 requires the headless
572
- // proposer (divergence is prompt-side, one prompt per variant); it is invalid
573
- // with --from-edits (one host payload = one candidate) and pointless for the
574
- // placeholder path (N identical placeholders).
575
- const requestedVariants = Number.isFinite(args.variants) ? Math.trunc(args.variants) : 1;
576
- const variantCount = Math.max(1, Math.min(5, requestedVariants || 1));
577
- if (variantCount > 1 && !args.agent) {
578
- const msg = '--variants > 1 requires --agent (variant divergence is prompt-side; --from-edits carries one payload).';
579
- stderr(msg);
580
- return { exitCode: 2, proposed: [], skipped: [], errors: [msg] };
581
- }
582
508
  const filePaths = args.fromLearn ?? [];
583
509
  // 1) Resolve & validate --target up front (deterministic throw on unknown).
584
510
  if (args.target !== undefined) {
@@ -650,7 +576,7 @@ export async function runProposeCanonical(args, opts) {
650
576
  ? { ...hint, affectedTargetId: pinId, thresholdKey: `${pinId}:${hint.proposedChangeType}` }
651
577
  : hint;
652
578
  });
653
- // 4) Aggregate. `aggregationOptions` lets auto-evolve act on a single change
579
+ // 4) Aggregate. `aggregationOptions` lets the host evolve-from-edits path act on a single change
654
580
  // (one forward pass = one loss); omitted = conservative cross-change defaults.
655
581
  const allGroups = aggregateLearnEvolutionHints(scopedHints, args.aggregationOptions);
656
582
  // 5) Filter.
@@ -791,170 +717,112 @@ export async function runProposeCanonical(args, opts) {
791
717
  const evalPlanSummary = evalPlanMd;
792
718
  const rationaleSummary = rationaleMd;
793
719
  const riskSummary = riskReportMd;
794
- // Population-based generation: draft `variantCount` competing candidates from
795
- // this one group, each on a distinct improvement angle, sharing a variantGroup
796
- // id so the GA outer loop can later mark the losers `outcompeted`. The default
797
- // (variantCount === 1) sets no variantGroup/angle — byte-identical to the
798
- // single-candidate path.
799
- const variantGroup = variantCount > 1
800
- ? `vg-${crypto
801
- .createHash('sha256')
802
- .update(group.thresholdKey)
803
- .digest('hex')
804
- .slice(0, 8)}-${now().toISOString().replace(/[^0-9]/g, '').slice(0, 14)}`
805
- : undefined;
806
- for (let variantIndex = 0; variantIndex < variantCount; variantIndex++) {
807
- const candidateId = generateCandidateId();
808
- const ts = now().toISOString();
809
- const variantAngle = variantCount > 1 ? VARIANT_ANGLES[variantIndex % VARIANT_ANGLES.length] : undefined;
810
- const candidate = {
811
- id: candidateId,
812
- createdAt: ts,
813
- updatedAt: ts,
814
- source,
815
- sourceHints: [...group.hintIds],
816
- sourceAggregatedKey: group.thresholdKey,
817
- targetIds,
818
- changedFiles: [],
819
- status: 'draft',
820
- expectedBenefit,
821
- riskLevel: group.dominantRisk,
822
- rollbackPlan: 'Delete candidate directory or transition candidate to rejected. Canonical files are unchanged because no diff is applied at proposal time.',
823
- proposalSummary,
824
- evalPlanSummary,
825
- rationaleSummary,
826
- riskSummary,
827
- ...(variantGroup ? { variantGroup } : {}),
828
- ...(variantAngle ? { variantAngle } : {}),
829
- };
830
- let diffPatch = '# No diff yet. Apply candidate change manually before invoking static gate.\n';
831
- let rationaleForPkg = rationaleMd;
832
- // Structured full-file edits (persisted as edits.json so the promote/apply
833
- // step can write the new content deterministically). Sourced EITHER from the
834
- // host code agent (`--from-edits`, preferred) OR the headless proposer agent.
835
- let agentEdits;
836
- // Generate a REAL diff for this group's target (proposal-only). Two sources,
837
- // one validation + packaging contract:
838
- // • HOST path (`--from-edits`): the host agent already wrote the new file;
839
- // the CLI only validates (frozen + target scope) and packages it.
840
- // • HEADLESS FALLBACK (`--agent`): spawn the proposer agent (cron/CI with
841
- // no host agent).
842
- // Both are skipped under --dry-run so a dry run never touches disk (resolving
843
- // local files can MATERIALIZE a template default). Edits are validated against
844
- // the target's resolved LOCAL files and rejected if they touch a frozen/gate-
845
- // defining file. Any failure (no-op, invalid output, missing binary) falls
846
- // back to the placeholder so the candidate is still written for a human.
847
- const useHostEdits = !!args.editsInput && targetIds.length > 0 && !args.dryRun;
848
- const useAgentFallback = !args.editsInput && !!args.agent && targetIds.length > 0 && !args.dryRun;
849
- if (useHostEdits || useAgentFallback) {
850
- const agentTarget = lookupCanonicalTarget(targetIds[0]);
851
- if (agentTarget) {
852
- try {
853
- // Resolve to the user's LOCAL editable files (installed SKILL.md, or a
854
- // materialized project-local template/schema) so evolution writes to
855
- // THEIR repo — no rebuild/republish. Dev repo resolves to source.
856
- const resolved = await resolveTargetLocalFiles(targetIds[0], opts.repoRoot);
857
- if (resolved.files.length === 0) {
858
- throw new Error(`no local editable file for target ${targetIds[0]} in this repo`);
859
- }
860
- const allowedRel = resolved.files.map((f) => f.relPath);
861
- const currentFiles = resolved.files.map((f) => ({
862
- relPath: f.relPath,
863
- content: f.content,
864
- }));
865
- // Close the feedback loop into the proposer (OPRO/AlphaEvolve-style):
866
- // feed the current promoted-baseline loss + a scored history of prior
867
- // candidates for this target so the agent can beat the best prior
868
- // attempt and avoid rejected/high-loss approaches. Headless-agent path
869
- // only (the host --from-edits path has no prompt). Best-effort: any
870
- // failure degrades to no trajectory and never blocks proposing. For
871
- // N>1 variants this is what makes siblings diverge from prior attempts.
872
- let trajectoryContext;
873
- let baselineLoss = null;
874
- if (useAgentFallback) {
875
- try {
876
- const baseline = await readPromotedBaselineLoss(layout, targetIds[0]);
877
- baselineLoss = baseline?.meanLoss ?? null;
878
- const traj = await buildOptimizationTrajectory(layout, targetIds[0]);
879
- trajectoryContext = renderTrajectoryBlock(traj, {
880
- baselineLoss,
881
- baselineCandidateId: baseline?.candidateId,
882
- });
883
- }
884
- catch {
885
- // best-effort scored history; never block proposing.
886
- }
887
- }
888
- const out = useHostEdits
889
- ? packageHostEdits(args.editsInput, allowedRel, currentFiles, group, targetIds[0])
890
- : await runCanonicalProposerAgent({
891
- group,
892
- groupHints,
893
- // Bind the editable surface to the resolved LOCAL files, not the
894
- // registry's package-source paths.
895
- target: { ...agentTarget, files: allowedRel },
896
- currentFiles,
897
- trajectoryContext,
898
- baselineLoss,
899
- variantAngle,
900
- spawn: opts.proposerSpawn,
901
- binary: opts.proposerBinary,
902
- });
903
- diffPatch = out.diffPatch + '\n';
904
- candidate.changedFiles = out.changedFiles;
905
- agentEdits = out.edits;
906
- rationaleForPkg = `${out.rationale}\n\n---\n\n${rationaleMd}`;
907
- }
908
- catch (err) {
909
- stderr(`${useHostEdits ? 'host edits' : 'proposer agent'} did not produce a diff for ${candidateId}; keeping placeholder: ${err instanceof Error ? err.message : String(err)}`);
720
+ const candidateId = generateCandidateId();
721
+ const ts = now().toISOString();
722
+ const candidate = {
723
+ id: candidateId,
724
+ createdAt: ts,
725
+ updatedAt: ts,
726
+ source,
727
+ sourceHints: [...group.hintIds],
728
+ sourceAggregatedKey: group.thresholdKey,
729
+ targetIds,
730
+ changedFiles: [],
731
+ status: 'draft',
732
+ expectedBenefit,
733
+ riskLevel: group.dominantRisk,
734
+ rollbackPlan: 'Delete candidate directory or transition candidate to rejected. Canonical files are unchanged because no diff is applied at proposal time.',
735
+ proposalSummary,
736
+ evalPlanSummary,
737
+ rationaleSummary,
738
+ riskSummary,
739
+ };
740
+ let diffPatch = '# No diff yet. Apply candidate change manually before invoking static gate.\n';
741
+ let rationaleForPkg = rationaleMd;
742
+ // Structured full-file edits (persisted as edits.json so the promote/apply
743
+ // step can write the new content deterministically). Sourced from the host
744
+ // code agent (`--from-edits`), the host-authored channel.
745
+ let agentEdits;
746
+ // Generate a REAL diff for this group's target (proposal-only) via the HOST
747
+ // path (`--from-edits`): the host code agent already wrote the new file; the
748
+ // CLI only validates (frozen + target scope) and packages it. Skipped under
749
+ // --dry-run so a dry run never touches disk (resolving local files can
750
+ // MATERIALIZE a template default). Edits are validated against the target's
751
+ // resolved LOCAL files and rejected if they touch a frozen/gate-defining
752
+ // file. Any failure (no-op, invalid edits) falls back to the placeholder so
753
+ // the candidate is still written for a human.
754
+ const useHostEdits = !!args.editsInput && targetIds.length > 0 && !args.dryRun;
755
+ if (useHostEdits) {
756
+ const agentTarget = lookupCanonicalTarget(targetIds[0]);
757
+ if (agentTarget) {
758
+ try {
759
+ // Resolve to the user's LOCAL editable files (installed SKILL.md, or a
760
+ // materialized project-local template/schema) so evolution writes to
761
+ // THEIR repo — no rebuild/republish. Dev repo resolves to source.
762
+ const resolved = await resolveTargetLocalFiles(targetIds[0], opts.repoRoot);
763
+ if (resolved.files.length === 0) {
764
+ throw new Error(`no local editable file for target ${targetIds[0]} in this repo`);
910
765
  }
766
+ const allowedRel = resolved.files.map((f) => f.relPath);
767
+ const currentFiles = resolved.files.map((f) => ({
768
+ relPath: f.relPath,
769
+ content: f.content,
770
+ }));
771
+ const out = packageHostEdits(args.editsInput, allowedRel, currentFiles, group, targetIds[0]);
772
+ diffPatch = out.diffPatch + '\n';
773
+ candidate.changedFiles = out.changedFiles;
774
+ agentEdits = out.edits;
775
+ rationaleForPkg = `${out.rationale}\n\n---\n\n${rationaleMd}`;
911
776
  }
912
- }
913
- // Tell the operator when a supplied --from-edits payload was NOT applied to
914
- // this candidate (so a placeholder is silently written instead of their edit).
915
- if (args.editsInput && !useHostEdits) {
916
- stderr(`--from-edits payload not applied to ${candidateId}: ${args.dryRun
917
- ? 'host edits are not validated under --dry-run'
918
- : 'surviving group has no resolved canonical target'}.`);
919
- }
920
- const pkg = {
921
- candidate,
922
- proposalMd,
923
- diffPatch,
924
- rationaleMd: rationaleForPkg,
925
- evalPlanMd,
926
- riskReportMd,
927
- ...(agentEdits && agentEdits.length > 0 ? { edits: agentEdits } : {}),
928
- };
929
- if (args.dryRun) {
930
- const dryRunPath = path.join(layout.baseDir, candidateId);
931
- proposed.push({
932
- candidateId,
933
- path: dryRunPath,
934
- targetIds,
935
- riskLevel: candidate.riskLevel,
936
- source,
937
- });
938
- if (!args.json) {
939
- stdout(`[dry-run] would propose ${candidateId} (target=${targetIds.join(',') || '(none)'}, risk=${candidate.riskLevel})`);
777
+ catch (err) {
778
+ stderr(`host edits did not produce a diff for ${candidateId}; keeping placeholder: ${err instanceof Error ? err.message : String(err)}`);
940
779
  }
941
- continue;
942
780
  }
943
- try {
944
- const written = await writeCandidatePackage(layout, pkg);
945
- proposed.push({
946
- candidateId,
947
- path: written.candidateDir,
948
- targetIds,
949
- riskLevel: candidate.riskLevel,
950
- source,
951
- });
952
- }
953
- catch (err) {
954
- const message = err instanceof Error ? err.message : String(err);
955
- errors.push(`failed to write candidate ${candidateId}: ${message}`);
956
- stderr(`Failed to write candidate ${candidateId}: ${message}`);
781
+ }
782
+ // Tell the operator when a supplied --from-edits payload was NOT applied to
783
+ // this candidate (so a placeholder is silently written instead of their edit).
784
+ if (args.editsInput && !useHostEdits) {
785
+ stderr(`--from-edits payload not applied to ${candidateId}: ${args.dryRun
786
+ ? 'host edits are not validated under --dry-run'
787
+ : 'surviving group has no resolved canonical target'}.`);
788
+ }
789
+ const pkg = {
790
+ candidate,
791
+ proposalMd,
792
+ diffPatch,
793
+ rationaleMd: rationaleForPkg,
794
+ evalPlanMd,
795
+ riskReportMd,
796
+ ...(agentEdits && agentEdits.length > 0 ? { edits: agentEdits } : {}),
797
+ };
798
+ if (args.dryRun) {
799
+ const dryRunPath = path.join(layout.baseDir, candidateId);
800
+ proposed.push({
801
+ candidateId,
802
+ path: dryRunPath,
803
+ targetIds,
804
+ riskLevel: candidate.riskLevel,
805
+ source,
806
+ });
807
+ if (!args.json) {
808
+ stdout(`[dry-run] would propose ${candidateId} (target=${targetIds.join(',') || '(none)'}, risk=${candidate.riskLevel})`);
957
809
  }
810
+ continue;
811
+ }
812
+ try {
813
+ const written = await writeCandidatePackage(layout, pkg);
814
+ proposed.push({
815
+ candidateId,
816
+ path: written.candidateDir,
817
+ targetIds,
818
+ riskLevel: candidate.riskLevel,
819
+ source,
820
+ });
821
+ }
822
+ catch (err) {
823
+ const message = err instanceof Error ? err.message : String(err);
824
+ errors.push(`failed to write candidate ${candidateId}: ${message}`);
825
+ stderr(`Failed to write candidate ${candidateId}: ${message}`);
958
826
  }
959
827
  }
960
828
  // 7) Build result + output.
@@ -988,7 +856,7 @@ export async function runProposeCanonical(args, opts) {
988
856
  }
989
857
  /**
990
858
  * Programmatic entrypoint for `self-evolution promote <id>` — the close-the-loop
991
- * apply/rollback. Exported so tests + auto-evolve can drive it directly.
859
+ * apply/rollback. Exported so tests + the host evolve-from-edits path can drive it directly.
992
860
  */
993
861
  export async function runPromoteCommand(args, opts) {
994
862
  const stdout = opts.stdout ?? ((l) => console.log(l));
@@ -1135,314 +1003,6 @@ export async function runRejectCommand(args, opts) {
1135
1003
  return { exitCode: 1, error: message };
1136
1004
  }
1137
1005
  }
1138
- /**
1139
- * Programmatic entrypoint for `self-evolution trajectory <targetId>` — a
1140
- * READ-ONLY view of the scored optimization-trajectory block the headless
1141
- * proposer receives, so a HOST code agent (which authors edits via
1142
- * `--from-edits` and never sees that prompt) can read the same prior-candidate
1143
- * loss/verdict history before authoring. Reuses the exact builder/renderer the
1144
- * proposer uses. Never mutates anything.
1145
- */
1146
- export async function runTrajectoryCommand(args, opts) {
1147
- const stdout = opts.stdout ?? ((l) => console.log(l));
1148
- const stderr = opts.stderr ?? ((l) => console.error(l));
1149
- if (!lookupCanonicalTarget(args.targetId)) {
1150
- const message = `Unknown canonical target: ${args.targetId}`;
1151
- if (args.json)
1152
- stdout(JSON.stringify({ error: message }, null, 2));
1153
- else
1154
- stderr(message);
1155
- return { exitCode: 1, error: message };
1156
- }
1157
- const layout = resolveCandidateRepo(opts.repoRoot);
1158
- try {
1159
- const baseline = await readPromotedBaselineLoss(layout, args.targetId);
1160
- const maxEntries = args.maxEntries !== undefined && Number.isFinite(args.maxEntries) && args.maxEntries > 0
1161
- ? Math.trunc(args.maxEntries)
1162
- : undefined;
1163
- const entries = await buildOptimizationTrajectory(layout, args.targetId, maxEntries !== undefined ? { maxEntries } : undefined);
1164
- if (args.json) {
1165
- stdout(JSON.stringify({
1166
- targetId: args.targetId,
1167
- baselineLoss: baseline?.meanLoss ?? null,
1168
- baselineCandidateId: baseline?.candidateId ?? null,
1169
- entries,
1170
- }, null, 2));
1171
- return { exitCode: 0 };
1172
- }
1173
- const block = renderTrajectoryBlock(entries, {
1174
- baselineLoss: baseline?.meanLoss ?? null,
1175
- baselineCandidateId: baseline?.candidateId,
1176
- });
1177
- if (block.trim().length === 0) {
1178
- stdout(`No prior candidates with signal for ${args.targetId} yet.`);
1179
- }
1180
- else {
1181
- stdout(block);
1182
- }
1183
- return { exitCode: 0 };
1184
- }
1185
- catch (err) {
1186
- const message = err instanceof Error ? err.message : String(err);
1187
- if (args.json)
1188
- stdout(JSON.stringify({ error: message }, null, 2));
1189
- else
1190
- stderr(`trajectory failed: ${message}`);
1191
- return { exitCode: 1, error: message };
1192
- }
1193
- }
1194
- /**
1195
- * ONE-BUTTON auto-evolve: learn → hints → propose(--agent) → gate → promote, in
1196
- * one motion. Auto-applies the gate-passing winner per target onto the canonical
1197
- * template (no per-change human approval), honoring the per-target switch + the
1198
- * oracle/gate freeze, and snapshotting every write for rollback.
1199
- *
1200
- * Exported + fully injectable (proposer spawn, clock, io) so it is unit-testable
1201
- * without a real `claude` binary.
1202
- */
1203
- export async function runAutoEvolve(args, opts) {
1204
- const stdout = opts.stdout ?? ((l) => console.log(l));
1205
- const stderr = opts.stderr ?? ((l) => console.error(l));
1206
- const now = opts.now ?? (() => new Date());
1207
- const autoPromote = args.auto !== false; // default true
1208
- const layout = resolveCandidateRepo(opts.repoRoot);
1209
- const policy = resolveTargetEvolutionPolicy({
1210
- config: readProjectConfig(opts.repoRoot),
1211
- evolveTarget: args.evolveTarget,
1212
- freezeTarget: args.freezeTarget,
1213
- });
1214
- const report = {
1215
- exitCode: 0,
1216
- changeNames: args.changeNames,
1217
- loss: null,
1218
- hintCount: 0,
1219
- hintsPaths: [],
1220
- proposed: [],
1221
- gated: [],
1222
- promoted: [],
1223
- skipped: [],
1224
- };
1225
- // Stage 1 — LEARN (backward pass) over EACH change: a single change is enough,
1226
- // and several aggregate a recurring signal across them. Each change's hints are
1227
- // persisted; a failed change is skipped, not fatal.
1228
- const losses = [];
1229
- const healthSignals = [];
1230
- const hintsPaths = [];
1231
- let totalHints = 0;
1232
- for (const changeName of args.changeNames) {
1233
- let learnReport;
1234
- try {
1235
- learnReport = await generateLearnReport({ projectRoot: opts.repoRoot, changeName });
1236
- }
1237
- catch (err) {
1238
- report.skipped.push({
1239
- candidateId: `learn:${changeName}`,
1240
- reason: `learn failed: ${err instanceof Error ? err.message : String(err)}`,
1241
- });
1242
- continue;
1243
- }
1244
- const l = learnReport.fitnessSample?.loss?.loss;
1245
- if (typeof l === 'number')
1246
- losses.push(l);
1247
- const h = learnReport.fitnessSample?.healthSignal;
1248
- if (typeof h === 'number')
1249
- healthSignals.push(h);
1250
- const hints = generateEvolutionHints(learnReport, policy);
1251
- totalHints += hints.length;
1252
- if (hints.length === 0)
1253
- continue;
1254
- hintsPaths.push(await persistLearnHints({ projectRoot: opts.repoRoot, changeName, hints, now }));
1255
- }
1256
- report.loss = losses.length > 0 ? losses.reduce((a, b) => a + b, 0) / losses.length : null;
1257
- // Mean RAW health signal across the change(s); null when none were measured
1258
- // (stub source / no signal) ⇒ the health gate below cannot fire.
1259
- const meanHealth = healthSignals.length > 0
1260
- ? healthSignals.reduce((a, b) => a + b, 0) / healthSignals.length
1261
- : null;
1262
- report.healthPenalty = meanHealth;
1263
- const healthBaseline = await readHealthBaseline(opts.repoRoot);
1264
- report.hintCount = totalHints;
1265
- report.hintsPaths = hintsPaths;
1266
- if (hintsPaths.length === 0) {
1267
- finishAutoEvolve(report, args.json, stdout, stderr, 'No evolution hints from the change(s) — nothing to evolve.');
1268
- return report;
1269
- }
1270
- // Stage 2 — PROPOSE real candidate diffs via the proposer agent.
1271
- // A single change is enough (minOccurrences defaults to 1). Raise it to require
1272
- // a signal to RECUR across the provided changes before it evolves — neither
1273
- // single-change nor multi-change is forced.
1274
- const minOcc = args.minOccurrences && args.minOccurrences > 0 ? args.minOccurrences : 1;
1275
- const proposeResult = await runProposeCanonical({
1276
- fromLearn: hintsPaths,
1277
- agent: true,
1278
- thresholdKey: args.thresholdKey,
1279
- aggregationOptions: {
1280
- criticalMinOccurrences: minOcc,
1281
- highMinOccurrences: minOcc,
1282
- mediumOrLowMinOccurrences: minOcc,
1283
- minDiversityForLowSeverity: minOcc,
1284
- },
1285
- evolveTarget: args.evolveTarget,
1286
- freezeTarget: args.freezeTarget,
1287
- }, {
1288
- repoRoot: opts.repoRoot,
1289
- now,
1290
- proposerSpawn: opts.proposerSpawn,
1291
- proposerBinary: opts.proposerBinary,
1292
- stdout: () => { },
1293
- stderr: (l) => stderr(l),
1294
- });
1295
- report.proposed = proposeResult.proposed.map((p) => p.candidateId);
1296
- // Stage 4 + 5 — GATE then SELECT/PROMOTE, one target at a time.
1297
- const handledTargets = new Set();
1298
- for (const candidateId of report.proposed) {
1299
- let gate;
1300
- try {
1301
- gate = await runStaticCandidateGate(layout, candidateId, {
1302
- applyTransition: true,
1303
- targetPolicy: policy,
1304
- });
1305
- }
1306
- catch (err) {
1307
- report.skipped.push({
1308
- candidateId,
1309
- reason: `gate error: ${err instanceof Error ? err.message : String(err)}`,
1310
- });
1311
- continue;
1312
- }
1313
- report.gated.push({ candidateId, passed: gate.passed });
1314
- if (!gate.passed) {
1315
- report.skipped.push({
1316
- candidateId,
1317
- reason: 'static gate failed (placeholder/no-op diff or frozen target)',
1318
- });
1319
- await recordVerdictBestEffort(layout, candidateId, {
1320
- verdict: 'gate-failed',
1321
- at: now().toISOString(),
1322
- reason: 'static gate failed (placeholder/no-op diff or frozen target)',
1323
- lossAtDecision: (await readCandidateFitness(layout, candidateId)).meanLoss,
1324
- decidedBy: 'static-gate',
1325
- });
1326
- continue;
1327
- }
1328
- let pkg;
1329
- try {
1330
- pkg = await readCandidatePackage(layout, candidateId);
1331
- }
1332
- catch (err) {
1333
- report.skipped.push({
1334
- candidateId,
1335
- reason: `read failed: ${err instanceof Error ? err.message : String(err)}`,
1336
- });
1337
- continue;
1338
- }
1339
- const targetId = pkg.candidate.targetIds[0];
1340
- if (!targetId) {
1341
- report.skipped.push({ candidateId, reason: 'candidate has no canonical target id' });
1342
- continue;
1343
- }
1344
- if (handledTargets.has(targetId)) {
1345
- report.skipped.push({ candidateId, reason: `target ${targetId} already handled this run` });
1346
- continue;
1347
- }
1348
- handledTargets.add(targetId);
1349
- const fitness = await readCandidateFitness(layout, candidateId);
1350
- const decision = shouldAutoPromote({
1351
- gatePassed: true,
1352
- targetEvolvable: isCanonicalTargetEvolvable(targetId, policy),
1353
- accumulatedCount: fitness.count,
1354
- meanLoss: fitness.meanLoss,
1355
- baselineLoss: report.loss,
1356
- requireProvenImprovement: args.requireProven === true,
1357
- healthPenalty: meanHealth,
1358
- baselineHealthPenalty: healthBaseline?.healthPenalty ?? null,
1359
- });
1360
- if (!autoPromote) {
1361
- report.skipped.push({
1362
- candidateId,
1363
- reason: `--no-auto: gated & ready (would promote: ${decision.promote ? 'yes' : `no — ${decision.reason}`})`,
1364
- });
1365
- continue;
1366
- }
1367
- if (!decision.promote) {
1368
- report.skipped.push({ candidateId, reason: decision.reason });
1369
- await recordVerdictBestEffort(layout, candidateId, {
1370
- verdict: 'declined',
1371
- at: now().toISOString(),
1372
- reason: decision.reason,
1373
- lossAtDecision: fitness.meanLoss,
1374
- baselineLoss: report.loss,
1375
- decidedBy: 'auto-evolve',
1376
- });
1377
- continue;
1378
- }
1379
- try {
1380
- const applied = await applyCandidatePromotion(layout, candidateId, {
1381
- repoRoot: opts.repoRoot,
1382
- policy,
1383
- });
1384
- report.promoted.push({
1385
- candidateId,
1386
- targetIds: applied.targetIds,
1387
- files: applied.appliedFiles.map((f) => f.file),
1388
- });
1389
- await recordVerdictBestEffort(layout, candidateId, {
1390
- verdict: 'promoted',
1391
- at: now().toISOString(),
1392
- reason: `auto-evolve: ${decision.reason}`,
1393
- lossAtDecision: fitness.meanLoss,
1394
- baselineLoss: report.loss,
1395
- decidedBy: 'auto-evolve',
1396
- });
1397
- }
1398
- catch (err) {
1399
- report.skipped.push({
1400
- candidateId,
1401
- reason: `promote failed: ${err instanceof Error ? err.message : String(err)}`,
1402
- });
1403
- }
1404
- }
1405
- // Record the accepted health as the new per-repo baseline (best-effort) when
1406
- // this run promoted something and had a real health signal. The next run's
1407
- // health gate compares against this value.
1408
- if (report.promoted.length > 0 && meanHealth != null) {
1409
- await writeHealthBaseline(opts.repoRoot, {
1410
- healthPenalty: meanHealth,
1411
- updatedAt: now().toISOString(),
1412
- sourceChange: args.changeNames.join(','),
1413
- });
1414
- }
1415
- finishAutoEvolve(report, args.json, stdout, stderr);
1416
- return report;
1417
- }
1418
- function finishAutoEvolve(report, json, stdout, stderr, note) {
1419
- if (json) {
1420
- stdout(JSON.stringify(report, null, 2));
1421
- return;
1422
- }
1423
- if (report.error) {
1424
- stderr(`auto-evolve failed: ${report.error}`);
1425
- return;
1426
- }
1427
- stdout(`Auto-evolve: ${report.changeNames.join(', ')}`);
1428
- stdout(` loss (functional⊕health): ${report.loss === null ? 'n/a' : report.loss.toFixed(3)}`);
1429
- stdout(` hints: ${report.hintCount}`);
1430
- if (note) {
1431
- stdout(` ${note}`);
1432
- return;
1433
- }
1434
- stdout(` proposed: ${report.proposed.length} | gate-pass: ${report.gated.filter((g) => g.passed).length} | PROMOTED: ${report.promoted.length} | skipped: ${report.skipped.length}`);
1435
- for (const p of report.promoted) {
1436
- stdout(` ✓ PROMOTED ${p.candidateId} -> [${p.targetIds.join(', ')}]: ${p.files.join(', ')}`);
1437
- }
1438
- for (const s of report.skipped) {
1439
- stdout(` · skipped ${s.candidateId}: ${s.reason}`);
1440
- }
1441
- if (report.promoted.length > 0) {
1442
- stdout(' Rebuild + republish to ship the evolved template(s).');
1443
- stdout(' Revert any promotion with: self-evolution promote <id> --rollback');
1444
- }
1445
- }
1446
1006
  /**
1447
1007
  * HOST-AUTHORED one-button evolve. The single non-interactive
1448
1008
  * host-authored-edit → gate → observed-verified promote command.
@@ -1504,7 +1064,7 @@ export async function runEvolveFromEdits(args, opts) {
1504
1064
  }
1505
1065
  return report;
1506
1066
  };
1507
- // Non-interactive contract: --yes is required (mirrors auto-evolve's one-button
1067
+ // Non-interactive contract: --yes is required (one-button host-authored
1508
1068
  // confirmation), and --agent is REFUSED (this path is host-authored, never spawns).
1509
1069
  if (args.agent) {
1510
1070
  return fail(2, 'error-bad-input', '--agent is not allowed: evolve-from-edits is host-authored and never spawns the proposer.', false);
@@ -2122,173 +1682,6 @@ export async function runPromotionReportCommand(args, opts) {
2122
1682
  }
2123
1683
  return { exitCode: 0, report };
2124
1684
  }
2125
- /**
2126
- * Programmatic entrypoint for `self-evolution evolve` — the GA outer loop.
2127
- *
2128
- * Chains the previously-inert pieces into one live pass:
2129
- * groupCandidatesByTarget → (optional replay scoring that APPENDS fitness)
2130
- * → rankCandidatesForTarget → select best → human-gated promotion report.
2131
- *
2132
- * Invariants: frozen targets (per the resolved policy) are skipped; promotion
2133
- * is NEVER applied here (the report keeps its human-review gate); the oracle is
2134
- * never touched (replay only runs tests).
2135
- */
2136
- export async function runEvolveOuterLoopCommand(args, opts) {
2137
- const stdout = opts.stdout ?? ((line) => console.log(line));
2138
- const stderr = opts.stderr ?? ((line) => console.error(line));
2139
- const layout = resolveCandidateRepo(opts.repoRoot);
2140
- const config = readProjectConfig(opts.repoRoot);
2141
- const policy = resolveTargetEvolutionPolicy({
2142
- config,
2143
- evolveTarget: args.evolveTarget,
2144
- freezeTarget: args.freezeTarget,
2145
- });
2146
- const at = args.at ?? new Date().toISOString();
2147
- let byTarget;
2148
- try {
2149
- byTarget = await groupCandidatesByTarget(layout);
2150
- }
2151
- catch (err) {
2152
- const message = err instanceof Error ? err.message : String(err);
2153
- if (args.json)
2154
- stdout(JSON.stringify({ error: message }, null, 2));
2155
- else
2156
- stderr(message);
2157
- return { exitCode: 1, targets: [], error: message };
2158
- }
2159
- let targetIds = [...byTarget.keys()].sort();
2160
- if (args.target)
2161
- targetIds = targetIds.filter((t) => t === args.target);
2162
- const replayMode = args.replay === true && (args.changeIds?.length ?? 0) > 0;
2163
- if (args.replay === true && !replayMode) {
2164
- stderr('--replay requires at least one --change <id>; falling back to accumulated fitness');
2165
- }
2166
- const runChange = args.runChange ??
2167
- makeReplayRunChange({ repoRoot: opts.repoRoot, healthSource: resolveMetricSource(config) });
2168
- const summaries = [];
2169
- for (const targetId of targetIds) {
2170
- const candidateIds = byTarget.get(targetId) ?? [];
2171
- if (!isCanonicalTargetEvolvable(targetId, policy)) {
2172
- stdout(`target ${targetId}: frozen by policy — skipped (${candidateIds.length} candidate(s))`);
2173
- summaries.push({ targetId, candidateIds, frozen: true, ranked: [], best: null });
2174
- continue;
2175
- }
2176
- let scored;
2177
- if (replayMode) {
2178
- scored = await scoreCandidatesByReplay({
2179
- layout,
2180
- candidateIds,
2181
- changeIds: args.changeIds,
2182
- runChange,
2183
- at,
2184
- log: (line) => stdout(` ${line}`),
2185
- });
2186
- }
2187
- const ranked = await rankCandidatesForTarget(layout, candidateIds, targetId);
2188
- const best = ranked.length > 0 ? ranked[0].candidateId : null;
2189
- let promotionReportPath;
2190
- if (best && args.write) {
2191
- // Human-gated: writes the report + records its path; NEVER transitions
2192
- // status / promotes. The report retains its "human review required" gate.
2193
- await generatePromotionReport(layout, best, { write: true });
2194
- promotionReportPath = path.join(layout.baseDir, best, 'promotion-report.md');
2195
- }
2196
- // Mark sibling-variant losers `outcompeted` (advisory metadata; status is
2197
- // never changed). A loser is a PROVEN candidate (meanLoss !== null) in the
2198
- // SAME variantGroup as `best` with a strictly higher meanLoss. The
2199
- // optimization-trajectory block then renders them as negative examples.
2200
- let outcompeted;
2201
- if (args.markOutcompeted && best) {
2202
- const bestRow = ranked.find((r) => r.candidateId === best);
2203
- if (bestRow && bestRow.meanLoss !== null) {
2204
- const metas = await listCandidates(layout, { targetId });
2205
- const vgById = new Map(metas.map((c) => [c.id, c.variantGroup]));
2206
- const bestVg = vgById.get(best);
2207
- if (bestVg) {
2208
- outcompeted = [];
2209
- for (const r of ranked) {
2210
- if (r.candidateId === best || r.meanLoss === null)
2211
- continue;
2212
- if (vgById.get(r.candidateId) !== bestVg)
2213
- continue;
2214
- if (r.meanLoss > bestRow.meanLoss) {
2215
- await recordVerdictBestEffort(layout, r.candidateId, {
2216
- verdict: 'outcompeted',
2217
- at,
2218
- reason: `lost GA ranking to ${best} (meanLoss ${r.meanLoss.toFixed(3)} vs ${bestRow.meanLoss.toFixed(3)})`,
2219
- lossAtDecision: r.meanLoss,
2220
- baselineLoss: bestRow.meanLoss,
2221
- decidedBy: 'evolve-outer-loop',
2222
- });
2223
- outcompeted.push(r.candidateId);
2224
- }
2225
- }
2226
- }
2227
- }
2228
- }
2229
- summaries.push({
2230
- targetId,
2231
- candidateIds,
2232
- frozen: false,
2233
- scored,
2234
- ranked,
2235
- best,
2236
- promotionReportPath,
2237
- ...(outcompeted ? { outcompeted } : {}),
2238
- });
2239
- }
2240
- if (args.json) {
2241
- stdout(JSON.stringify({ exitCode: 0, targets: summaries }, null, 2));
2242
- }
2243
- else {
2244
- stdout(renderEvolveOuterLoopSummary(summaries, { replayMode, write: args.write === true }));
2245
- }
2246
- return { exitCode: 0, targets: summaries };
2247
- }
2248
- function renderEvolveOuterLoopSummary(targets, ctx) {
2249
- const lines = [];
2250
- lines.push('# GA outer loop');
2251
- lines.push(`mode: ${ctx.replayMode ? 'replay (corpus scoring)' : 'accumulated fitness'} · promotion: human-gated (never auto-promoted)`);
2252
- if (targets.length === 0) {
2253
- lines.push('', 'No candidates found.');
2254
- return lines.join('\n');
2255
- }
2256
- for (const t of targets) {
2257
- lines.push('', `## ${t.targetId} (${t.candidateIds.length} candidate(s))`);
2258
- if (t.frozen) {
2259
- lines.push('- frozen by policy — skipped');
2260
- continue;
2261
- }
2262
- if (t.scored) {
2263
- const ok = t.scored.filter((s) => !s.error).length;
2264
- const skipped = t.scored.length - ok;
2265
- lines.push(`- replay: scored ${ok}, skipped ${skipped}`);
2266
- for (const s of t.scored.filter((x) => x.error)) {
2267
- lines.push(` - skipped ${s.candidateId}: ${s.error}`);
2268
- }
2269
- }
2270
- for (const r of t.ranked) {
2271
- const proven = r.meanLoss !== null;
2272
- const detail = proven
2273
- ? `meanLoss=${r.meanLoss.toFixed(4)} meanPass=${(r.meanPassRate ?? 0).toFixed(4)} n=${r.count} trend=${r.trend}`
2274
- : 'unproven (no fitness records yet)';
2275
- lines.push(`- ${r.candidateId === t.best ? '★' : ' '} ${r.candidateId}: ${detail}`);
2276
- }
2277
- if (t.best) {
2278
- lines.push(`- best: ${t.best}`);
2279
- if (ctx.write && t.promotionReportPath) {
2280
- lines.push(`- promotion report (human review required): ${t.promotionReportPath}`);
2281
- }
2282
- if (t.outcompeted && t.outcompeted.length > 0) {
2283
- lines.push(`- marked outcompeted: ${t.outcompeted.join(', ')}`);
2284
- }
2285
- }
2286
- else {
2287
- lines.push('- best: (none)');
2288
- }
2289
- }
2290
- return lines.join('\n');
2291
- }
2292
1685
  function parseRequireDiff(value) {
2293
1686
  if (value === undefined)
2294
1687
  return true;