synergyspec-selfevolving 1.1.10 → 1.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. package/README.md +12 -3
  2. package/dist/commands/learn.js +78 -11
  3. package/dist/commands/self-evolution.d.ts +13 -0
  4. package/dist/commands/self-evolution.js +156 -20
  5. package/dist/commands/workflow/status.js +13 -0
  6. package/dist/core/change-readiness.d.ts +24 -0
  7. package/dist/core/change-readiness.js +47 -0
  8. package/dist/core/config-prompts.js +10 -0
  9. package/dist/core/fitness/health/local-source.d.ts +9 -6
  10. package/dist/core/fitness/health/local-source.js +9 -6
  11. package/dist/core/fitness/health/resolve-source.d.ts +4 -3
  12. package/dist/core/fitness/health/resolve-source.js +5 -4
  13. package/dist/core/fitness/sample.d.ts +17 -0
  14. package/dist/core/learn.d.ts +7 -0
  15. package/dist/core/learn.js +57 -5
  16. package/dist/core/project-config.d.ts +1 -0
  17. package/dist/core/project-config.js +11 -8
  18. package/dist/core/self-evolution/health-baseline.d.ts +24 -0
  19. package/dist/core/self-evolution/health-baseline.js +78 -0
  20. package/dist/core/self-evolution/index.d.ts +1 -0
  21. package/dist/core/self-evolution/index.js +1 -0
  22. package/dist/core/self-evolution/learn-observation-adapter.d.ts +16 -1
  23. package/dist/core/self-evolution/learn-observation-adapter.js +101 -15
  24. package/dist/core/self-evolution/promote.d.ts +25 -0
  25. package/dist/core/self-evolution/promote.js +21 -0
  26. package/dist/core/self-evolution/target-evolution.d.ts +7 -0
  27. package/dist/core/self-evolution/target-evolution.js +9 -0
  28. package/dist/core/templates/workflows/learn.js +10 -5
  29. package/package.json +2 -1
  30. package/scripts/code-health.py +1154 -0
package/README.md CHANGED
@@ -208,9 +208,18 @@ What actually works today:
208
208
  - **Per-change fitness loss** (`learn`): `loss = 0.7·(1 − pass_rate) +
209
209
  0.3·health_penalty`, in `[0,1]`. The functional term comes from the change's
210
210
  gen-test/run-test pass rate; the code-health term is fed by a swappable
211
- `MetricSource` selected via `health:` in `synergyspec-selfevolving/config.yaml`
212
- (`stub` no health signal by default; `local-python` `scripts/code-health.py`;
213
- `sonarqube`). With no `health` config the loss is functional-only.
211
+ `MetricSource` selected via `health:` in `synergyspec-selfevolving/config.yaml`.
212
+ New projects scaffold `source: local` (default-on): a dependency-free,
213
+ multi-language analyzer (`scripts/code-health.py`, Python 3 stdlib only) that
214
+ scores Python, Rust, C, and C++ — no server, no network. Set `source: stub` to
215
+ make the loss functional-only; `sonarqube` is also supported; `local-python` is
216
+ a back-compat alias for `local`. See
217
+ [docs/customization.md](docs/customization.md#code-health-metrics-self-evolution).
218
+ - **Code-health gate** (auto-evolve / `evolve-from-edits`): a measured code-health
219
+ regression vs the last accepted state blocks auto-promotion (and surfaces a
220
+ loud `health-signal-unavailable` observation if a configured analyzer can't
221
+ run). No health signal ⇒ no gate, so the loop is never blocked on a missing
222
+ measurement.
214
223
  - **Candidate proposals** (`self-evolution propose-canonical`): turns aggregated
215
224
  `learn` hints into human-gated candidate packages under
216
225
  `.synergyspec-selfevolving/self-evolution/candidates/`. Proposal-only — no
package/dist/commands/learn.js CHANGED
@@ -1,6 +1,6 @@
1
1
  import path from 'node:path';
2
2
  import { applyLearnCandidates, applyLearnMemoryCandidates, generateLearnReport, renderLearnReport, } from '../core/learn.js';
3
- import { generateEvolutionHints, lookupCanonicalTarget, persistLearnHints, resolveTargetEvolutionPolicy, resolveTargetLocalFilesReadonly, } from '../core/self-evolution/index.js';
3
+ import { detectUnbindableHintObservations, generateEvolutionHints, isCanonicalTargetEvolvable, listCanonicalTargets, lookupCanonicalTarget, persistLearnHints, resolveTargetEvolutionPolicy, resolveTargetLocalFilesReadonly, } from '../core/self-evolution/index.js';
4
4
  import { readProjectConfig } from '../core/project-config.js';
5
5
  import { assembleTrajectoryContext, } from '../core/learn/trajectory-assembler.js';
6
6
  import { findTranscriptsForChange, resolveChangeDir, } from '../core/learn/trajectory-discovery.js';
@@ -43,6 +43,17 @@ export function registerLearnCommand(program) {
43
43
  });
44
44
  const evolutionHints = generateEvolutionHints(report, targetPolicy);
45
45
  const evolutionPreview = await buildEvolutionPreview(evolutionHints, targetPolicy, projectRoot);
46
+ // Surface an unbindable kind-only hint (one that could not pin to a concrete
47
+ // target) as an actionable DEFECT observation, so a failed target binding is
48
+ // not silently rationalized as a safe gate refusal — but ONLY when the operator
49
+ // is actually trying to evolve (--apply / --persist-hints / a named
50
+ // --evolve-target). On a plain preview run the kind-only ambiguity is the
51
+ // designed state, not a defect, so a bare `learn <change>` stays byte-identical.
52
+ if (options.apply === true ||
53
+ options.persistHints === true ||
54
+ options.evolveTarget !== undefined) {
55
+ report.observations.push(...detectUnbindableHintObservations(evolutionHints, targetPolicy));
56
+ }
46
57
  const applied = options.apply === true
47
58
  ? await applyLearnCandidates({
48
59
  projectRoot,
@@ -349,22 +360,61 @@ function renderIngestHandoff(changeName, ingest, applied) {
349
360
  return lines.join('\n');
350
361
  }
351
362
  function printJson(report, applied, evolutionPreview, hintsPath) {
363
+ // `mode` only tracks whether MEMORY candidates were applied (--apply). It does
364
+ // NOT reflect that --persist-hints wrote a hints file, which is what made the
365
+ // old `mode:"preview"` read as "nothing written". `wrote` makes every write this
366
+ // run produced explicit.
367
+ const wrote = [];
368
+ if (hintsPath)
369
+ wrote.push(hintsPath);
370
+ if (applied) {
371
+ for (const item of applied.written)
372
+ wrote.push(`memory:${item.memoryId}`);
373
+ }
374
+ // Flat "how many hints, and is each ready to evolve or does it need a pin?" view
375
+ // so the agent never has to infer readiness from the richer evolutionPreview.
376
+ const evolution = {
377
+ hintsGenerated: evolutionPreview.hintCount,
378
+ targets: evolutionPreview.targets.map((target) => ({
379
+ targetId: target.targetId,
380
+ targetKind: target.targetKind,
381
+ pinned: target.targetId !== null,
382
+ unbindable: target.needsDisambiguation,
383
+ candidateTargetIds: target.candidateTargetIds,
384
+ hintIds: target.hintIds,
385
+ })),
386
+ };
352
387
  console.log(JSON.stringify({
353
388
  mode: applied ? 'apply' : 'preview',
354
389
  ...report,
355
390
  applied,
356
391
  evolutionPreview,
357
392
  ...(hintsPath ? { hintsPath } : {}),
393
+ wrote,
394
+ evolution,
358
395
  }, null, 2));
359
396
  }
360
397
  async function buildEvolutionPreview(hints, targetPolicy, projectRoot) {
361
398
  const byTarget = new Map();
362
399
  for (const hint of hints) {
363
- const targetId = hint.affectedTargetId ?? `${hint.affectedTargetKind}:unspecified`;
364
- const target = hint.affectedTargetId ? lookupCanonicalTarget(hint.affectedTargetId) : undefined;
365
- const current = byTarget.get(targetId) ?? {
366
- targetId,
400
+ const pinned = hint.affectedTargetId ?? null;
401
+ // Group key only an unpinned kind-only hint groups under an internal
402
+ // `__unbindable__:<kind>` key that is NEVER emitted (the emitted `targetId`
403
+ // stays null), so the `<kind>:unspecified` placeholder no longer leaks out.
404
+ const groupKey = pinned ?? `__unbindable__:${hint.affectedTargetKind}`;
405
+ const target = pinned ? lookupCanonicalTarget(pinned) : undefined;
406
+ const current = byTarget.get(groupKey) ?? {
407
+ targetId: pinned,
367
408
  targetKind: hint.affectedTargetKind,
409
+ needsDisambiguation: pinned === null,
410
+ // Only offer same-kind ids that are actually EVOLVABLE under the policy, so the
411
+ // operator is never told to `--evolve-target` a frozen id (which would then be
412
+ // refused). Matches detectUnbindableHintObservations' candidate list exactly.
413
+ candidateTargetIds: pinned === null
414
+ ? listCanonicalTargets({ kind: hint.affectedTargetKind })
415
+ .filter((candidate) => isCanonicalTargetEvolvable(candidate.id, targetPolicy))
416
+ .map((candidate) => candidate.id)
417
+ : [],
368
418
  files: target?.files ? [...target.files] : [],
369
419
  localFiles: [],
370
420
  hintIds: [],
@@ -391,7 +441,7 @@ async function buildEvolutionPreview(hints, targetPolicy, projectRoot) {
391
441
  });
392
442
  }
393
443
  }
394
- byTarget.set(targetId, current);
444
+ byTarget.set(groupKey, current);
395
445
  }
396
446
  // Resolve each concrete target to its LOCAL file path(s) in THIS repo,
397
447
  // best-effort and READ-ONLY (never materialize during a plain preview). A
@@ -399,7 +449,7 @@ async function buildEvolutionPreview(hints, targetPolicy, projectRoot) {
399
449
  // empty and the renderer falls back to the registry source path. On any throw
400
450
  // we leave localFiles empty and fall back as well.
401
451
  for (const target of byTarget.values()) {
402
- if (!lookupCanonicalTarget(target.targetId))
452
+ if (target.targetId === null)
403
453
  continue;
404
454
  try {
405
455
  target.localFiles = await resolveTargetLocalFilesReadonly(target.targetId, projectRoot);
@@ -418,7 +468,7 @@ async function buildEvolutionPreview(hints, targetPolicy, projectRoot) {
418
468
  ...(targetPolicy.source.cliEvolve ? { cliEvolve: targetPolicy.source.cliEvolve } : {}),
419
469
  ...(targetPolicy.source.cliFreeze ? { cliFreeze: targetPolicy.source.cliFreeze } : {}),
420
470
  },
421
- targets: [...byTarget.values()].sort((left, right) => left.targetId.localeCompare(right.targetId)),
471
+ targets: [...byTarget.values()].sort((left, right) => (left.targetId ?? `~${left.targetKind}`).localeCompare(right.targetId ?? `~${right.targetKind}`)),
422
472
  };
423
473
  }
424
474
  function renderLearnTransparency(report, applied, evolutionPreview, hintsPath, options) {
@@ -463,7 +513,9 @@ function renderLearnTransparency(report, applied, evolutionPreview, hintsPath, o
463
513
  }
464
514
  else {
465
515
  for (const target of evolutionPreview.targets) {
466
- lines.push(`- Target: ${target.targetId} (${target.targetKind})`);
516
+ lines.push(target.targetId === null
517
+ ? `- Target: (${target.targetKind}, unpinned — needs --evolve-target)`
518
+ : `- Target: ${target.targetId} (${target.targetKind})`);
467
519
  // Prefer the CONCRETE local file the writer would edit; fall back to the
468
520
  // registry source path. Only a genuinely kind-only/ambiguous group with no
469
521
  // resolvable file shows the 'not pinned' notice.
@@ -484,7 +536,7 @@ function renderLearnTransparency(report, applied, evolutionPreview, hintsPath, o
484
536
  // A "concrete" target is one pinned to a registered canonical id (not a
485
537
  // kind-only `<kind>:unspecified` group). The host agent authors edits.json's
486
538
  // full new content for that target's resolved LOCAL file.
487
- const concreteTargets = evolutionPreview.targets.filter((target) => lookupCanonicalTarget(target.targetId));
539
+ const concreteTargets = evolutionPreview.targets.filter((target) => target.targetId !== null);
488
540
  const concreteTarget = concreteTargets.length > 0 ? concreteTargets[0] : undefined;
489
541
  if (hintsPath && concreteTarget) {
490
542
  lines.push(`- Hints written: ${hintsPath}`);
@@ -494,7 +546,22 @@ function renderLearnTransparency(report, applied, evolutionPreview, hintsPath, o
494
546
  }
495
547
  else if (hintsPath) {
496
548
  lines.push(`- Hints written: ${hintsPath}`);
497
- lines.push('- No single concrete target resolved yet; add evidence or widen the target policy so a specific file can be pinned for evolve-from-edits.');
549
+ // A kind-only (`<kind>:unspecified`) hint can't be promoted by
550
+ // evolve-from-edits until it is pinned to ONE concrete target. The remedy is
551
+ // to NAME a single target via --evolve-target — NOT to "widen the policy"
552
+ // (widening keeps several same-kind targets evolvable, so the hint stays
553
+ // unpinned). List the registered candidates so the operator can pick one.
554
+ const candidates = [
555
+ ...new Set(evolutionPreview.targets
556
+ .filter((target) => target.targetId === null)
557
+ .flatMap((target) => target.candidateTargetIds)),
558
+ ];
559
+ if (candidates.length > 0) {
560
+ lines.push(`- No single concrete target resolved yet. Pin one by re-running with --evolve-target <id> (candidates: ${candidates.join(', ')}); then evolve-from-edits can consume the hint.`);
561
+ }
562
+ else {
563
+ lines.push('- No single concrete target resolved yet; add evidence so a specific file can be pinned for evolve-from-edits.');
564
+ }
498
565
  }
499
566
  else if (evolutionPreview.targets.length > 0) {
500
567
  lines.push(`- Persist the optimization evidence: synergyspec-selfevolving learn "${report.changeName}" --persist-hints${renderTargetArgs(options)}`);
package/dist/commands/self-evolution.d.ts CHANGED
@@ -179,6 +179,8 @@ export interface AutoEvolveReport {
179
179
  changeNames: string[];
180
180
  /** Mean per-change loss (functional ⊕ health) from learn; null when unmeasurable. */
181
181
  loss: number | null;
182
+ /** Mean RAW code-health penalty across the change(s); null when no health signal. */
183
+ healthPenalty?: number | null;
182
184
  hintCount: number;
183
185
  hintsPaths: string[];
184
186
  proposed: string[];
@@ -237,8 +239,19 @@ export interface RunEvolveFromEditsOptions {
237
239
  */
238
240
  generateReport?: (changeName: string) => Promise<LearnReport>;
239
241
  }
242
+ /**
243
+ * Typed, machine-readable result of an evolve-from-edits run. Refusals (the
244
+ * `refused-*` values) are LEGITIMATE non-promotions and keep `exitCode: 0` so the
245
+ * autonomous learn skill treats them as "safe, move on"; only the `error-*` values
246
+ * are non-zero. This is additive to the human-facing `reason`/`error` strings — it
247
+ * lets callers (and `status`) tell "did it promote, and if not, was that a safe
248
+ * refusal or a defect?" without parsing prose.
249
+ */
250
+ export type EvolveFromEditsOutcome = 'promoted' | 'refused-no-surviving-hint' | 'refused-static-gate' | 'refused-unverified-evidence' | 'refused-auto-promote-declined' | 'refused-health-regression' | 'error-unknown-target' | 'error-bad-input' | 'error-runtime';
240
251
  export interface EvolveFromEditsReport {
241
252
  exitCode: number;
253
+ /** Typed result. `refused-*` ⇒ exitCode 0 (safe); `error-*` ⇒ non-zero. */
254
+ outcome: EvolveFromEditsOutcome;
242
255
  targetId: string;
243
256
  /** The host candidate that was packaged (the gate/promote subject). */
244
257
  candidateId: string | null;
package/dist/commands/self-evolution.js CHANGED
@@ -1,7 +1,7 @@
1
1
  import * as fs from 'node:fs';
2
2
  import * as path from 'node:path';
3
3
  import fastGlob from 'fast-glob';
4
- import { aggregateLearnEvolutionHints, applyCandidatePromotion, rollbackCandidatePromotion, shouldAutoPromote, isEvidenceComplete, generateEvolutionHints, persistLearnHints, readCandidateFitness, readCandidatePackage, resolveTargetLocalFiles, CANONICAL_CANDIDATE_SOURCES, CANONICAL_TARGETS, collectArchiveExperiences, EVOLVABLE_PART_DESCRIPTIONS, EVOLVABLE_PARTS, evaluateTaskDecompositionForChange, evaluateToolEvolutionCandidate, generateCandidateId, generatePromotionReport, groupCandidatesByTarget, rankCandidatesForTarget, makeReplayRunChange, scoreCandidatesByReplay, isEvolutionPartEnabled, findSimilarArchiveExperiences, listCanonicalTargets, lookupCanonicalTarget, runCanonicalProposerAgent, validateCandidateEdits, renderUnifiedDiff, CanonicalProposerNoOp, resolveTargetEvolutionPolicy, isCanonicalTargetEvolvable, parseEvolutionSwitchOptions, readTemplateVariantManifest, renderAlignmentReport, renderArchiveExperienceBlock, renderStaticGateSummary, renderToolEvolutionGuardReport, renderEvolutionSwitches, requireCanonicalTarget, resolveCandidateRepo, runStaticCandidateGate, selectTemplateVariant, shouldTriggerCandidate, validateLearnEvolutionHint, writeCandidatePackage, verifySpecCodeAlignmentForChange, } from '../core/self-evolution/index.js';
4
+ import { aggregateLearnEvolutionHints, applyCandidatePromotion, rollbackCandidatePromotion, shouldAutoPromote, isEvidenceComplete, generateEvolutionHints, persistLearnHints, readCandidateFitness, readHealthBaseline, writeHealthBaseline, readCandidatePackage, resolveTargetLocalFiles, CANONICAL_CANDIDATE_SOURCES, CANONICAL_TARGETS, collectArchiveExperiences, EVOLVABLE_PART_DESCRIPTIONS, EVOLVABLE_PARTS, evaluateTaskDecompositionForChange, evaluateToolEvolutionCandidate, generateCandidateId, generatePromotionReport, groupCandidatesByTarget, rankCandidatesForTarget, makeReplayRunChange, scoreCandidatesByReplay, isEvolutionPartEnabled, findSimilarArchiveExperiences, listCanonicalTargets, lookupCanonicalTarget, runCanonicalProposerAgent, validateCandidateEdits, renderUnifiedDiff, CanonicalProposerNoOp, resolveTargetEvolutionPolicy, isCanonicalTargetEvolvable, parseEvolutionSwitchOptions, readTemplateVariantManifest, renderAlignmentReport, renderArchiveExperienceBlock, renderStaticGateSummary, renderToolEvolutionGuardReport, renderEvolutionSwitches, requireCanonicalTarget, resolveCandidateRepo, runStaticCandidateGate, selectTemplateVariant, shouldTriggerCandidate, validateLearnEvolutionHint, writeCandidatePackage, verifySpecCodeAlignmentForChange, } from '../core/self-evolution/index.js';
5
5
  import { generateLearnReport } from '../core/learn.js';
6
6
  import { resolveMetricSource } from '../core/fitness/index.js';
7
7
  import { validateChangeExists, validateSchemaExists } from './workflow/shared.js';
@@ -957,6 +957,7 @@ export async function runAutoEvolve(args, opts) {
957
957
  // and several aggregate a recurring signal across them. Each change's hints are
958
958
  // persisted; a failed change is skipped, not fatal.
959
959
  const losses = [];
960
+ const healthSignals = [];
960
961
  const hintsPaths = [];
961
962
  let totalHints = 0;
962
963
  for (const changeName of args.changeNames) {
@@ -974,6 +975,9 @@ export async function runAutoEvolve(args, opts) {
974
975
  const l = learnReport.fitnessSample?.loss?.loss;
975
976
  if (typeof l === 'number')
976
977
  losses.push(l);
978
+ const h = learnReport.fitnessSample?.healthSignal;
979
+ if (typeof h === 'number')
980
+ healthSignals.push(h);
977
981
  const hints = generateEvolutionHints(learnReport, policy);
978
982
  totalHints += hints.length;
979
983
  if (hints.length === 0)
@@ -981,6 +985,13 @@ export async function runAutoEvolve(args, opts) {
981
985
  hintsPaths.push(await persistLearnHints({ projectRoot: opts.repoRoot, changeName, hints, now }));
982
986
  }
983
987
  report.loss = losses.length > 0 ? losses.reduce((a, b) => a + b, 0) / losses.length : null;
988
+ // Mean RAW health signal across the change(s); null when none were measured
989
+ // (stub source / no signal) ⇒ the health gate below cannot fire.
990
+ const meanHealth = healthSignals.length > 0
991
+ ? healthSignals.reduce((a, b) => a + b, 0) / healthSignals.length
992
+ : null;
993
+ report.healthPenalty = meanHealth;
994
+ const healthBaseline = await readHealthBaseline(opts.repoRoot);
984
995
  report.hintCount = totalHints;
985
996
  report.hintsPaths = hintsPaths;
986
997
  if (hintsPaths.length === 0) {
@@ -1067,6 +1078,8 @@ export async function runAutoEvolve(args, opts) {
1067
1078
  meanLoss: fitness.meanLoss,
1068
1079
  baselineLoss: report.loss,
1069
1080
  requireProvenImprovement: args.requireProven === true,
1081
+ healthPenalty: meanHealth,
1082
+ baselineHealthPenalty: healthBaseline?.healthPenalty ?? null,
1070
1083
  });
1071
1084
  if (!autoPromote) {
1072
1085
  report.skipped.push({
@@ -1097,6 +1110,16 @@ export async function runAutoEvolve(args, opts) {
1097
1110
  });
1098
1111
  }
1099
1112
  }
1113
+ // Record the accepted health as the new per-repo baseline (best-effort) when
1114
+ // this run promoted something and had a real health signal. The next run's
1115
+ // health gate compares against this value.
1116
+ if (report.promoted.length > 0 && meanHealth != null) {
1117
+ await writeHealthBaseline(opts.repoRoot, {
1118
+ healthPenalty: meanHealth,
1119
+ updatedAt: now().toISOString(),
1120
+ sourceChange: args.changeNames.join(','),
1121
+ });
1122
+ }
1100
1123
  finishAutoEvolve(report, args.json, stdout, stderr);
1101
1124
  return report;
1102
1125
  }
@@ -1156,6 +1179,9 @@ export async function runEvolveFromEdits(args, opts) {
1156
1179
  ((changeName) => generateLearnReport({ projectRoot: opts.repoRoot, changeName }));
1157
1180
  const report = {
1158
1181
  exitCode: 0,
1182
+ // Loud fallback: a terminal path that forgets to set its outcome surfaces as
1183
+ // an error rather than a silent (and wrong) success.
1184
+ outcome: 'error-runtime',
1159
1185
  targetId: args.evolveTarget,
1160
1186
  candidateId: null,
1161
1187
  gatePassed: false,
@@ -1163,9 +1189,21 @@ export async function runEvolveFromEdits(args, opts) {
1163
1189
  promotedFiles: [],
1164
1190
  loss: null,
1165
1191
  };
1166
- const fail = (code, message) => {
1192
+ // The change this run pertains to (derived from the hints path), used to write
1193
+ // the machine-readable evolution-result.json that `status` surfaces.
1194
+ const changeName = changeNameFromHints(args.fromLearn);
1195
+ const persist = () => persistEvolutionResult(opts.repoRoot, changeName, report, now);
1196
+ const fail = (code, outcome, message,
1197
+ // Pure CLI-misuse refusals (bad flags / unregistered target) do NOT write a
1198
+ // per-change evolution-result.json — they are not a defect of the change, and
1199
+ // a fat-fingered flag should not leave a durable `error-*` record that `status`
1200
+ // surfaces. Only paths that actually attempted the evolution persist a record.
1201
+ persistRecord = true) => {
1167
1202
  report.exitCode = code;
1203
+ report.outcome = outcome;
1168
1204
  report.error = message;
1205
+ if (persistRecord)
1206
+ persist();
1169
1207
  if (args.json) {
1170
1208
  stdout(JSON.stringify(report, null, 2));
1171
1209
  }
@@ -1177,10 +1215,25 @@ export async function runEvolveFromEdits(args, opts) {
1177
1215
  // Non-interactive contract: --yes is required (mirrors auto-evolve's one-button
1178
1216
  // confirmation), and --agent is REFUSED (this path is host-authored, never spawns).
1179
1217
  if (args.agent) {
1180
- return fail(2, '--agent is not allowed: evolve-from-edits is host-authored and never spawns the proposer.');
1218
+ return fail(2, 'error-bad-input', '--agent is not allowed: evolve-from-edits is host-authored and never spawns the proposer.', false);
1181
1219
  }
1182
1220
  if (!args.yes) {
1183
- return fail(2, '--yes is required: evolve-from-edits promotes onto your local files non-interactively.');
1221
+ return fail(2, 'error-bad-input', '--yes is required: evolve-from-edits promotes onto your local files non-interactively.', false);
1222
+ }
1223
+ // Reject an unregistered / kind-only sentinel target EARLY (before propose) with a
1224
+ // helpful list of concrete same-kind ids. An unpinned kind-only hint surfaces as
1225
+ // `<kind>:unspecified`, which is NOT a registered canonical target; feeding it back
1226
+ // here would otherwise fail late with a bare "Unknown canonical target". Exit code 1
1227
+ // matches the prior behavior (requireCanonicalTarget threw inside propose → exit 1).
1228
+ if (!lookupCanonicalTarget(args.evolveTarget)) {
1229
+ const kind = args.evolveTarget.includes(':') ? args.evolveTarget.split(':')[0] : '';
1230
+ const candidates = kind
1231
+ ? listCanonicalTargets({ kind: kind }).map((t) => t.id)
1232
+ : [];
1233
+ const hint = candidates.length > 0
1234
+ ? ` Concrete ${kind} targets you can pin: ${candidates.join(', ')}.`
1235
+ : ' Run `self-evolution targets` to list registered ids.';
1236
+ return fail(1, 'error-unknown-target', `--evolve-target "${args.evolveTarget}" is not a registered canonical target.${hint}`, false);
1184
1237
  }
1185
1238
  // 1) Read + shape-validate the host-authored edits (path or stdin).
1186
1239
  let editsInput;
@@ -1191,7 +1244,7 @@ export async function runEvolveFromEdits(args, opts) {
1191
1244
  editsInput = parseHostEditsInput(raw);
1192
1245
  }
1193
1246
  catch (err) {
1194
- return fail(2, `invalid --from-edits ${args.fromEdits}: ${err instanceof Error ? err.message : String(err)}`);
1247
+ return fail(2, 'error-bad-input', `invalid --from-edits ${args.fromEdits}: ${err instanceof Error ? err.message : String(err)}`, false);
1195
1248
  }
1196
1249
  const layout = resolveCandidateRepo(opts.repoRoot);
1197
1250
  const policy = resolveTargetEvolutionPolicy({
@@ -1226,7 +1279,14 @@ export async function runEvolveFromEdits(args, opts) {
1226
1279
  });
1227
1280
  }
1228
1281
  catch (err) {
1229
- return fail(1, `propose failed: ${err instanceof Error ? err.message : String(err)}`);
1282
+ const message = err instanceof Error ? err.message : String(err);
1283
+ // After the early --evolve-target check above this is mostly unreachable for
1284
+ // unknown targets, but keep the discrimination as defense in depth. Exit code 1
1285
+ // for both (matches the prior propose-catch behavior).
1286
+ const outcome = message.startsWith('Unknown canonical target')
1287
+ ? 'error-unknown-target'
1288
+ : 'error-runtime';
1289
+ return fail(1, outcome, `propose failed: ${message}`);
1230
1290
  }
1231
1291
  if (proposeResult.exitCode !== 0 || proposeResult.proposed.length === 0) {
1232
1292
  // A frozen target / unknown target / >1 group / no surviving group lands here.
@@ -1235,7 +1295,11 @@ export async function runEvolveFromEdits(args, opts) {
1235
1295
  proposeResult.skipped.map((s) => s.reason)[0] ??
1236
1296
  'no candidate was packaged from the host edits (target frozen, unknown, or no surviving signal)';
1237
1297
  report.reason = why;
1238
- finishEvolveFromEdits(report, args.json, stdout, stderr);
1298
+ // Legitimate refusal: exitCode stays 0 so the autonomous learn skill treats it
1299
+ // as "safe, move on" — do NOT propagate proposeResult.exitCode (2 for the
1300
+ // surviving-group guard).
1301
+ report.outcome = 'refused-no-surviving-hint';
1302
+ finishEvolveFromEdits(report, args.json, stdout, stderr, persist);
1239
1303
  return report;
1240
1304
  }
1241
1305
  const candidateId = proposeResult.proposed[0].candidateId;
@@ -1249,7 +1313,7 @@ export async function runEvolveFromEdits(args, opts) {
1249
1313
  });
1250
1314
  }
1251
1315
  catch (err) {
1252
- return fail(1, `gate error: ${err instanceof Error ? err.message : String(err)}`);
1316
+ return fail(1, 'error-runtime', `gate error: ${err instanceof Error ? err.message : String(err)}`);
1253
1317
  }
1254
1318
  report.gatePassed = gate.passed;
1255
1319
  if (!gate.passed) {
@@ -1257,7 +1321,8 @@ export async function runEvolveFromEdits(args, opts) {
1257
1321
  .filter((f) => f.severity === 'error')
1258
1322
  .map((f) => f.message)
1259
1323
  .join('; ') || 'placeholder/no-op diff or frozen target'}`;
1260
- finishEvolveFromEdits(report, args.json, stdout, stderr);
1324
+ report.outcome = 'refused-static-gate';
1325
+ finishEvolveFromEdits(report, args.json, stdout, stderr, persist);
1261
1326
  return report;
1262
1327
  }
1263
1328
  // 4) OBSERVED-VERIFIED evidence + auto-promote decision. Regenerate the change's
@@ -1268,16 +1333,22 @@ export async function runEvolveFromEdits(args, opts) {
1268
1333
  learnReport = await generateReport(changeNameFromHints(args.fromLearn));
1269
1334
  }
1270
1335
  catch (err) {
1271
- return fail(1, `learn report failed: ${err instanceof Error ? err.message : String(err)}`);
1336
+ return fail(1, 'error-runtime', `learn report failed: ${err instanceof Error ? err.message : String(err)}`);
1272
1337
  }
1273
1338
  report.loss = learnReport.fitnessSample?.loss?.loss ?? null;
1274
1339
  const evidence = isEvidenceComplete(learnReport);
1275
1340
  if (!evidence.ok) {
1276
1341
  report.reason = `evidence not observed-verified-green: ${evidence.reason}`;
1277
- finishEvolveFromEdits(report, args.json, stdout, stderr);
1342
+ report.outcome = 'refused-unverified-evidence';
1343
+ finishEvolveFromEdits(report, args.json, stdout, stderr, persist);
1278
1344
  return report;
1279
1345
  }
1280
1346
  const fitness = await readCandidateFitness(layout, candidateId);
1347
+ // Default-on health gate: compare THIS change's measured health (post) against
1348
+ // the recorded per-repo baseline (pre). No signal (stub/analyzer failed) ⇒
1349
+ // healthSignal is null ⇒ the gate cannot fire (forward bet preserved).
1350
+ const healthSignal = learnReport.fitnessSample?.healthSignal ?? null;
1351
+ const baseline = await readHealthBaseline(opts.repoRoot);
1281
1352
  const decision = shouldAutoPromote({
1282
1353
  gatePassed: true,
1283
1354
  targetEvolvable: isCanonicalTargetEvolvable(args.evolveTarget, policy),
@@ -1285,10 +1356,15 @@ export async function runEvolveFromEdits(args, opts) {
1285
1356
  meanLoss: fitness.meanLoss,
1286
1357
  baselineLoss: report.loss,
1287
1358
  requireProvenImprovement: args.requireProven === true,
1359
+ healthPenalty: healthSignal,
1360
+ baselineHealthPenalty: baseline?.healthPenalty ?? null,
1288
1361
  });
1289
1362
  if (!decision.promote) {
1290
1363
  report.reason = `auto-promote declined: ${decision.reason}`;
1291
- finishEvolveFromEdits(report, args.json, stdout, stderr);
1364
+ report.outcome = decision.reason.startsWith('code-health regression')
1365
+ ? 'refused-health-regression'
1366
+ : 'refused-auto-promote-declined';
1367
+ finishEvolveFromEdits(report, args.json, stdout, stderr, persist);
1292
1368
  return report;
1293
1369
  }
1294
1370
  // 5) PROMOTE onto the canonical LOCAL file(s).
@@ -1301,25 +1377,85 @@ export async function runEvolveFromEdits(args, opts) {
1301
1377
  report.promotedFiles = applied.appliedFiles.map((f) => f.file);
1302
1378
  }
1303
1379
  catch (err) {
1304
- return fail(1, `promote failed: ${err instanceof Error ? err.message : String(err)}`);
1380
+ return fail(1, 'error-runtime', `promote failed: ${err instanceof Error ? err.message : String(err)}`);
1381
+ }
1382
+ // Record the accepted health as the new baseline (best-effort; never fails the
1383
+ // promote) — only when this run had a real health signal. The next change's
1384
+ // gate compares against this value.
1385
+ if (healthSignal != null) {
1386
+ await writeHealthBaseline(opts.repoRoot, {
1387
+ healthPenalty: healthSignal,
1388
+ updatedAt: now().toISOString(),
1389
+ sourceChange: changeNameFromHints(args.fromLearn),
1390
+ candidateId,
1391
+ });
1305
1392
  }
1306
- finishEvolveFromEdits(report, args.json, stdout, stderr);
1393
+ report.outcome = 'promoted';
1394
+ finishEvolveFromEdits(report, args.json, stdout, stderr, persist);
1307
1395
  return report;
1308
1396
  }
1309
1397
  /**
1310
- * Best-effort change-name for the learn report when one is needed. The hints
1311
- * file path's parent dir is the handoff dir (`<...>/<change>/hints.json`); fall
1312
- * back to the file basename. learn re-resolves the active change anyway, so this
1313
- * only steers `generateLearnReport` toward the right change directory.
1398
+ * Best-effort change-name for the hints path passed to `--from-learn`.
1399
+ *
1400
+ * `persistLearnHints` writes the canonical handoff at
1401
+ * `<root>/.synergyspec-selfevolving/learn-handoffs/<change>/<timestamp>/hints.json`,
1402
+ * so on the REAL autonomous path the change name is the GRANDPARENT of hints.json
1403
+ * (the immediate parent is the timestamp). We detect that shape via the
1404
+ * `learn-handoffs` marker and return the grandparent; otherwise we fall back to the
1405
+ * immediate parent dir (the 2-level shape used by tests / hand-built paths), then
1406
+ * the file basename. This is the name used for BOTH the learn report and the
1407
+ * evolution-result.json write, so they cannot diverge.
1314
1408
  */
1315
1409
  function changeNameFromHints(hintsPath) {
1316
1410
  const abs = path.resolve(hintsPath);
1317
- const parent = path.basename(path.dirname(abs));
1411
+ const workdir = path.dirname(abs);
1412
+ const parent = path.basename(workdir);
1413
+ const grandparent = path.basename(path.dirname(workdir));
1414
+ // Canonical: learn-handoffs/<change>/<timestamp>/hints.json → change = grandparent.
1415
+ if (path.basename(path.dirname(path.dirname(workdir))) === 'learn-handoffs' && grandparent) {
1416
+ return grandparent;
1417
+ }
1318
1418
  if (parent && parent !== '.' && parent !== path.sep)
1319
1419
  return parent;
1320
1420
  return path.basename(abs, path.extname(abs));
1321
1421
  }
1322
- function finishEvolveFromEdits(report, json, stdout, stderr) {
1422
+ /**
1423
+ * Write a machine-readable evolution outcome beside the change's other evidence
1424
+ * (`synergyspec-selfevolving/changes/<change>/evolution-result.json`) so `status`
1425
+ * can surface whether self-evolution promoted, was safely refused, or hit a defect
1426
+ * — instead of the outcome only living in the agent-written learn-report.md.
1427
+ *
1428
+ * Best-effort: it NEVER throws (a write failure must not turn a successful promote
1429
+ * into an error) and it does NOT create the change dir — if the change dir is absent
1430
+ * (a typo'd/invalid invocation, or an already-archived change) it simply skips.
1431
+ */
1432
+ function persistEvolutionResult(repoRoot, changeName, report, now) {
1433
+ try {
1434
+ const changeDir = path.join(repoRoot, 'synergyspec-selfevolving', 'changes', changeName);
1435
+ if (!fs.existsSync(changeDir))
1436
+ return;
1437
+ const record = {
1438
+ schemaVersion: 1,
1439
+ changeName,
1440
+ outcome: report.outcome,
1441
+ promoted: report.promoted,
1442
+ reason: report.reason ?? report.error ?? null,
1443
+ targetId: report.targetId,
1444
+ candidateId: report.candidateId,
1445
+ gatePassed: report.gatePassed,
1446
+ promotedFiles: report.promotedFiles,
1447
+ loss: report.loss,
1448
+ timestamp: now().toISOString(),
1449
+ tool: 'evolve-from-edits',
1450
+ };
1451
+ fs.writeFileSync(path.join(changeDir, 'evolution-result.json'), `${JSON.stringify(record, null, 2)}\n`);
1452
+ }
1453
+ catch {
1454
+ // best-effort: a status-annotation write must never break the evolve run.
1455
+ }
1456
+ }
1457
+ function finishEvolveFromEdits(report, json, stdout, stderr, persist) {
1458
+ persist?.();
1323
1459
  if (json) {
1324
1460
  stdout(JSON.stringify(report, null, 2));
1325
1461
  return;
@@ -57,6 +57,19 @@ export function printStatusText(status, readiness) {
57
57
  else {
58
58
  console.log(chalk.green('Evidence: complete'));
59
59
  }
60
+ // Self-evolution outcome (visibility only; never gates archive-ready). A
61
+ // refused/failed evolution is surfaced in YELLOW so a silently-failed evolution
62
+ // can't hide inside an otherwise-green, archive-ready run.
63
+ const evolution = readiness.evolution;
64
+ if (evolution.status === 'promoted' && evolution.promoted) {
65
+ console.log(chalk.green(`Evolution: promoted -> ${evolution.targetId ?? '(target)'} (${evolution.promotedFiles.length} file(s))`));
66
+ }
67
+ else if (evolution.status === 'refused' || evolution.status === 'error' || evolution.status === 'promoted') {
68
+ console.log(chalk.yellow(`Evolution: ${evolution.status}${evolution.reason ? ` — ${evolution.reason}` : ''}`));
69
+ }
70
+ else {
71
+ console.log(chalk.dim('Evolution: not run'));
72
+ }
60
73
  }
61
74
  console.log();
62
75
  for (const artifact of status.artifacts) {
@@ -2,6 +2,13 @@ import { type ChangeStatus } from './artifact-graph/index.js';
2
2
  export type ArtifactWorkflowStatus = 'complete' | 'ready' | 'in-progress' | 'blocked';
3
3
  export type ChangeReadinessStatus = ArtifactWorkflowStatus | 'ready-to-apply';
4
4
  export type TaskReadinessStatus = 'no-tasks' | 'complete' | 'in-progress';
5
+ /**
6
+ * Coarse, display-oriented self-evolution outcome for the change. Derived from the
7
+ * CLI-written `evolution-result.json` (NOT the agent-written learn-report.md). It is
8
+ * surfaced for visibility only — it does NOT gate `isArchiveReady` (a safe refusal
9
+ * must not block archiving a finished change).
10
+ */
11
+ export type EvolutionOutcomeStatus = 'not-run' | 'promoted' | 'refused' | 'error';
5
12
  export interface ArtifactStatusSummary {
6
13
  done: number;
7
14
  ready: number;
@@ -26,6 +33,20 @@ export interface EvidenceReadiness {
26
33
  testPlanRequired: boolean;
27
34
  missing: string[];
28
35
  }
36
+ export interface EvolutionOutcomeReadiness {
37
+ /** `'not-run'` when no `evolution-result.json` exists for the change. */
38
+ status: EvolutionOutcomeStatus;
39
+ /** Why it stopped (refusal reason or error message); absent when promoted/not-run. */
40
+ reason?: string;
41
+ /** The canonical target the run was for (when known). */
42
+ targetId?: string;
43
+ /** Whether a canonical file was actually promoted. */
44
+ promoted: boolean;
45
+ /** LOCAL file paths written on promotion (empty unless promoted). */
46
+ promotedFiles: string[];
47
+ /** ISO timestamp of the recorded run. */
48
+ timestamp?: string;
49
+ }
29
50
  export interface ChangeReadiness {
30
51
  changeName: string;
31
52
  schemaName: string;
@@ -37,6 +58,8 @@ export interface ChangeReadiness {
37
58
  totalTasks: number;
38
59
  incompleteTasks: TaskItem[];
39
60
  evidence: EvidenceReadiness;
61
+ /** Self-evolution outcome for the change (visibility only; does not gate archive). */
62
+ evolution: EvolutionOutcomeReadiness;
40
63
  isArchiveReady: boolean;
41
64
  artifactGraph: ChangeStatus;
42
65
  }
@@ -54,6 +77,7 @@ export declare function toReadinessJson(readiness: ChangeReadiness): {
54
77
  totalTasks: number;
55
78
  incompleteTasks: TaskItem[];
56
79
  evidence: EvidenceReadiness;
80
+ evolution: EvolutionOutcomeReadiness;
57
81
  isArchiveReady: boolean;
58
82
  };
59
83
  //# sourceMappingURL=change-readiness.d.ts.map