synergyspec-selfevolving 1.1.10 → 1.1.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -3
- package/dist/commands/learn.js +78 -11
- package/dist/commands/self-evolution.d.ts +13 -0
- package/dist/commands/self-evolution.js +156 -20
- package/dist/commands/workflow/status.js +13 -0
- package/dist/core/change-readiness.d.ts +24 -0
- package/dist/core/change-readiness.js +47 -0
- package/dist/core/config-prompts.js +10 -0
- package/dist/core/fitness/health/local-source.d.ts +9 -6
- package/dist/core/fitness/health/local-source.js +9 -6
- package/dist/core/fitness/health/resolve-source.d.ts +4 -3
- package/dist/core/fitness/health/resolve-source.js +5 -4
- package/dist/core/fitness/sample.d.ts +17 -0
- package/dist/core/learn.d.ts +7 -0
- package/dist/core/learn.js +57 -5
- package/dist/core/project-config.d.ts +1 -0
- package/dist/core/project-config.js +11 -8
- package/dist/core/self-evolution/health-baseline.d.ts +24 -0
- package/dist/core/self-evolution/health-baseline.js +78 -0
- package/dist/core/self-evolution/index.d.ts +1 -0
- package/dist/core/self-evolution/index.js +1 -0
- package/dist/core/self-evolution/learn-observation-adapter.d.ts +16 -1
- package/dist/core/self-evolution/learn-observation-adapter.js +101 -15
- package/dist/core/self-evolution/promote.d.ts +25 -0
- package/dist/core/self-evolution/promote.js +21 -0
- package/dist/core/self-evolution/target-evolution.d.ts +7 -0
- package/dist/core/self-evolution/target-evolution.js +9 -0
- package/dist/core/templates/workflows/learn.js +10 -5
- package/package.json +2 -1
- package/scripts/code-health.py +1154 -0
package/README.md
CHANGED
|
@@ -208,9 +208,18 @@ What actually works today:
|
|
|
208
208
|
- **Per-change fitness loss** (`learn`): `loss = 0.7·(1 − pass_rate) +
|
|
209
209
|
0.3·health_penalty`, in `[0,1]`. The functional term comes from the change's
|
|
210
210
|
gen-test/run-test pass rate; the code-health term is fed by a swappable
|
|
211
|
-
`MetricSource` selected via `health:` in `synergyspec-selfevolving/config.yaml
|
|
212
|
-
|
|
213
|
-
|
|
211
|
+
`MetricSource` selected via `health:` in `synergyspec-selfevolving/config.yaml`.
|
|
212
|
+
New projects scaffold `source: local` (default-on): a dependency-free,
|
|
213
|
+
multi-language analyzer (`scripts/code-health.py`, Python 3 stdlib only) that
|
|
214
|
+
scores Python, Rust, C, and C++ — no server, no network. Set `source: stub` to
|
|
215
|
+
make the loss functional-only; `sonarqube` is also supported; `local-python` is
|
|
216
|
+
a back-compat alias for `local`. See
|
|
217
|
+
[docs/customization.md](docs/customization.md#code-health-metrics-self-evolution).
|
|
218
|
+
- **Code-health gate** (auto-evolve / `evolve-from-edits`): a measured code-health
|
|
219
|
+
regression vs the last accepted state blocks auto-promotion (and surfaces a
|
|
220
|
+
loud `health-signal-unavailable` observation if a configured analyzer can't
|
|
221
|
+
run). No health signal ⇒ no gate, so the loop is never blocked on a missing
|
|
222
|
+
measurement.
|
|
214
223
|
- **Candidate proposals** (`self-evolution propose-canonical`): turns aggregated
|
|
215
224
|
`learn` hints into human-gated candidate packages under
|
|
216
225
|
`.synergyspec-selfevolving/self-evolution/candidates/`. Proposal-only — no
|
package/dist/commands/learn.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import path from 'node:path';
|
|
2
2
|
import { applyLearnCandidates, applyLearnMemoryCandidates, generateLearnReport, renderLearnReport, } from '../core/learn.js';
|
|
3
|
-
import { generateEvolutionHints, lookupCanonicalTarget, persistLearnHints, resolveTargetEvolutionPolicy, resolveTargetLocalFilesReadonly, } from '../core/self-evolution/index.js';
|
|
3
|
+
import { detectUnbindableHintObservations, generateEvolutionHints, isCanonicalTargetEvolvable, listCanonicalTargets, lookupCanonicalTarget, persistLearnHints, resolveTargetEvolutionPolicy, resolveTargetLocalFilesReadonly, } from '../core/self-evolution/index.js';
|
|
4
4
|
import { readProjectConfig } from '../core/project-config.js';
|
|
5
5
|
import { assembleTrajectoryContext, } from '../core/learn/trajectory-assembler.js';
|
|
6
6
|
import { findTranscriptsForChange, resolveChangeDir, } from '../core/learn/trajectory-discovery.js';
|
|
@@ -43,6 +43,17 @@ export function registerLearnCommand(program) {
|
|
|
43
43
|
});
|
|
44
44
|
const evolutionHints = generateEvolutionHints(report, targetPolicy);
|
|
45
45
|
const evolutionPreview = await buildEvolutionPreview(evolutionHints, targetPolicy, projectRoot);
|
|
46
|
+
// Surface an unbindable kind-only hint (one that could not pin to a concrete
|
|
47
|
+
// target) as an actionable DEFECT observation, so a failed target binding is
|
|
48
|
+
// not silently rationalized as a safe gate refusal — but ONLY when the operator
|
|
49
|
+
// is actually trying to evolve (--apply / --persist-hints / a named
|
|
50
|
+
// --evolve-target). On a plain preview run the kind-only ambiguity is the
|
|
51
|
+
// designed state, not a defect, so a bare `learn <change>` stays byte-identical.
|
|
52
|
+
if (options.apply === true ||
|
|
53
|
+
options.persistHints === true ||
|
|
54
|
+
options.evolveTarget !== undefined) {
|
|
55
|
+
report.observations.push(...detectUnbindableHintObservations(evolutionHints, targetPolicy));
|
|
56
|
+
}
|
|
46
57
|
const applied = options.apply === true
|
|
47
58
|
? await applyLearnCandidates({
|
|
48
59
|
projectRoot,
|
|
@@ -349,22 +360,61 @@ function renderIngestHandoff(changeName, ingest, applied) {
|
|
|
349
360
|
return lines.join('\n');
|
|
350
361
|
}
|
|
351
362
|
function printJson(report, applied, evolutionPreview, hintsPath) {
|
|
363
|
+
// `mode` only tracks whether MEMORY candidates were applied (--apply). It does
|
|
364
|
+
// NOT reflect that --persist-hints wrote a hints file, which is what made the
|
|
365
|
+
// old `mode:"preview"` read as "nothing written". `wrote` makes every write this
|
|
366
|
+
// run produced explicit.
|
|
367
|
+
const wrote = [];
|
|
368
|
+
if (hintsPath)
|
|
369
|
+
wrote.push(hintsPath);
|
|
370
|
+
if (applied) {
|
|
371
|
+
for (const item of applied.written)
|
|
372
|
+
wrote.push(`memory:${item.memoryId}`);
|
|
373
|
+
}
|
|
374
|
+
// Flat "how many hints, and is each ready to evolve or does it need a pin?" view
|
|
375
|
+
// so the agent never has to infer readiness from the richer evolutionPreview.
|
|
376
|
+
const evolution = {
|
|
377
|
+
hintsGenerated: evolutionPreview.hintCount,
|
|
378
|
+
targets: evolutionPreview.targets.map((target) => ({
|
|
379
|
+
targetId: target.targetId,
|
|
380
|
+
targetKind: target.targetKind,
|
|
381
|
+
pinned: target.targetId !== null,
|
|
382
|
+
unbindable: target.needsDisambiguation,
|
|
383
|
+
candidateTargetIds: target.candidateTargetIds,
|
|
384
|
+
hintIds: target.hintIds,
|
|
385
|
+
})),
|
|
386
|
+
};
|
|
352
387
|
console.log(JSON.stringify({
|
|
353
388
|
mode: applied ? 'apply' : 'preview',
|
|
354
389
|
...report,
|
|
355
390
|
applied,
|
|
356
391
|
evolutionPreview,
|
|
357
392
|
...(hintsPath ? { hintsPath } : {}),
|
|
393
|
+
wrote,
|
|
394
|
+
evolution,
|
|
358
395
|
}, null, 2));
|
|
359
396
|
}
|
|
360
397
|
async function buildEvolutionPreview(hints, targetPolicy, projectRoot) {
|
|
361
398
|
const byTarget = new Map();
|
|
362
399
|
for (const hint of hints) {
|
|
363
|
-
const
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
400
|
+
const pinned = hint.affectedTargetId ?? null;
|
|
401
|
+
// Group key only — an unpinned kind-only hint groups under an internal
|
|
402
|
+
// `__unbindable__:<kind>` key that is NEVER emitted (the emitted `targetId`
|
|
403
|
+
// stays null), so the `<kind>:unspecified` placeholder no longer leaks out.
|
|
404
|
+
const groupKey = pinned ?? `__unbindable__:${hint.affectedTargetKind}`;
|
|
405
|
+
const target = pinned ? lookupCanonicalTarget(pinned) : undefined;
|
|
406
|
+
const current = byTarget.get(groupKey) ?? {
|
|
407
|
+
targetId: pinned,
|
|
367
408
|
targetKind: hint.affectedTargetKind,
|
|
409
|
+
needsDisambiguation: pinned === null,
|
|
410
|
+
// Only offer same-kind ids that are actually EVOLVABLE under the policy, so the
|
|
411
|
+
// operator is never told to `--evolve-target` a frozen id (which would then be
|
|
412
|
+
// refused). Matches detectUnbindableHintObservations' candidate list exactly.
|
|
413
|
+
candidateTargetIds: pinned === null
|
|
414
|
+
? listCanonicalTargets({ kind: hint.affectedTargetKind })
|
|
415
|
+
.filter((candidate) => isCanonicalTargetEvolvable(candidate.id, targetPolicy))
|
|
416
|
+
.map((candidate) => candidate.id)
|
|
417
|
+
: [],
|
|
368
418
|
files: target?.files ? [...target.files] : [],
|
|
369
419
|
localFiles: [],
|
|
370
420
|
hintIds: [],
|
|
@@ -391,7 +441,7 @@ async function buildEvolutionPreview(hints, targetPolicy, projectRoot) {
|
|
|
391
441
|
});
|
|
392
442
|
}
|
|
393
443
|
}
|
|
394
|
-
byTarget.set(
|
|
444
|
+
byTarget.set(groupKey, current);
|
|
395
445
|
}
|
|
396
446
|
// Resolve each concrete target to its LOCAL file path(s) in THIS repo,
|
|
397
447
|
// best-effort and READ-ONLY (never materialize during a plain preview). A
|
|
@@ -399,7 +449,7 @@ async function buildEvolutionPreview(hints, targetPolicy, projectRoot) {
|
|
|
399
449
|
// empty and the renderer falls back to the registry source path. On any throw
|
|
400
450
|
// we leave localFiles empty and fall back as well.
|
|
401
451
|
for (const target of byTarget.values()) {
|
|
402
|
-
if (
|
|
452
|
+
if (target.targetId === null)
|
|
403
453
|
continue;
|
|
404
454
|
try {
|
|
405
455
|
target.localFiles = await resolveTargetLocalFilesReadonly(target.targetId, projectRoot);
|
|
@@ -418,7 +468,7 @@ async function buildEvolutionPreview(hints, targetPolicy, projectRoot) {
|
|
|
418
468
|
...(targetPolicy.source.cliEvolve ? { cliEvolve: targetPolicy.source.cliEvolve } : {}),
|
|
419
469
|
...(targetPolicy.source.cliFreeze ? { cliFreeze: targetPolicy.source.cliFreeze } : {}),
|
|
420
470
|
},
|
|
421
|
-
targets: [...byTarget.values()].sort((left, right) => left.targetId.localeCompare(right.targetId)),
|
|
471
|
+
targets: [...byTarget.values()].sort((left, right) => (left.targetId ?? `~${left.targetKind}`).localeCompare(right.targetId ?? `~${right.targetKind}`)),
|
|
422
472
|
};
|
|
423
473
|
}
|
|
424
474
|
function renderLearnTransparency(report, applied, evolutionPreview, hintsPath, options) {
|
|
@@ -463,7 +513,9 @@ function renderLearnTransparency(report, applied, evolutionPreview, hintsPath, o
|
|
|
463
513
|
}
|
|
464
514
|
else {
|
|
465
515
|
for (const target of evolutionPreview.targets) {
|
|
466
|
-
lines.push(
|
|
516
|
+
lines.push(target.targetId === null
|
|
517
|
+
? `- Target: (${target.targetKind}, unpinned — needs --evolve-target)`
|
|
518
|
+
: `- Target: ${target.targetId} (${target.targetKind})`);
|
|
467
519
|
// Prefer the CONCRETE local file the writer would edit; fall back to the
|
|
468
520
|
// registry source path. Only a genuinely kind-only/ambiguous group with no
|
|
469
521
|
// resolvable file shows the 'not pinned' notice.
|
|
@@ -484,7 +536,7 @@ function renderLearnTransparency(report, applied, evolutionPreview, hintsPath, o
|
|
|
484
536
|
// A "concrete" target is one pinned to a registered canonical id (not a
|
|
485
537
|
// kind-only group, which carries a null `targetId`). The host agent authors edits.json's
|
|
486
538
|
// full new content for that target's resolved LOCAL file.
|
|
487
|
-
const concreteTargets = evolutionPreview.targets.filter((target) =>
|
|
539
|
+
const concreteTargets = evolutionPreview.targets.filter((target) => target.targetId !== null);
|
|
488
540
|
const concreteTarget = concreteTargets.length > 0 ? concreteTargets[0] : undefined;
|
|
489
541
|
if (hintsPath && concreteTarget) {
|
|
490
542
|
lines.push(`- Hints written: ${hintsPath}`);
|
|
@@ -494,7 +546,22 @@ function renderLearnTransparency(report, applied, evolutionPreview, hintsPath, o
|
|
|
494
546
|
}
|
|
495
547
|
else if (hintsPath) {
|
|
496
548
|
lines.push(`- Hints written: ${hintsPath}`);
|
|
497
|
-
|
|
549
|
+
// A kind-only (`<kind>:unspecified`) hint can't be promoted by
|
|
550
|
+
// evolve-from-edits until it is pinned to ONE concrete target. The remedy is
|
|
551
|
+
// to NAME a single target via --evolve-target — NOT to "widen the policy"
|
|
552
|
+
// (widening keeps several same-kind targets evolvable, so the hint stays
|
|
553
|
+
// unpinned). List the registered candidates so the operator can pick one.
|
|
554
|
+
const candidates = [
|
|
555
|
+
...new Set(evolutionPreview.targets
|
|
556
|
+
.filter((target) => target.targetId === null)
|
|
557
|
+
.flatMap((target) => target.candidateTargetIds)),
|
|
558
|
+
];
|
|
559
|
+
if (candidates.length > 0) {
|
|
560
|
+
lines.push(`- No single concrete target resolved yet. Pin one by re-running with --evolve-target <id> (candidates: ${candidates.join(', ')}); then evolve-from-edits can consume the hint.`);
|
|
561
|
+
}
|
|
562
|
+
else {
|
|
563
|
+
lines.push('- No single concrete target resolved yet; add evidence so a specific file can be pinned for evolve-from-edits.');
|
|
564
|
+
}
|
|
498
565
|
}
|
|
499
566
|
else if (evolutionPreview.targets.length > 0) {
|
|
500
567
|
lines.push(`- Persist the optimization evidence: synergyspec-selfevolving learn "${report.changeName}" --persist-hints${renderTargetArgs(options)}`);
|
|
@@ -179,6 +179,8 @@ export interface AutoEvolveReport {
|
|
|
179
179
|
changeNames: string[];
|
|
180
180
|
/** Mean per-change loss (functional ⊕ health) from learn; null when unmeasurable. */
|
|
181
181
|
loss: number | null;
|
|
182
|
+
/** Mean RAW code-health penalty across the change(s); null when no health signal. */
|
|
183
|
+
healthPenalty?: number | null;
|
|
182
184
|
hintCount: number;
|
|
183
185
|
hintsPaths: string[];
|
|
184
186
|
proposed: string[];
|
|
@@ -237,8 +239,19 @@ export interface RunEvolveFromEditsOptions {
|
|
|
237
239
|
*/
|
|
238
240
|
generateReport?: (changeName: string) => Promise<LearnReport>;
|
|
239
241
|
}
|
|
242
|
+
/**
|
|
243
|
+
* Typed, machine-readable result of an evolve-from-edits run. Refusals (the
|
|
244
|
+
* `refused-*` values) are LEGITIMATE non-promotions and keep `exitCode: 0` so the
|
|
245
|
+
* autonomous learn skill treats them as "safe, move on"; only the `error-*` values
|
|
246
|
+
* are non-zero. This is additive to the human-facing `reason`/`error` strings — it
|
|
247
|
+
* lets callers (and `status`) tell "did it promote, and if not, was that a safe
|
|
248
|
+
* refusal or a defect?" without parsing prose.
|
|
249
|
+
*/
|
|
250
|
+
export type EvolveFromEditsOutcome = 'promoted' | 'refused-no-surviving-hint' | 'refused-static-gate' | 'refused-unverified-evidence' | 'refused-auto-promote-declined' | 'refused-health-regression' | 'error-unknown-target' | 'error-bad-input' | 'error-runtime';
|
|
240
251
|
export interface EvolveFromEditsReport {
|
|
241
252
|
exitCode: number;
|
|
253
|
+
/** Typed result. `refused-*` ⇒ exitCode 0 (safe); `error-*` ⇒ non-zero. */
|
|
254
|
+
outcome: EvolveFromEditsOutcome;
|
|
242
255
|
targetId: string;
|
|
243
256
|
/** The host candidate that was packaged (the gate/promote subject). */
|
|
244
257
|
candidateId: string | null;
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import * as fs from 'node:fs';
|
|
2
2
|
import * as path from 'node:path';
|
|
3
3
|
import fastGlob from 'fast-glob';
|
|
4
|
-
import { aggregateLearnEvolutionHints, applyCandidatePromotion, rollbackCandidatePromotion, shouldAutoPromote, isEvidenceComplete, generateEvolutionHints, persistLearnHints, readCandidateFitness, readCandidatePackage, resolveTargetLocalFiles, CANONICAL_CANDIDATE_SOURCES, CANONICAL_TARGETS, collectArchiveExperiences, EVOLVABLE_PART_DESCRIPTIONS, EVOLVABLE_PARTS, evaluateTaskDecompositionForChange, evaluateToolEvolutionCandidate, generateCandidateId, generatePromotionReport, groupCandidatesByTarget, rankCandidatesForTarget, makeReplayRunChange, scoreCandidatesByReplay, isEvolutionPartEnabled, findSimilarArchiveExperiences, listCanonicalTargets, lookupCanonicalTarget, runCanonicalProposerAgent, validateCandidateEdits, renderUnifiedDiff, CanonicalProposerNoOp, resolveTargetEvolutionPolicy, isCanonicalTargetEvolvable, parseEvolutionSwitchOptions, readTemplateVariantManifest, renderAlignmentReport, renderArchiveExperienceBlock, renderStaticGateSummary, renderToolEvolutionGuardReport, renderEvolutionSwitches, requireCanonicalTarget, resolveCandidateRepo, runStaticCandidateGate, selectTemplateVariant, shouldTriggerCandidate, validateLearnEvolutionHint, writeCandidatePackage, verifySpecCodeAlignmentForChange, } from '../core/self-evolution/index.js';
|
|
4
|
+
import { aggregateLearnEvolutionHints, applyCandidatePromotion, rollbackCandidatePromotion, shouldAutoPromote, isEvidenceComplete, generateEvolutionHints, persistLearnHints, readCandidateFitness, readHealthBaseline, writeHealthBaseline, readCandidatePackage, resolveTargetLocalFiles, CANONICAL_CANDIDATE_SOURCES, CANONICAL_TARGETS, collectArchiveExperiences, EVOLVABLE_PART_DESCRIPTIONS, EVOLVABLE_PARTS, evaluateTaskDecompositionForChange, evaluateToolEvolutionCandidate, generateCandidateId, generatePromotionReport, groupCandidatesByTarget, rankCandidatesForTarget, makeReplayRunChange, scoreCandidatesByReplay, isEvolutionPartEnabled, findSimilarArchiveExperiences, listCanonicalTargets, lookupCanonicalTarget, runCanonicalProposerAgent, validateCandidateEdits, renderUnifiedDiff, CanonicalProposerNoOp, resolveTargetEvolutionPolicy, isCanonicalTargetEvolvable, parseEvolutionSwitchOptions, readTemplateVariantManifest, renderAlignmentReport, renderArchiveExperienceBlock, renderStaticGateSummary, renderToolEvolutionGuardReport, renderEvolutionSwitches, requireCanonicalTarget, resolveCandidateRepo, runStaticCandidateGate, selectTemplateVariant, shouldTriggerCandidate, validateLearnEvolutionHint, writeCandidatePackage, verifySpecCodeAlignmentForChange, } from '../core/self-evolution/index.js';
|
|
5
5
|
import { generateLearnReport } from '../core/learn.js';
|
|
6
6
|
import { resolveMetricSource } from '../core/fitness/index.js';
|
|
7
7
|
import { validateChangeExists, validateSchemaExists } from './workflow/shared.js';
|
|
@@ -957,6 +957,7 @@ export async function runAutoEvolve(args, opts) {
|
|
|
957
957
|
// and several aggregate a recurring signal across them. Each change's hints are
|
|
958
958
|
// persisted; a failed change is skipped, not fatal.
|
|
959
959
|
const losses = [];
|
|
960
|
+
const healthSignals = [];
|
|
960
961
|
const hintsPaths = [];
|
|
961
962
|
let totalHints = 0;
|
|
962
963
|
for (const changeName of args.changeNames) {
|
|
@@ -974,6 +975,9 @@ export async function runAutoEvolve(args, opts) {
|
|
|
974
975
|
const l = learnReport.fitnessSample?.loss?.loss;
|
|
975
976
|
if (typeof l === 'number')
|
|
976
977
|
losses.push(l);
|
|
978
|
+
const h = learnReport.fitnessSample?.healthSignal;
|
|
979
|
+
if (typeof h === 'number')
|
|
980
|
+
healthSignals.push(h);
|
|
977
981
|
const hints = generateEvolutionHints(learnReport, policy);
|
|
978
982
|
totalHints += hints.length;
|
|
979
983
|
if (hints.length === 0)
|
|
@@ -981,6 +985,13 @@ export async function runAutoEvolve(args, opts) {
|
|
|
981
985
|
hintsPaths.push(await persistLearnHints({ projectRoot: opts.repoRoot, changeName, hints, now }));
|
|
982
986
|
}
|
|
983
987
|
report.loss = losses.length > 0 ? losses.reduce((a, b) => a + b, 0) / losses.length : null;
|
|
988
|
+
// Mean RAW health signal across the change(s); null when none were measured
|
|
989
|
+
// (stub source / no signal) ⇒ the health gate below cannot fire.
|
|
990
|
+
const meanHealth = healthSignals.length > 0
|
|
991
|
+
? healthSignals.reduce((a, b) => a + b, 0) / healthSignals.length
|
|
992
|
+
: null;
|
|
993
|
+
report.healthPenalty = meanHealth;
|
|
994
|
+
const healthBaseline = await readHealthBaseline(opts.repoRoot);
|
|
984
995
|
report.hintCount = totalHints;
|
|
985
996
|
report.hintsPaths = hintsPaths;
|
|
986
997
|
if (hintsPaths.length === 0) {
|
|
@@ -1067,6 +1078,8 @@ export async function runAutoEvolve(args, opts) {
|
|
|
1067
1078
|
meanLoss: fitness.meanLoss,
|
|
1068
1079
|
baselineLoss: report.loss,
|
|
1069
1080
|
requireProvenImprovement: args.requireProven === true,
|
|
1081
|
+
healthPenalty: meanHealth,
|
|
1082
|
+
baselineHealthPenalty: healthBaseline?.healthPenalty ?? null,
|
|
1070
1083
|
});
|
|
1071
1084
|
if (!autoPromote) {
|
|
1072
1085
|
report.skipped.push({
|
|
@@ -1097,6 +1110,16 @@ export async function runAutoEvolve(args, opts) {
|
|
|
1097
1110
|
});
|
|
1098
1111
|
}
|
|
1099
1112
|
}
|
|
1113
|
+
// Record the accepted health as the new per-repo baseline (best-effort) when
|
|
1114
|
+
// this run promoted something and had a real health signal. The next run's
|
|
1115
|
+
// health gate compares against this value.
|
|
1116
|
+
if (report.promoted.length > 0 && meanHealth != null) {
|
|
1117
|
+
await writeHealthBaseline(opts.repoRoot, {
|
|
1118
|
+
healthPenalty: meanHealth,
|
|
1119
|
+
updatedAt: now().toISOString(),
|
|
1120
|
+
sourceChange: args.changeNames.join(','),
|
|
1121
|
+
});
|
|
1122
|
+
}
|
|
1100
1123
|
finishAutoEvolve(report, args.json, stdout, stderr);
|
|
1101
1124
|
return report;
|
|
1102
1125
|
}
|
|
@@ -1156,6 +1179,9 @@ export async function runEvolveFromEdits(args, opts) {
|
|
|
1156
1179
|
((changeName) => generateLearnReport({ projectRoot: opts.repoRoot, changeName }));
|
|
1157
1180
|
const report = {
|
|
1158
1181
|
exitCode: 0,
|
|
1182
|
+
// Loud fallback: a terminal path that forgets to set its outcome surfaces as
|
|
1183
|
+
// an error rather than a silent (and wrong) success.
|
|
1184
|
+
outcome: 'error-runtime',
|
|
1159
1185
|
targetId: args.evolveTarget,
|
|
1160
1186
|
candidateId: null,
|
|
1161
1187
|
gatePassed: false,
|
|
@@ -1163,9 +1189,21 @@ export async function runEvolveFromEdits(args, opts) {
|
|
|
1163
1189
|
promotedFiles: [],
|
|
1164
1190
|
loss: null,
|
|
1165
1191
|
};
|
|
1166
|
-
|
|
1192
|
+
// The change this run pertains to (derived from the hints path), used to write
|
|
1193
|
+
// the machine-readable evolution-result.json that `status` surfaces.
|
|
1194
|
+
const changeName = changeNameFromHints(args.fromLearn);
|
|
1195
|
+
const persist = () => persistEvolutionResult(opts.repoRoot, changeName, report, now);
|
|
1196
|
+
const fail = (code, outcome, message,
|
|
1197
|
+
// Pure CLI-misuse refusals (bad flags / unregistered target) do NOT write a
|
|
1198
|
+
// per-change evolution-result.json — they are not a defect of the change, and
|
|
1199
|
+
// a fat-fingered flag should not leave a durable `error-*` record that `status`
|
|
1200
|
+
// surfaces. Only paths that actually attempted the evolution persist a record.
|
|
1201
|
+
persistRecord = true) => {
|
|
1167
1202
|
report.exitCode = code;
|
|
1203
|
+
report.outcome = outcome;
|
|
1168
1204
|
report.error = message;
|
|
1205
|
+
if (persistRecord)
|
|
1206
|
+
persist();
|
|
1169
1207
|
if (args.json) {
|
|
1170
1208
|
stdout(JSON.stringify(report, null, 2));
|
|
1171
1209
|
}
|
|
@@ -1177,10 +1215,25 @@ export async function runEvolveFromEdits(args, opts) {
|
|
|
1177
1215
|
// Non-interactive contract: --yes is required (mirrors auto-evolve's one-button
|
|
1178
1216
|
// confirmation), and --agent is REFUSED (this path is host-authored, never spawns).
|
|
1179
1217
|
if (args.agent) {
|
|
1180
|
-
return fail(2, '--agent is not allowed: evolve-from-edits is host-authored and never spawns the proposer.');
|
|
1218
|
+
return fail(2, 'error-bad-input', '--agent is not allowed: evolve-from-edits is host-authored and never spawns the proposer.', false);
|
|
1181
1219
|
}
|
|
1182
1220
|
if (!args.yes) {
|
|
1183
|
-
return fail(2, '--yes is required: evolve-from-edits promotes onto your local files non-interactively.');
|
|
1221
|
+
return fail(2, 'error-bad-input', '--yes is required: evolve-from-edits promotes onto your local files non-interactively.', false);
|
|
1222
|
+
}
|
|
1223
|
+
// Reject an unregistered / kind-only sentinel target EARLY (before propose) with a
|
|
1224
|
+
// helpful list of concrete same-kind ids. An unpinned kind-only hint surfaces as
|
|
1225
|
+
// `<kind>:unspecified`, which is NOT a registered canonical target; feeding it back
|
|
1226
|
+
// here would otherwise fail late with a bare "Unknown canonical target". Exit code 1
|
|
1227
|
+
// matches the prior behavior (requireCanonicalTarget threw inside propose → exit 1).
|
|
1228
|
+
if (!lookupCanonicalTarget(args.evolveTarget)) {
|
|
1229
|
+
const kind = args.evolveTarget.includes(':') ? args.evolveTarget.split(':')[0] : '';
|
|
1230
|
+
const candidates = kind
|
|
1231
|
+
? listCanonicalTargets({ kind: kind }).map((t) => t.id)
|
|
1232
|
+
: [];
|
|
1233
|
+
const hint = candidates.length > 0
|
|
1234
|
+
? ` Concrete ${kind} targets you can pin: ${candidates.join(', ')}.`
|
|
1235
|
+
: ' Run `self-evolution targets` to list registered ids.';
|
|
1236
|
+
return fail(1, 'error-unknown-target', `--evolve-target "${args.evolveTarget}" is not a registered canonical target.${hint}`, false);
|
|
1184
1237
|
}
|
|
1185
1238
|
// 1) Read + shape-validate the host-authored edits (path or stdin).
|
|
1186
1239
|
let editsInput;
|
|
@@ -1191,7 +1244,7 @@ export async function runEvolveFromEdits(args, opts) {
|
|
|
1191
1244
|
editsInput = parseHostEditsInput(raw);
|
|
1192
1245
|
}
|
|
1193
1246
|
catch (err) {
|
|
1194
|
-
return fail(2, `invalid --from-edits ${args.fromEdits}: ${err instanceof Error ? err.message : String(err)}
|
|
1247
|
+
return fail(2, 'error-bad-input', `invalid --from-edits ${args.fromEdits}: ${err instanceof Error ? err.message : String(err)}`, false);
|
|
1195
1248
|
}
|
|
1196
1249
|
const layout = resolveCandidateRepo(opts.repoRoot);
|
|
1197
1250
|
const policy = resolveTargetEvolutionPolicy({
|
|
@@ -1226,7 +1279,14 @@ export async function runEvolveFromEdits(args, opts) {
|
|
|
1226
1279
|
});
|
|
1227
1280
|
}
|
|
1228
1281
|
catch (err) {
|
|
1229
|
-
|
|
1282
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
1283
|
+
// After the early --evolve-target check above this is mostly unreachable for
|
|
1284
|
+
// unknown targets, but keep the discrimination as defense in depth. Exit code 1
|
|
1285
|
+
// for both (matches the prior propose-catch behavior).
|
|
1286
|
+
const outcome = message.startsWith('Unknown canonical target')
|
|
1287
|
+
? 'error-unknown-target'
|
|
1288
|
+
: 'error-runtime';
|
|
1289
|
+
return fail(1, outcome, `propose failed: ${message}`);
|
|
1230
1290
|
}
|
|
1231
1291
|
if (proposeResult.exitCode !== 0 || proposeResult.proposed.length === 0) {
|
|
1232
1292
|
// A frozen target / unknown target / >1 group / no surviving group lands here.
|
|
@@ -1235,7 +1295,11 @@ export async function runEvolveFromEdits(args, opts) {
|
|
|
1235
1295
|
proposeResult.skipped.map((s) => s.reason)[0] ??
|
|
1236
1296
|
'no candidate was packaged from the host edits (target frozen, unknown, or no surviving signal)';
|
|
1237
1297
|
report.reason = why;
|
|
1238
|
-
|
|
1298
|
+
// Legitimate refusal: exitCode stays 0 so the autonomous learn skill treats it
|
|
1299
|
+
// as "safe, move on" — do NOT propagate proposeResult.exitCode (2 for the
|
|
1300
|
+
// surviving-group guard).
|
|
1301
|
+
report.outcome = 'refused-no-surviving-hint';
|
|
1302
|
+
finishEvolveFromEdits(report, args.json, stdout, stderr, persist);
|
|
1239
1303
|
return report;
|
|
1240
1304
|
}
|
|
1241
1305
|
const candidateId = proposeResult.proposed[0].candidateId;
|
|
@@ -1249,7 +1313,7 @@ export async function runEvolveFromEdits(args, opts) {
|
|
|
1249
1313
|
});
|
|
1250
1314
|
}
|
|
1251
1315
|
catch (err) {
|
|
1252
|
-
return fail(1, `gate error: ${err instanceof Error ? err.message : String(err)}`);
|
|
1316
|
+
return fail(1, 'error-runtime', `gate error: ${err instanceof Error ? err.message : String(err)}`);
|
|
1253
1317
|
}
|
|
1254
1318
|
report.gatePassed = gate.passed;
|
|
1255
1319
|
if (!gate.passed) {
|
|
@@ -1257,7 +1321,8 @@ export async function runEvolveFromEdits(args, opts) {
|
|
|
1257
1321
|
.filter((f) => f.severity === 'error')
|
|
1258
1322
|
.map((f) => f.message)
|
|
1259
1323
|
.join('; ') || 'placeholder/no-op diff or frozen target'}`;
|
|
1260
|
-
|
|
1324
|
+
report.outcome = 'refused-static-gate';
|
|
1325
|
+
finishEvolveFromEdits(report, args.json, stdout, stderr, persist);
|
|
1261
1326
|
return report;
|
|
1262
1327
|
}
|
|
1263
1328
|
// 4) OBSERVED-VERIFIED evidence + auto-promote decision. Regenerate the change's
|
|
@@ -1268,16 +1333,22 @@ export async function runEvolveFromEdits(args, opts) {
|
|
|
1268
1333
|
learnReport = await generateReport(changeNameFromHints(args.fromLearn));
|
|
1269
1334
|
}
|
|
1270
1335
|
catch (err) {
|
|
1271
|
-
return fail(1, `learn report failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
1336
|
+
return fail(1, 'error-runtime', `learn report failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
1272
1337
|
}
|
|
1273
1338
|
report.loss = learnReport.fitnessSample?.loss?.loss ?? null;
|
|
1274
1339
|
const evidence = isEvidenceComplete(learnReport);
|
|
1275
1340
|
if (!evidence.ok) {
|
|
1276
1341
|
report.reason = `evidence not observed-verified-green: ${evidence.reason}`;
|
|
1277
|
-
|
|
1342
|
+
report.outcome = 'refused-unverified-evidence';
|
|
1343
|
+
finishEvolveFromEdits(report, args.json, stdout, stderr, persist);
|
|
1278
1344
|
return report;
|
|
1279
1345
|
}
|
|
1280
1346
|
const fitness = await readCandidateFitness(layout, candidateId);
|
|
1347
|
+
// Default-on health gate: compare THIS change's measured health (post) against
|
|
1348
|
+
// the recorded per-repo baseline (pre). No signal (stub/analyzer failed) ⇒
|
|
1349
|
+
// healthSignal is null ⇒ the gate cannot fire (forward bet preserved).
|
|
1350
|
+
const healthSignal = learnReport.fitnessSample?.healthSignal ?? null;
|
|
1351
|
+
const baseline = await readHealthBaseline(opts.repoRoot);
|
|
1281
1352
|
const decision = shouldAutoPromote({
|
|
1282
1353
|
gatePassed: true,
|
|
1283
1354
|
targetEvolvable: isCanonicalTargetEvolvable(args.evolveTarget, policy),
|
|
@@ -1285,10 +1356,15 @@ export async function runEvolveFromEdits(args, opts) {
|
|
|
1285
1356
|
meanLoss: fitness.meanLoss,
|
|
1286
1357
|
baselineLoss: report.loss,
|
|
1287
1358
|
requireProvenImprovement: args.requireProven === true,
|
|
1359
|
+
healthPenalty: healthSignal,
|
|
1360
|
+
baselineHealthPenalty: baseline?.healthPenalty ?? null,
|
|
1288
1361
|
});
|
|
1289
1362
|
if (!decision.promote) {
|
|
1290
1363
|
report.reason = `auto-promote declined: ${decision.reason}`;
|
|
1291
|
-
|
|
1364
|
+
report.outcome = decision.reason.startsWith('code-health regression')
|
|
1365
|
+
? 'refused-health-regression'
|
|
1366
|
+
: 'refused-auto-promote-declined';
|
|
1367
|
+
finishEvolveFromEdits(report, args.json, stdout, stderr, persist);
|
|
1292
1368
|
return report;
|
|
1293
1369
|
}
|
|
1294
1370
|
// 5) PROMOTE onto the canonical LOCAL file(s).
|
|
@@ -1301,25 +1377,85 @@ export async function runEvolveFromEdits(args, opts) {
|
|
|
1301
1377
|
report.promotedFiles = applied.appliedFiles.map((f) => f.file);
|
|
1302
1378
|
}
|
|
1303
1379
|
catch (err) {
|
|
1304
|
-
return fail(1, `promote failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
1380
|
+
return fail(1, 'error-runtime', `promote failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
1381
|
+
}
|
|
1382
|
+
// Record the accepted health as the new baseline (best-effort; never fails the
|
|
1383
|
+
// promote) — only when this run had a real health signal. The next change's
|
|
1384
|
+
// gate compares against this value.
|
|
1385
|
+
if (healthSignal != null) {
|
|
1386
|
+
await writeHealthBaseline(opts.repoRoot, {
|
|
1387
|
+
healthPenalty: healthSignal,
|
|
1388
|
+
updatedAt: now().toISOString(),
|
|
1389
|
+
sourceChange: changeNameFromHints(args.fromLearn),
|
|
1390
|
+
candidateId,
|
|
1391
|
+
});
|
|
1305
1392
|
}
|
|
1306
|
-
|
|
1393
|
+
report.outcome = 'promoted';
|
|
1394
|
+
finishEvolveFromEdits(report, args.json, stdout, stderr, persist);
|
|
1307
1395
|
return report;
|
|
1308
1396
|
}
|
|
1309
1397
|
/**
 * Derive a best-effort change name from the hints path given to `--from-learn`.
 *
 * Resolution order:
 *   1. Canonical handoff layout, as written by `persistLearnHints`:
 *      `<root>/.synergyspec-selfevolving/learn-handoffs/<change>/<timestamp>/hints.json`.
 *      When the directory three levels above the file is named `learn-handoffs`,
 *      the change name is the GRANDPARENT of hints.json (the immediate parent is
 *      only the timestamp directory).
 *   2. Otherwise the immediate parent directory name (the 2-level shape used by
 *      tests and hand-built paths).
 *   3. Otherwise the file's own basename with its extension stripped.
 *
 * The same derived name is meant to key both the learn report and the
 * evolution-result.json write, so the two cannot drift apart.
 *
 * @param hintsPath Path to the hints file; relative paths are resolved against the cwd.
 * @returns The derived change name.
 */
function changeNameFromHints(hintsPath) {
    const resolved = path.resolve(hintsPath);
    const hintsDir = path.dirname(resolved);
    const changeLevelDir = path.dirname(hintsDir);
    const markerName = path.basename(path.dirname(changeLevelDir));
    const changeCandidate = path.basename(changeLevelDir);
    // Canonical: learn-handoffs/<change>/<timestamp>/hints.json → change = grandparent.
    const isCanonicalHandoff = markerName === 'learn-handoffs' && Boolean(changeCandidate);
    if (isCanonicalHandoff) {
        return changeCandidate;
    }
    // Non-canonical shape: prefer the containing directory's name when it is usable.
    const parentName = path.basename(hintsDir);
    const parentUsable = Boolean(parentName) && parentName !== '.' && parentName !== path.sep;
    return parentUsable
        ? parentName
        : path.basename(resolved, path.extname(resolved));
}
|
|
1322
|
-
|
|
1422
|
+
/**
 * Write a machine-readable evolution outcome beside the change's other evidence
 * (`synergyspec-selfevolving/changes/<change>/evolution-result.json`) so `status`
 * can surface whether self-evolution promoted, was safely refused, or hit a defect
 * — instead of the outcome only living in the agent-written learn-report.md.
 *
 * Best-effort: it NEVER throws (a write failure must not turn a successful promote
 * into an error) and it does NOT create the change dir. The write is skipped when:
 *   - `changeName` is not a non-empty string,
 *   - `changeName` resolves to the `changes` root itself or to a location outside
 *     it (e.g. `.`, `..`, `../x`), so a bad name can never drop a stray result
 *     file in a shared directory,
 *   - the change dir is absent (a typo'd/invalid invocation, or an
 *     already-archived change) or is not a directory.
 *
 * @param repoRoot   Repository root containing `synergyspec-selfevolving/changes`.
 * @param changeName Name of the change directory to annotate.
 * @param report     Evolve report; `outcome`, `promoted`, `reason`/`error`, `targetId`,
 *                   `candidateId`, `gatePassed`, `promotedFiles` and `loss` are copied.
 * @param now        Clock function returning a `Date`; used for the record timestamp.
 * @returns Nothing; the only effect is the (optional) file write.
 */
function persistEvolutionResult(repoRoot, changeName, report, now) {
    try {
        if (typeof changeName !== 'string' || changeName.trim() === '')
            return;
        const changesRoot = path.join(repoRoot, 'synergyspec-selfevolving', 'changes');
        const changeDir = path.join(changesRoot, changeName);
        // The target must be a strict descendant of the changes root.
        const relative = path.relative(changesRoot, changeDir);
        const escapesRoot = relative === ''
            || relative === '..'
            || relative.startsWith(`..${path.sep}`)
            || path.isAbsolute(relative);
        if (escapesRoot)
            return;
        if (!fs.existsSync(changeDir) || !fs.statSync(changeDir).isDirectory())
            return;
        const record = {
            schemaVersion: 1,
            changeName,
            outcome: report.outcome,
            promoted: report.promoted,
            // Refusals carry `reason`, failures carry `error`; null when neither is set.
            reason: report.reason ?? report.error ?? null,
            targetId: report.targetId,
            candidateId: report.candidateId,
            gatePassed: report.gatePassed,
            promotedFiles: report.promotedFiles,
            loss: report.loss,
            timestamp: now().toISOString(),
            tool: 'evolve-from-edits',
        };
        fs.writeFileSync(path.join(changeDir, 'evolution-result.json'), `${JSON.stringify(record, null, 2)}\n`);
    }
    catch {
        // best-effort: a status-annotation write must never break the evolve run.
    }
}
|
|
1457
|
+
function finishEvolveFromEdits(report, json, stdout, stderr, persist) {
|
|
1458
|
+
persist?.();
|
|
1323
1459
|
if (json) {
|
|
1324
1460
|
stdout(JSON.stringify(report, null, 2));
|
|
1325
1461
|
return;
|
|
@@ -57,6 +57,19 @@ export function printStatusText(status, readiness) {
|
|
|
57
57
|
else {
|
|
58
58
|
console.log(chalk.green('Evidence: complete'));
|
|
59
59
|
}
|
|
60
|
+
// Self-evolution outcome (visibility only; never gates archive-ready). A
|
|
61
|
+
// refused/failed evolution is surfaced in YELLOW so a silently-failed evolution
|
|
62
|
+
// can't hide inside an otherwise-green, archive-ready run.
|
|
63
|
+
const evolution = readiness.evolution;
|
|
64
|
+
if (evolution.status === 'promoted' && evolution.promoted) {
|
|
65
|
+
console.log(chalk.green(`Evolution: promoted -> ${evolution.targetId ?? '(target)'} (${evolution.promotedFiles.length} file(s))`));
|
|
66
|
+
}
|
|
67
|
+
else if (evolution.status === 'refused' || evolution.status === 'error' || evolution.status === 'promoted') {
|
|
68
|
+
console.log(chalk.yellow(`Evolution: ${evolution.status}${evolution.reason ? ` — ${evolution.reason}` : ''}`));
|
|
69
|
+
}
|
|
70
|
+
else {
|
|
71
|
+
console.log(chalk.dim('Evolution: not run'));
|
|
72
|
+
}
|
|
60
73
|
}
|
|
61
74
|
console.log();
|
|
62
75
|
for (const artifact of status.artifacts) {
|
|
@@ -2,6 +2,13 @@ import { type ChangeStatus } from './artifact-graph/index.js';
|
|
|
2
2
|
export type ArtifactWorkflowStatus = 'complete' | 'ready' | 'in-progress' | 'blocked';
|
|
3
3
|
export type ChangeReadinessStatus = ArtifactWorkflowStatus | 'ready-to-apply';
|
|
4
4
|
export type TaskReadinessStatus = 'no-tasks' | 'complete' | 'in-progress';
|
|
5
|
+
/**
 * Coarse self-evolution outcome for a change, intended for display.
 *
 * The value is derived from the CLI-written `evolution-result.json`, NOT from the
 * agent-written learn-report.md. It exists for visibility only and does NOT gate
 * `isArchiveReady`: a safe refusal must not block archiving a finished change.
 */
export type EvolutionOutcomeStatus =
    | 'not-run'
    | 'promoted'
    | 'refused'
    | 'error';
|
|
5
12
|
export interface ArtifactStatusSummary {
|
|
6
13
|
done: number;
|
|
7
14
|
ready: number;
|
|
@@ -26,6 +33,20 @@ export interface EvidenceReadiness {
|
|
|
26
33
|
testPlanRequired: boolean;
|
|
27
34
|
missing: string[];
|
|
28
35
|
}
|
|
36
|
+
/**
 * Self-evolution outcome attached to a change's readiness summary.
 */
export interface EvolutionOutcomeReadiness {
    /** Outcome category; `'not-run'` when the change has no `evolution-result.json`. */
    status: EvolutionOutcomeStatus;

    /** Refusal reason or error message explaining why the run stopped; absent when promoted/not-run. */
    reason?: string;

    /** Canonical target the run was for, when known. */
    targetId?: string;

    /** True when a canonical file was actually promoted. */
    promoted: boolean;

    /** LOCAL file paths written on promotion; empty unless promoted. */
    promotedFiles: string[];

    /** ISO timestamp of the recorded run. */
    timestamp?: string;
}
|
|
29
50
|
export interface ChangeReadiness {
|
|
30
51
|
changeName: string;
|
|
31
52
|
schemaName: string;
|
|
@@ -37,6 +58,8 @@ export interface ChangeReadiness {
|
|
|
37
58
|
totalTasks: number;
|
|
38
59
|
incompleteTasks: TaskItem[];
|
|
39
60
|
evidence: EvidenceReadiness;
|
|
61
|
+
/** Self-evolution outcome for the change (visibility only; does not gate archive). */
|
|
62
|
+
evolution: EvolutionOutcomeReadiness;
|
|
40
63
|
isArchiveReady: boolean;
|
|
41
64
|
artifactGraph: ChangeStatus;
|
|
42
65
|
}
|
|
@@ -54,6 +77,7 @@ export declare function toReadinessJson(readiness: ChangeReadiness): {
|
|
|
54
77
|
totalTasks: number;
|
|
55
78
|
incompleteTasks: TaskItem[];
|
|
56
79
|
evidence: EvidenceReadiness;
|
|
80
|
+
evolution: EvolutionOutcomeReadiness;
|
|
57
81
|
isArchiveReady: boolean;
|
|
58
82
|
};
|
|
59
83
|
//# sourceMappingURL=change-readiness.d.ts.map
|