synergyspec-selfevolving 1.3.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +50 -19
- package/dist/commands/learn.d.ts +12 -1
- package/dist/commands/learn.js +373 -31
- package/dist/commands/self-evolution-episode.d.ts +177 -0
- package/dist/commands/self-evolution-episode.js +423 -0
- package/dist/commands/self-evolution.d.ts +12 -190
- package/dist/commands/self-evolution.js +179 -786
- package/dist/commands/workflow/status.js +3 -1
- package/dist/core/archive.d.ts +0 -1
- package/dist/core/archive.js +0 -58
- package/dist/core/artifact-graph/instruction-loader.d.ts +2 -4
- package/dist/core/artifact-graph/instruction-loader.js +3 -31
- package/dist/core/config-prompts.js +4 -0
- package/dist/core/fitness/health/health-metrics.d.ts +26 -56
- package/dist/core/fitness/health/health-metrics.js +19 -58
- package/dist/core/fitness/health/index.d.ts +15 -2
- package/dist/core/fitness/health/index.js +25 -1
- package/dist/core/fitness/health/local-source.d.ts +43 -4
- package/dist/core/fitness/health/local-source.js +181 -25
- package/dist/core/fitness/health/metric-source.d.ts +48 -19
- package/dist/core/fitness/health/metric-source.js +8 -18
- package/dist/core/fitness/health/resolve-source.js +4 -1
- package/dist/core/fitness/loss.d.ts +7 -7
- package/dist/core/fitness/loss.js +6 -6
- package/dist/core/fitness/sample.d.ts +10 -0
- package/dist/core/fitness/test-failures.d.ts +30 -0
- package/dist/core/fitness/test-failures.js +123 -0
- package/dist/core/learn/credit-path.d.ts +36 -0
- package/dist/core/learn/credit-path.js +198 -0
- package/dist/core/learn/trajectory-discovery.d.ts +39 -0
- package/dist/core/learn/trajectory-discovery.js +140 -0
- package/dist/core/learn.d.ts +39 -5
- package/dist/core/learn.js +131 -14
- package/dist/core/project-config.d.ts +4 -0
- package/dist/core/project-config.js +52 -1
- package/dist/core/self-evolution/candidate-fitness.d.ts +23 -1
- package/dist/core/self-evolution/candidate-fitness.js +31 -5
- package/dist/core/self-evolution/candidates.d.ts +0 -9
- package/dist/core/self-evolution/canonical-targets.d.ts +8 -4
- package/dist/core/self-evolution/canonical-targets.js +8 -4
- package/dist/core/self-evolution/critic-agent.d.ts +150 -0
- package/dist/core/self-evolution/critic-agent.js +487 -0
- package/dist/core/self-evolution/edits-contract.d.ts +53 -0
- package/dist/core/self-evolution/edits-contract.js +89 -0
- package/dist/core/self-evolution/episode-orchestrator.d.ts +197 -0
- package/dist/core/self-evolution/episode-orchestrator.js +534 -0
- package/dist/core/self-evolution/episode-store.d.ts +266 -0
- package/dist/core/self-evolution/episode-store.js +573 -0
- package/dist/core/self-evolution/evolution-switches.d.ts +1 -1
- package/dist/core/self-evolution/evolution-switches.js +5 -10
- package/dist/core/self-evolution/evolving-agent.d.ts +162 -0
- package/dist/core/self-evolution/evolving-agent.js +449 -0
- package/dist/core/self-evolution/health-baseline.d.ts +25 -6
- package/dist/core/self-evolution/health-baseline.js +30 -6
- package/dist/core/self-evolution/host-harness.d.ts +1 -2
- package/dist/core/self-evolution/host-harness.js +1 -2
- package/dist/core/self-evolution/index.d.ts +10 -6
- package/dist/core/self-evolution/index.js +19 -6
- package/dist/core/self-evolution/learn-hints.d.ts +31 -0
- package/dist/core/self-evolution/learn-hints.js +16 -0
- package/dist/core/self-evolution/learn-observation-adapter.d.ts +35 -0
- package/dist/core/self-evolution/learn-observation-adapter.js +285 -10
- package/dist/core/self-evolution/line-diff.d.ts +60 -0
- package/dist/core/self-evolution/line-diff.js +130 -0
- package/dist/core/self-evolution/policy/fs-safe.d.ts +19 -0
- package/dist/core/self-evolution/policy/fs-safe.js +89 -0
- package/dist/core/self-evolution/policy/index.d.ts +13 -0
- package/dist/core/self-evolution/policy/index.js +13 -0
- package/dist/core/self-evolution/policy/policy-store.d.ts +217 -0
- package/dist/core/self-evolution/policy/policy-store.js +774 -0
- package/dist/core/self-evolution/policy/reject-buffer.d.ts +48 -0
- package/dist/core/self-evolution/policy/reject-buffer.js +168 -0
- package/dist/core/self-evolution/promote.d.ts +1 -1
- package/dist/core/self-evolution/promote.js +6 -33
- package/dist/core/self-evolution/promotion.js +1 -2
- package/dist/core/self-evolution/proposer-agent.d.ts +41 -0
- package/dist/core/self-evolution/proposer-agent.js +94 -13
- package/dist/core/self-evolution/proposer-slice.d.ts +26 -0
- package/dist/core/self-evolution/proposer-slice.js +54 -0
- package/dist/core/self-evolution/reward-agent.d.ts +234 -0
- package/dist/core/self-evolution/reward-agent.js +564 -0
- package/dist/core/self-evolution/scope-gate.d.ts +66 -0
- package/dist/core/self-evolution/scope-gate.js +107 -0
- package/dist/core/self-evolution/success-channel.d.ts +79 -0
- package/dist/core/self-evolution/success-channel.js +361 -0
- package/dist/core/self-evolution/target-evolution.d.ts +11 -0
- package/dist/core/self-evolution/target-evolution.js +2 -0
- package/dist/core/self-evolution/tool-evolution.js +2 -13
- package/dist/core/self-evolution/verdict.d.ts +8 -5
- package/dist/core/self-evolution/verdict.js +4 -7
- package/dist/core/templates/skill-templates.d.ts +1 -0
- package/dist/core/templates/skill-templates.js +1 -0
- package/dist/core/templates/workflow-manifest.js +2 -0
- package/dist/core/templates/workflows/learn.d.ts +4 -2
- package/dist/core/templates/workflows/learn.js +25 -166
- package/dist/core/templates/workflows/self-evolving.d.ts +13 -0
- package/dist/core/templates/workflows/self-evolving.js +127 -0
- package/dist/core/trajectory/facts.d.ts +16 -0
- package/dist/core/trajectory/facts.js +12 -4
- package/dist/core/trajectory/skeleton.d.ts +43 -0
- package/dist/core/trajectory/skeleton.js +239 -0
- package/dist/dashboard/data.d.ts +25 -51
- package/dist/dashboard/data.js +68 -180
- package/dist/dashboard/react-client.js +458 -503
- package/dist/dashboard/react-styles.js +3 -3
- package/dist/dashboard/server.js +23 -17
- package/dist/ui/ascii-patterns.d.ts +7 -15
- package/dist/ui/ascii-patterns.js +123 -54
- package/dist/ui/welcome-screen.d.ts +0 -14
- package/dist/ui/welcome-screen.js +16 -35
- package/package.json +3 -1
- package/scripts/code-health.py +1066 -638
- package/scripts/slop_rules.yaml +2151 -0
|
@@ -1,16 +1,19 @@
|
|
|
1
1
|
import * as fs from 'node:fs';
|
|
2
2
|
import * as path from 'node:path';
|
|
3
|
-
import * as crypto from 'node:crypto';
|
|
4
3
|
import fastGlob from 'fast-glob';
|
|
5
|
-
import { aggregateLearnEvolutionHints, applyCandidatePromotion, rollbackCandidatePromotion, shouldAutoPromote, isEvidenceComplete,
|
|
4
|
+
import { aggregateLearnEvolutionHints, applyCandidatePromotion, rollbackCandidatePromotion, shouldAutoPromote, isEvidenceComplete, readCandidateFitness, readHealthBaseline, writeHealthBaseline, readCandidatePackage, resolveTargetLocalFiles, CANONICAL_CANDIDATE_SOURCES, CANONICAL_TARGETS, collectArchiveExperiences, EVOLVABLE_PART_DESCRIPTIONS, EVOLVABLE_PARTS, evaluateTaskDecompositionForChange, evaluateToolEvolutionCandidate, generateCandidateId, generatePromotionReport, readPromotedBaselineLoss, checkLossRegression, recordVerdictBestEffort, updateCandidateStatus, isEvolutionPartEnabled, findSimilarArchiveExperiences, listCanonicalTargets, lookupCanonicalTarget, validateCandidateEdits, renderUnifiedDiff, CanonicalProposerNoOp, resolveTargetEvolutionPolicy, resolveKindOnlyPinTarget, detectUnbindableHintObservations, isCanonicalTargetEvolvable, parseEvolutionSwitchOptions, renderAlignmentReport, renderArchiveExperienceBlock, renderStaticGateSummary, renderToolEvolutionGuardReport, renderEvolutionSwitches, requireCanonicalTarget, resolveCandidateRepo, runStaticCandidateGate, shouldTriggerCandidate, validateLearnEvolutionHint, writeCandidatePackage, verifySpecCodeAlignmentForChange, } from '../core/self-evolution/index.js';
|
|
6
5
|
import { generateLearnReport } from '../core/learn.js';
|
|
7
|
-
import {
|
|
8
|
-
import { validateChangeExists
|
|
6
|
+
import { validateExplicitTrajectoryHandle } from '../core/learn/trajectory-discovery.js';
|
|
7
|
+
import { validateChangeExists } from './workflow/shared.js';
|
|
9
8
|
import { readProjectConfig } from '../core/project-config.js';
|
|
9
|
+
import { attachSelfEvolutionEpisodeCommands } from './self-evolution-episode.js';
|
|
10
10
|
export function registerSelfEvolutionCommand(program) {
|
|
11
11
|
const cmd = program
|
|
12
12
|
.command('self-evolution')
|
|
13
|
-
.description('Inspect SynergySpec-SelfEvolving
|
|
13
|
+
.description('Inspect SynergySpec-SelfEvolving self-evolution signals for templates, tasks, verification, memory, and tool changes');
|
|
14
|
+
// Loop-v2 (self-evolution as in-context RL): the `episode` + `policy`
|
|
15
|
+
// subcommands.
|
|
16
|
+
attachSelfEvolutionEpisodeCommands(cmd);
|
|
14
17
|
cmd
|
|
15
18
|
.command('parts')
|
|
16
19
|
.description('List evolvable parts that can be enabled or disabled per run')
|
|
@@ -36,41 +39,6 @@ export function registerSelfEvolutionCommand(program) {
|
|
|
36
39
|
console.log(`- ${id}: ${mark} - ${EVOLVABLE_PART_DESCRIPTIONS[id]}`);
|
|
37
40
|
}
|
|
38
41
|
});
|
|
39
|
-
cmd
|
|
40
|
-
.command('templates')
|
|
41
|
-
.description('Show selected evolving template variant for an artifact')
|
|
42
|
-
.option('--schema <name>', 'schema name', 'spec-driven')
|
|
43
|
-
.requiredOption('--artifact <id>', 'artifact id such as proposal, specs, design, or tasks')
|
|
44
|
-
.option('--evolve <parts>', 'comma-separated evolvable parts allowed for this run, or all/none')
|
|
45
|
-
.option('--evolve-deny <parts>', 'comma-separated evolvable parts disabled for this run')
|
|
46
|
-
.option('--json', 'output JSON')
|
|
47
|
-
.action(async (options) => {
|
|
48
|
-
const projectRoot = process.cwd();
|
|
49
|
-
const schemaName = validateSchemaExists(options.schema ?? 'spec-driven', projectRoot);
|
|
50
|
-
const switches = parseEvolutionSwitchOptions(options);
|
|
51
|
-
if (!isEvolutionPartEnabled(switches, 'template-variants')) {
|
|
52
|
-
printDisabled('template-variants', switches, options.json);
|
|
53
|
-
return;
|
|
54
|
-
}
|
|
55
|
-
const selection = selectTemplateVariant({
|
|
56
|
-
projectRoot,
|
|
57
|
-
schemaName,
|
|
58
|
-
artifactId: options.artifact,
|
|
59
|
-
});
|
|
60
|
-
const manifest = readTemplateVariantManifest(projectRoot);
|
|
61
|
-
if (options.json) {
|
|
62
|
-
console.log(JSON.stringify({ selection, manifest, switches }, null, 2));
|
|
63
|
-
return;
|
|
64
|
-
}
|
|
65
|
-
if (!selection) {
|
|
66
|
-
console.log(`No active project template variant for ${schemaName}/${options.artifact}.`);
|
|
67
|
-
return;
|
|
68
|
-
}
|
|
69
|
-
console.log(`${selection.schema}/${selection.artifact}: ${selection.id}`);
|
|
70
|
-
console.log(`Score: ${selection.score.toFixed(2)}`);
|
|
71
|
-
console.log(`Path: ${selection.templatePath}`);
|
|
72
|
-
console.log(`Reason: ${selection.reason}`);
|
|
73
|
-
});
|
|
74
42
|
cmd
|
|
75
43
|
.command('memory [query]')
|
|
76
44
|
.description('Retrieve similar archived change experience')
|
|
@@ -218,19 +186,6 @@ export function registerSelfEvolutionCommand(program) {
|
|
|
218
186
|
}
|
|
219
187
|
printCanonicalTargetTable(targets);
|
|
220
188
|
});
|
|
221
|
-
cmd
|
|
222
|
-
.command('trajectory <targetId>')
|
|
223
|
-
.description('Print the OPTIMIZATION TRAJECTORY block for a canonical target — the scored history of prior candidates (loss, verdict, approach) + the promoted baseline to beat — exactly as the headless proposer sees it. Read-only; the host learn skill calls this before authoring an --from-edits improvement.')
|
|
224
|
-
.option('--max-entries <n>', 'cap the number of prior candidates shown (default 6)')
|
|
225
|
-
.option('--json', 'output { targetId, baselineLoss, baselineCandidateId, entries } JSON')
|
|
226
|
-
.action(async (targetId, options) => {
|
|
227
|
-
const result = await runTrajectoryCommand({
|
|
228
|
-
targetId,
|
|
229
|
-
maxEntries: options.maxEntries !== undefined ? Number(options.maxEntries) : undefined,
|
|
230
|
-
json: options.json,
|
|
231
|
-
}, { repoRoot: process.cwd() });
|
|
232
|
-
process.exitCode = result.exitCode;
|
|
233
|
-
});
|
|
234
189
|
cmd
|
|
235
190
|
.command('hints <jsonFile...>')
|
|
236
191
|
.description('Aggregate LearnEvolutionHint JSON files into scored candidate-request groups (no canonical files are written)')
|
|
@@ -279,30 +234,6 @@ export function registerSelfEvolutionCommand(program) {
|
|
|
279
234
|
const result = await runPromotionReportCommand({ candidateId, write: options.write, json: options.json }, { repoRoot: process.cwd() });
|
|
280
235
|
process.exitCode = result.exitCode;
|
|
281
236
|
});
|
|
282
|
-
cmd
|
|
283
|
-
.command('evolve')
|
|
284
|
-
.description('Run the GA outer loop: group candidates by target, score by accumulated fitness (or --replay a corpus), rank/select the best, and generate a human-gated promotion report. Never auto-promotes.')
|
|
285
|
-
.option('--target <targetId>', 'restrict the loop to a single canonical target id')
|
|
286
|
-
.option('--replay', 'score candidates by replaying a change corpus through baseline + candidate (requires --change); default uses already-accumulated fitness records')
|
|
287
|
-
.option('--change <ids...>', 'change ids forming the replay corpus (only used with --replay)')
|
|
288
|
-
.option('--write', 'write a promotion-report.md for each selected best candidate (still human-gated; never auto-promotes)')
|
|
289
|
-
.option('--mark-outcompeted', 'mark proven sibling variants that lost the ranking to the best (same variant cohort, higher loss) with verdict `outcompeted` so future proposals see them as negative examples (never changes status)')
|
|
290
|
-
.option('--evolve-target <ids>', 'comma-separated canonical target ids allowed to evolve (supports all/none)')
|
|
291
|
-
.option('--freeze-target <ids>', 'comma-separated canonical target ids frozen (supports all/none)')
|
|
292
|
-
.option('--json', 'output the full RunEvolveOuterLoopResult JSON')
|
|
293
|
-
.action(async (options) => {
|
|
294
|
-
const result = await runEvolveOuterLoopCommand({
|
|
295
|
-
target: options.target,
|
|
296
|
-
replay: options.replay,
|
|
297
|
-
changeIds: options.change,
|
|
298
|
-
write: options.write,
|
|
299
|
-
markOutcompeted: options.markOutcompeted,
|
|
300
|
-
evolveTarget: options.evolveTarget,
|
|
301
|
-
freezeTarget: options.freezeTarget,
|
|
302
|
-
json: options.json,
|
|
303
|
-
}, { repoRoot: process.cwd() });
|
|
304
|
-
process.exitCode = result.exitCode;
|
|
305
|
-
});
|
|
306
237
|
cmd
|
|
307
238
|
.command('propose-canonical')
|
|
308
239
|
.description('Convert aggregated learn-stage hints into candidate package directories (proposal only; no canonical file is modified)')
|
|
@@ -313,9 +244,7 @@ export function registerSelfEvolutionCommand(program) {
|
|
|
313
244
|
.option('--source <source>', `attribution source recorded on each candidate (one of: ${CANONICAL_CANDIDATE_SOURCES.join(', ')}); defaults to 'learn-threshold'`)
|
|
314
245
|
.option('--dry-run', 'build candidate package contents in memory but do not write to disk')
|
|
315
246
|
.option('--force', 'no effect in v1; the writer never overwrites existing candidates')
|
|
316
|
-
.option('--from-edits <file>', "validate + package candidate edits the HOST agent already wrote (JSON { targetId?, rationale?, edits: [{relPath, content}] }; '-' reads stdin).
|
|
317
|
-
.option('--agent', 'HEADLESS FALLBACK ONLY (cron/CI with no host agent): spawn the proposer agent to draft the diff. Prefer --from-edits when running inside a host code agent')
|
|
318
|
-
.option('--variants <n>', 'draft N competing variant candidates (1-5; default 1) for the surviving group, each on a distinct improvement angle, so the GA outer loop can select the best. Requires --agent.')
|
|
247
|
+
.option('--from-edits <file>', "validate + package candidate edits the HOST agent already wrote (JSON { targetId?, rationale?, edits: [{relPath, content}] }; '-' reads stdin). The host code agent authors the diff; requires exactly one surviving group")
|
|
319
248
|
.option('--evolve-target <ids>', 'comma-separated canonical target ids allowed to evolve this run (supports all/none)')
|
|
320
249
|
.option('--freeze-target <ids>', 'comma-separated canonical target ids frozen this run (supports all/none)')
|
|
321
250
|
.option('--json', 'output JSON summary')
|
|
@@ -336,11 +265,6 @@ export function registerSelfEvolutionCommand(program) {
|
|
|
336
265
|
// here we only resolve the file/stdin and shape-validate the payload.
|
|
337
266
|
let editsInput;
|
|
338
267
|
if (options.fromEdits) {
|
|
339
|
-
if (options.agent) {
|
|
340
|
-
process.stderr.write('error: --from-edits and --agent are mutually exclusive\n');
|
|
341
|
-
process.exitCode = 2;
|
|
342
|
-
return;
|
|
343
|
-
}
|
|
344
268
|
let raw;
|
|
345
269
|
try {
|
|
346
270
|
raw =
|
|
@@ -369,8 +293,6 @@ export function registerSelfEvolutionCommand(program) {
|
|
|
369
293
|
source: options.source,
|
|
370
294
|
dryRun: options.dryRun,
|
|
371
295
|
force: options.force,
|
|
372
|
-
agent: options.agent,
|
|
373
|
-
variants: options.variants !== undefined ? Number(options.variants) : undefined,
|
|
374
296
|
editsInput,
|
|
375
297
|
evolveTarget: options.evolveTarget,
|
|
376
298
|
freezeTarget: options.freezeTarget,
|
|
@@ -406,30 +328,6 @@ export function registerSelfEvolutionCommand(program) {
|
|
|
406
328
|
const result = await runRejectCommand({ candidateId, reason: options.reason, json: options.json }, { repoRoot: process.cwd() });
|
|
407
329
|
process.exitCode = result.exitCode;
|
|
408
330
|
});
|
|
409
|
-
cmd
|
|
410
|
-
.command('auto-evolve')
|
|
411
|
-
.description('ONE-BUTTON self-evolution: run learn -> hints -> propose(--agent) -> gate -> promote for a change, auto-applying the gate-passing winner onto the canonical template (no per-change approval). Honors the per-target switch + oracle freeze; snapshots every change for rollback.')
|
|
412
|
-
.requiredOption('--change <names...>', 'one or more completed changes to learn from and evolve (a SINGLE one is enough)')
|
|
413
|
-
.option('--no-auto', 'run the full pipeline but stop BEFORE applying (gate + select only)')
|
|
414
|
-
.option('--require-proven', 'only promote on a MEASURED fitness improvement (refuse unproven candidates)')
|
|
415
|
-
.option('--min-occurrences <n>', 'min occurrences a signal must reach to evolve (default 1; raise + pass several --change to require recurrence across them)')
|
|
416
|
-
.option('--threshold-key <key>', 'force-propose only this aggregated signal (bypasses the recurrence trigger)')
|
|
417
|
-
.option('--evolve-target <ids>', 'comma-separated canonical target ids allowed to evolve (supports all/none)')
|
|
418
|
-
.option('--freeze-target <ids>', 'comma-separated canonical target ids frozen (supports all/none)')
|
|
419
|
-
.option('--json', 'output the full AutoEvolveReport JSON')
|
|
420
|
-
.action(async (options) => {
|
|
421
|
-
const result = await runAutoEvolve({
|
|
422
|
-
changeNames: options.change,
|
|
423
|
-
auto: options.auto,
|
|
424
|
-
requireProven: options.requireProven,
|
|
425
|
-
minOccurrences: options.minOccurrences !== undefined ? Number(options.minOccurrences) : undefined,
|
|
426
|
-
thresholdKey: options.thresholdKey,
|
|
427
|
-
evolveTarget: options.evolveTarget,
|
|
428
|
-
freezeTarget: options.freezeTarget,
|
|
429
|
-
json: options.json,
|
|
430
|
-
}, { repoRoot: process.cwd() });
|
|
431
|
-
process.exitCode = result.exitCode;
|
|
432
|
-
});
|
|
433
331
|
cmd
|
|
434
332
|
.command('evolve-from-edits')
|
|
435
333
|
.description('HOST-AUTHORED one-button evolve: package edits the host code agent already wrote (--from-edits) for ONE learn signal, run the static gate, and auto-promote ONLY when the change\'s learn report carries an OBSERVED-VERIFIED green signal (a real test run was seen) onto the canonical LOCAL file. Never spawns the proposer; --agent is refused.')
|
|
@@ -439,22 +337,74 @@ export function registerSelfEvolutionCommand(program) {
|
|
|
439
337
|
.option('--freeze-target <ids>', 'comma-separated canonical target ids frozen (supports all/none)')
|
|
440
338
|
.option('--threshold-key <key>', 'when the learn handoff aggregates into several change-type groups for the target, force only this aggregated signal (copy one key from the refusal list)')
|
|
441
339
|
.option('--require-proven', 'only promote on a MEASURED fitness improvement (refuse unproven candidates)')
|
|
340
|
+
.option('--transcript <path>', 'Explicit transcript .jsonl to grade (bypasses change-window discovery; Claude transcript store only)')
|
|
341
|
+
.option('--session-id <id>', 'Explicit Claude session id to grade (bypasses change-window discovery; Claude transcript store only)')
|
|
442
342
|
.option('--agent', 'REFUSED: this path is host-authored and never spawns the proposer')
|
|
443
343
|
.option('--yes', 'required: confirm the non-interactive auto-promote')
|
|
444
344
|
.option('--json', 'output the full EvolveFromEditsReport JSON')
|
|
445
345
|
.action(async (options) => {
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
346
|
+
// USER-TYPED handle flags are validated up front and fail LOUD
|
|
347
|
+
// (exit 1) on a miss — unlike the env-var channel, which keeps the
|
|
348
|
+
// fail-closed refusal semantics inside discovery (empty result, the
|
|
349
|
+
// observed-verified gate refuses). Validated BEFORE the env is mutated
|
|
350
|
+
// below so a bad flag never leaks into the environment.
|
|
351
|
+
const handleError = await validateExplicitTrajectoryHandle({
|
|
352
|
+
projectRoot: process.cwd(),
|
|
353
|
+
transcriptPath: options.transcript,
|
|
354
|
+
sessionId: options.sessionId,
|
|
355
|
+
});
|
|
356
|
+
if (handleError) {
|
|
357
|
+
if (options.json) {
|
|
358
|
+
console.log(JSON.stringify({ error: handleError }, null, 2));
|
|
359
|
+
}
|
|
360
|
+
else {
|
|
361
|
+
console.error(`Error: ${handleError}`);
|
|
362
|
+
}
|
|
363
|
+
process.exitCode = 1;
|
|
364
|
+
return;
|
|
365
|
+
}
|
|
366
|
+
// Explicit trajectory handle: surfaced to the discovery layer via env
|
|
367
|
+
// (the observed-verified gate re-grades the change inside
|
|
368
|
+
// runEvolveFromEdits), kept strictly in the action layer so the
|
|
369
|
+
// injected-generateReport test seam stays byte-identical.
|
|
370
|
+
const prevTranscriptEnv = process.env.SYNERGYSPEC_SELFEVOLVING_TRANSCRIPT;
|
|
371
|
+
const prevSessionEnv = process.env.SYNERGYSPEC_SELFEVOLVING_SESSION_ID;
|
|
372
|
+
if (options.transcript)
|
|
373
|
+
process.env.SYNERGYSPEC_SELFEVOLVING_TRANSCRIPT = options.transcript;
|
|
374
|
+
if (options.sessionId)
|
|
375
|
+
process.env.SYNERGYSPEC_SELFEVOLVING_SESSION_ID = options.sessionId;
|
|
376
|
+
try {
|
|
377
|
+
const result = await runEvolveFromEdits({
|
|
378
|
+
fromLearn: options.fromLearn,
|
|
379
|
+
evolveTarget: options.evolveTarget,
|
|
380
|
+
fromEdits: options.fromEdits,
|
|
381
|
+
thresholdKey: options.thresholdKey,
|
|
382
|
+
freezeTarget: options.freezeTarget,
|
|
383
|
+
requireProven: options.requireProven,
|
|
384
|
+
agent: options.agent,
|
|
385
|
+
yes: options.yes,
|
|
386
|
+
json: options.json,
|
|
387
|
+
}, { repoRoot: process.cwd() });
|
|
388
|
+
process.exitCode = result.exitCode;
|
|
389
|
+
}
|
|
390
|
+
finally {
|
|
391
|
+
if (options.transcript) {
|
|
392
|
+
if (prevTranscriptEnv === undefined) {
|
|
393
|
+
delete process.env.SYNERGYSPEC_SELFEVOLVING_TRANSCRIPT;
|
|
394
|
+
}
|
|
395
|
+
else {
|
|
396
|
+
process.env.SYNERGYSPEC_SELFEVOLVING_TRANSCRIPT = prevTranscriptEnv;
|
|
397
|
+
}
|
|
398
|
+
}
|
|
399
|
+
if (options.sessionId) {
|
|
400
|
+
if (prevSessionEnv === undefined) {
|
|
401
|
+
delete process.env.SYNERGYSPEC_SELFEVOLVING_SESSION_ID;
|
|
402
|
+
}
|
|
403
|
+
else {
|
|
404
|
+
process.env.SYNERGYSPEC_SELFEVOLVING_SESSION_ID = prevSessionEnv;
|
|
405
|
+
}
|
|
406
|
+
}
|
|
407
|
+
}
|
|
458
408
|
});
|
|
459
409
|
}
|
|
460
410
|
/**
|
|
@@ -548,37 +498,13 @@ export function parseHostEditsInput(raw) {
|
|
|
548
498
|
*
|
|
549
499
|
* SAFETY:
|
|
550
500
|
* - Never writes outside `<repoRoot>/.synergyspec-selfevolving/self-evolution/candidates/`.
|
|
551
|
-
* - Generation is
|
|
552
|
-
*
|
|
553
|
-
* the placeholder for a human to complete.
|
|
501
|
+
* - Generation is the host-agent `--from-edits` path (validate + package); when
|
|
502
|
+
* absent, diff.patch is the placeholder for a human to complete.
|
|
554
503
|
*/
|
|
555
504
|
export async function runProposeCanonical(args, opts) {
|
|
556
505
|
const stdout = opts.stdout ?? ((line) => console.log(line));
|
|
557
506
|
const stderr = opts.stderr ?? ((line) => console.error(line));
|
|
558
507
|
const now = opts.now ?? (() => new Date());
|
|
559
|
-
// Host-authored edits and the headless proposer are mutually exclusive. The
|
|
560
|
-
// CLI action also rejects the combo, but mirror it here so both entry points
|
|
561
|
-
// share one contract (a programmatic caller can't silently get host-precedence).
|
|
562
|
-
if (args.editsInput && args.agent) {
|
|
563
|
-
stderr('--from-edits and --agent are mutually exclusive');
|
|
564
|
-
return {
|
|
565
|
-
exitCode: 2,
|
|
566
|
-
proposed: [],
|
|
567
|
-
skipped: [],
|
|
568
|
-
errors: ['--from-edits and --agent are mutually exclusive'],
|
|
569
|
-
};
|
|
570
|
-
}
|
|
571
|
-
// Population-based generation: clamp variants to 1-5. N>1 requires the headless
|
|
572
|
-
// proposer (divergence is prompt-side, one prompt per variant); it is invalid
|
|
573
|
-
// with --from-edits (one host payload = one candidate) and pointless for the
|
|
574
|
-
// placeholder path (N identical placeholders).
|
|
575
|
-
const requestedVariants = Number.isFinite(args.variants) ? Math.trunc(args.variants) : 1;
|
|
576
|
-
const variantCount = Math.max(1, Math.min(5, requestedVariants || 1));
|
|
577
|
-
if (variantCount > 1 && !args.agent) {
|
|
578
|
-
const msg = '--variants > 1 requires --agent (variant divergence is prompt-side; --from-edits carries one payload).';
|
|
579
|
-
stderr(msg);
|
|
580
|
-
return { exitCode: 2, proposed: [], skipped: [], errors: [msg] };
|
|
581
|
-
}
|
|
582
508
|
const filePaths = args.fromLearn ?? [];
|
|
583
509
|
// 1) Resolve & validate --target up front (deterministic throw on unknown).
|
|
584
510
|
if (args.target !== undefined) {
|
|
@@ -650,7 +576,7 @@ export async function runProposeCanonical(args, opts) {
|
|
|
650
576
|
? { ...hint, affectedTargetId: pinId, thresholdKey: `${pinId}:${hint.proposedChangeType}` }
|
|
651
577
|
: hint;
|
|
652
578
|
});
|
|
653
|
-
// 4) Aggregate. `aggregationOptions` lets
|
|
579
|
+
// 4) Aggregate. `aggregationOptions` lets the host evolve-from-edits path act on a single change
|
|
654
580
|
// (one forward pass = one loss); omitted = conservative cross-change defaults.
|
|
655
581
|
const allGroups = aggregateLearnEvolutionHints(scopedHints, args.aggregationOptions);
|
|
656
582
|
// 5) Filter.
|
|
@@ -791,170 +717,112 @@ export async function runProposeCanonical(args, opts) {
|
|
|
791
717
|
const evalPlanSummary = evalPlanMd;
|
|
792
718
|
const rationaleSummary = rationaleMd;
|
|
793
719
|
const riskSummary = riskReportMd;
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
:
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
// the CLI only validates (frozen + target scope) and packages it.
|
|
840
|
-
// • HEADLESS FALLBACK (`--agent`): spawn the proposer agent (cron/CI with
|
|
841
|
-
// no host agent).
|
|
842
|
-
// Both are skipped under --dry-run so a dry run never touches disk (resolving
|
|
843
|
-
// local files can MATERIALIZE a template default). Edits are validated against
|
|
844
|
-
// the target's resolved LOCAL files and rejected if they touch a frozen/gate-
|
|
845
|
-
// defining file. Any failure (no-op, invalid output, missing binary) falls
|
|
846
|
-
// back to the placeholder so the candidate is still written for a human.
|
|
847
|
-
const useHostEdits = !!args.editsInput && targetIds.length > 0 && !args.dryRun;
|
|
848
|
-
const useAgentFallback = !args.editsInput && !!args.agent && targetIds.length > 0 && !args.dryRun;
|
|
849
|
-
if (useHostEdits || useAgentFallback) {
|
|
850
|
-
const agentTarget = lookupCanonicalTarget(targetIds[0]);
|
|
851
|
-
if (agentTarget) {
|
|
852
|
-
try {
|
|
853
|
-
// Resolve to the user's LOCAL editable files (installed SKILL.md, or a
|
|
854
|
-
// materialized project-local template/schema) so evolution writes to
|
|
855
|
-
// THEIR repo — no rebuild/republish. Dev repo resolves to source.
|
|
856
|
-
const resolved = await resolveTargetLocalFiles(targetIds[0], opts.repoRoot);
|
|
857
|
-
if (resolved.files.length === 0) {
|
|
858
|
-
throw new Error(`no local editable file for target ${targetIds[0]} in this repo`);
|
|
859
|
-
}
|
|
860
|
-
const allowedRel = resolved.files.map((f) => f.relPath);
|
|
861
|
-
const currentFiles = resolved.files.map((f) => ({
|
|
862
|
-
relPath: f.relPath,
|
|
863
|
-
content: f.content,
|
|
864
|
-
}));
|
|
865
|
-
// Close the feedback loop into the proposer (OPRO/AlphaEvolve-style):
|
|
866
|
-
// feed the current promoted-baseline loss + a scored history of prior
|
|
867
|
-
// candidates for this target so the agent can beat the best prior
|
|
868
|
-
// attempt and avoid rejected/high-loss approaches. Headless-agent path
|
|
869
|
-
// only (the host --from-edits path has no prompt). Best-effort: any
|
|
870
|
-
// failure degrades to no trajectory and never blocks proposing. For
|
|
871
|
-
// N>1 variants this is what makes siblings diverge from prior attempts.
|
|
872
|
-
let trajectoryContext;
|
|
873
|
-
let baselineLoss = null;
|
|
874
|
-
if (useAgentFallback) {
|
|
875
|
-
try {
|
|
876
|
-
const baseline = await readPromotedBaselineLoss(layout, targetIds[0]);
|
|
877
|
-
baselineLoss = baseline?.meanLoss ?? null;
|
|
878
|
-
const traj = await buildOptimizationTrajectory(layout, targetIds[0]);
|
|
879
|
-
trajectoryContext = renderTrajectoryBlock(traj, {
|
|
880
|
-
baselineLoss,
|
|
881
|
-
baselineCandidateId: baseline?.candidateId,
|
|
882
|
-
});
|
|
883
|
-
}
|
|
884
|
-
catch {
|
|
885
|
-
// best-effort scored history; never block proposing.
|
|
886
|
-
}
|
|
887
|
-
}
|
|
888
|
-
const out = useHostEdits
|
|
889
|
-
? packageHostEdits(args.editsInput, allowedRel, currentFiles, group, targetIds[0])
|
|
890
|
-
: await runCanonicalProposerAgent({
|
|
891
|
-
group,
|
|
892
|
-
groupHints,
|
|
893
|
-
// Bind the editable surface to the resolved LOCAL files, not the
|
|
894
|
-
// registry's package-source paths.
|
|
895
|
-
target: { ...agentTarget, files: allowedRel },
|
|
896
|
-
currentFiles,
|
|
897
|
-
trajectoryContext,
|
|
898
|
-
baselineLoss,
|
|
899
|
-
variantAngle,
|
|
900
|
-
spawn: opts.proposerSpawn,
|
|
901
|
-
binary: opts.proposerBinary,
|
|
902
|
-
});
|
|
903
|
-
diffPatch = out.diffPatch + '\n';
|
|
904
|
-
candidate.changedFiles = out.changedFiles;
|
|
905
|
-
agentEdits = out.edits;
|
|
906
|
-
rationaleForPkg = `${out.rationale}\n\n---\n\n${rationaleMd}`;
|
|
907
|
-
}
|
|
908
|
-
catch (err) {
|
|
909
|
-
stderr(`${useHostEdits ? 'host edits' : 'proposer agent'} did not produce a diff for ${candidateId}; keeping placeholder: ${err instanceof Error ? err.message : String(err)}`);
|
|
720
|
+
const candidateId = generateCandidateId();
|
|
721
|
+
const ts = now().toISOString();
|
|
722
|
+
const candidate = {
|
|
723
|
+
id: candidateId,
|
|
724
|
+
createdAt: ts,
|
|
725
|
+
updatedAt: ts,
|
|
726
|
+
source,
|
|
727
|
+
sourceHints: [...group.hintIds],
|
|
728
|
+
sourceAggregatedKey: group.thresholdKey,
|
|
729
|
+
targetIds,
|
|
730
|
+
changedFiles: [],
|
|
731
|
+
status: 'draft',
|
|
732
|
+
expectedBenefit,
|
|
733
|
+
riskLevel: group.dominantRisk,
|
|
734
|
+
rollbackPlan: 'Delete candidate directory or transition candidate to rejected. Canonical files are unchanged because no diff is applied at proposal time.',
|
|
735
|
+
proposalSummary,
|
|
736
|
+
evalPlanSummary,
|
|
737
|
+
rationaleSummary,
|
|
738
|
+
riskSummary,
|
|
739
|
+
};
|
|
740
|
+
let diffPatch = '# No diff yet. Apply candidate change manually before invoking static gate.\n';
|
|
741
|
+
let rationaleForPkg = rationaleMd;
|
|
742
|
+
// Structured full-file edits (persisted as edits.json so the promote/apply
|
|
743
|
+
// step can write the new content deterministically). Sourced from the host
|
|
744
|
+
// code agent (`--from-edits`), the host-authored channel.
|
|
745
|
+
let agentEdits;
|
|
746
|
+
// Generate a REAL diff for this group's target (proposal-only) via the HOST
|
|
747
|
+
// path (`--from-edits`): the host code agent already wrote the new file; the
|
|
748
|
+
// CLI only validates (frozen + target scope) and packages it. Skipped under
|
|
749
|
+
// --dry-run so a dry run never touches disk (resolving local files can
|
|
750
|
+
// MATERIALIZE a template default). Edits are validated against the target's
|
|
751
|
+
// resolved LOCAL files and rejected if they touch a frozen/gate-defining
|
|
752
|
+
// file. Any failure (no-op, invalid edits) falls back to the placeholder so
|
|
753
|
+
// the candidate is still written for a human.
|
|
754
|
+
const useHostEdits = !!args.editsInput && targetIds.length > 0 && !args.dryRun;
|
|
755
|
+
if (useHostEdits) {
|
|
756
|
+
const agentTarget = lookupCanonicalTarget(targetIds[0]);
|
|
757
|
+
if (agentTarget) {
|
|
758
|
+
try {
|
|
759
|
+
// Resolve to the user's LOCAL editable files (installed SKILL.md, or a
|
|
760
|
+
// materialized project-local template/schema) so evolution writes to
|
|
761
|
+
// THEIR repo — no rebuild/republish. Dev repo resolves to source.
|
|
762
|
+
const resolved = await resolveTargetLocalFiles(targetIds[0], opts.repoRoot);
|
|
763
|
+
if (resolved.files.length === 0) {
|
|
764
|
+
throw new Error(`no local editable file for target ${targetIds[0]} in this repo`);
|
|
910
765
|
}
|
|
766
|
+
const allowedRel = resolved.files.map((f) => f.relPath);
|
|
767
|
+
const currentFiles = resolved.files.map((f) => ({
|
|
768
|
+
relPath: f.relPath,
|
|
769
|
+
content: f.content,
|
|
770
|
+
}));
|
|
771
|
+
const out = packageHostEdits(args.editsInput, allowedRel, currentFiles, group, targetIds[0]);
|
|
772
|
+
diffPatch = out.diffPatch + '\n';
|
|
773
|
+
candidate.changedFiles = out.changedFiles;
|
|
774
|
+
agentEdits = out.edits;
|
|
775
|
+
rationaleForPkg = `${out.rationale}\n\n---\n\n${rationaleMd}`;
|
|
911
776
|
}
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
// this candidate (so a placeholder is silently written instead of their edit).
|
|
915
|
-
if (args.editsInput && !useHostEdits) {
|
|
916
|
-
stderr(`--from-edits payload not applied to ${candidateId}: ${args.dryRun
|
|
917
|
-
? 'host edits are not validated under --dry-run'
|
|
918
|
-
: 'surviving group has no resolved canonical target'}.`);
|
|
919
|
-
}
|
|
920
|
-
const pkg = {
|
|
921
|
-
candidate,
|
|
922
|
-
proposalMd,
|
|
923
|
-
diffPatch,
|
|
924
|
-
rationaleMd: rationaleForPkg,
|
|
925
|
-
evalPlanMd,
|
|
926
|
-
riskReportMd,
|
|
927
|
-
...(agentEdits && agentEdits.length > 0 ? { edits: agentEdits } : {}),
|
|
928
|
-
};
|
|
929
|
-
if (args.dryRun) {
|
|
930
|
-
const dryRunPath = path.join(layout.baseDir, candidateId);
|
|
931
|
-
proposed.push({
|
|
932
|
-
candidateId,
|
|
933
|
-
path: dryRunPath,
|
|
934
|
-
targetIds,
|
|
935
|
-
riskLevel: candidate.riskLevel,
|
|
936
|
-
source,
|
|
937
|
-
});
|
|
938
|
-
if (!args.json) {
|
|
939
|
-
stdout(`[dry-run] would propose ${candidateId} (target=${targetIds.join(',') || '(none)'}, risk=${candidate.riskLevel})`);
|
|
777
|
+
catch (err) {
|
|
778
|
+
stderr(`host edits did not produce a diff for ${candidateId}; keeping placeholder: ${err instanceof Error ? err.message : String(err)}`);
|
|
940
779
|
}
|
|
941
|
-
continue;
|
|
942
780
|
}
|
|
943
|
-
|
|
944
|
-
|
|
945
|
-
|
|
946
|
-
|
|
947
|
-
|
|
948
|
-
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
|
|
952
|
-
|
|
953
|
-
|
|
954
|
-
|
|
955
|
-
|
|
956
|
-
|
|
781
|
+
}
|
|
782
|
+
// Tell the operator when a supplied --from-edits payload was NOT applied to
|
|
783
|
+
// this candidate (so a placeholder is silently written instead of their edit).
|
|
784
|
+
if (args.editsInput && !useHostEdits) {
|
|
785
|
+
stderr(`--from-edits payload not applied to ${candidateId}: ${args.dryRun
|
|
786
|
+
? 'host edits are not validated under --dry-run'
|
|
787
|
+
: 'surviving group has no resolved canonical target'}.`);
|
|
788
|
+
}
|
|
789
|
+
const pkg = {
|
|
790
|
+
candidate,
|
|
791
|
+
proposalMd,
|
|
792
|
+
diffPatch,
|
|
793
|
+
rationaleMd: rationaleForPkg,
|
|
794
|
+
evalPlanMd,
|
|
795
|
+
riskReportMd,
|
|
796
|
+
...(agentEdits && agentEdits.length > 0 ? { edits: agentEdits } : {}),
|
|
797
|
+
};
|
|
798
|
+
if (args.dryRun) {
|
|
799
|
+
const dryRunPath = path.join(layout.baseDir, candidateId);
|
|
800
|
+
proposed.push({
|
|
801
|
+
candidateId,
|
|
802
|
+
path: dryRunPath,
|
|
803
|
+
targetIds,
|
|
804
|
+
riskLevel: candidate.riskLevel,
|
|
805
|
+
source,
|
|
806
|
+
});
|
|
807
|
+
if (!args.json) {
|
|
808
|
+
stdout(`[dry-run] would propose ${candidateId} (target=${targetIds.join(',') || '(none)'}, risk=${candidate.riskLevel})`);
|
|
957
809
|
}
|
|
810
|
+
continue;
|
|
811
|
+
}
|
|
812
|
+
try {
|
|
813
|
+
const written = await writeCandidatePackage(layout, pkg);
|
|
814
|
+
proposed.push({
|
|
815
|
+
candidateId,
|
|
816
|
+
path: written.candidateDir,
|
|
817
|
+
targetIds,
|
|
818
|
+
riskLevel: candidate.riskLevel,
|
|
819
|
+
source,
|
|
820
|
+
});
|
|
821
|
+
}
|
|
822
|
+
catch (err) {
|
|
823
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
824
|
+
errors.push(`failed to write candidate ${candidateId}: ${message}`);
|
|
825
|
+
stderr(`Failed to write candidate ${candidateId}: ${message}`);
|
|
958
826
|
}
|
|
959
827
|
}
|
|
960
828
|
// 7) Build result + output.
|
|
@@ -988,7 +856,7 @@ export async function runProposeCanonical(args, opts) {
|
|
|
988
856
|
}
|
|
989
857
|
/**
|
|
990
858
|
* Programmatic entrypoint for `self-evolution promote <id>` — the close-the-loop
|
|
991
|
-
* apply/rollback. Exported so tests +
|
|
859
|
+
* apply/rollback. Exported so tests + the host evolve-from-edits path can drive it directly.
|
|
992
860
|
*/
|
|
993
861
|
export async function runPromoteCommand(args, opts) {
|
|
994
862
|
const stdout = opts.stdout ?? ((l) => console.log(l));
|
|
@@ -1135,314 +1003,6 @@ export async function runRejectCommand(args, opts) {
|
|
|
1135
1003
|
return { exitCode: 1, error: message };
|
|
1136
1004
|
}
|
|
1137
1005
|
}
|
|
1138
|
-
/**
 * Programmatic entrypoint for `self-evolution trajectory <targetId>` — a
 * READ-ONLY view of the scored optimization-trajectory block the headless
 * proposer receives, so a HOST code agent (which authors edits via
 * `--from-edits` and never sees that prompt) can read the same prior-candidate
 * loss/verdict history before authoring. Reuses the exact builder/renderer the
 * proposer uses. Never mutates anything.
 *
 * @param {object} args - `targetId` (required), optional `maxEntries`, optional `json` flag.
 * @param {object} opts - `repoRoot`, plus optional `stdout` / `stderr` line sinks
 *   (default to `console.log` / `console.error`).
 * @returns {Promise<{exitCode: number, error?: string}>} exit code 0 on success,
 *   1 (with `error`) for an unknown target or any failure while reading history.
 */
export async function runTrajectoryCommand(args, opts) {
    const stdout = opts.stdout ?? ((l) => console.log(l));
    const stderr = opts.stderr ?? ((l) => console.error(l));
    // In --json mode errors are emitted as a JSON document on stdout; otherwise
    // the human-readable line goes to stderr.
    const fail = (message, textLine) => {
        if (args.json)
            stdout(JSON.stringify({ error: message }, null, 2));
        else
            stderr(textLine);
        return { exitCode: 1, error: message };
    };
    if (!lookupCanonicalTarget(args.targetId)) {
        const message = `Unknown canonical target: ${args.targetId}`;
        return fail(message, message);
    }
    const layout = resolveCandidateRepo(opts.repoRoot);
    try {
        const baseline = await readPromotedBaselineLoss(layout, args.targetId);
        // Truncate BEFORE the positivity check: a fractional value in (0, 1)
        // would otherwise pass `> 0` and then truncate to 0. Anything that is
        // not a finite number >= 1 falls back to the builder's default.
        const requested = Number.isFinite(args.maxEntries) ? Math.trunc(args.maxEntries) : 0;
        const maxEntries = requested >= 1 ? requested : undefined;
        const entries = await buildOptimizationTrajectory(layout, args.targetId, maxEntries !== undefined ? { maxEntries } : undefined);
        const baselineLoss = baseline?.meanLoss ?? null;
        if (args.json) {
            stdout(JSON.stringify({
                targetId: args.targetId,
                baselineLoss,
                baselineCandidateId: baseline?.candidateId ?? null,
                entries,
            }, null, 2));
            return { exitCode: 0 };
        }
        const block = renderTrajectoryBlock(entries, {
            baselineLoss,
            baselineCandidateId: baseline?.candidateId,
        });
        // An all-whitespace block means there is nothing to show for this target.
        stdout(block.trim().length === 0
            ? `No prior candidates with signal for ${args.targetId} yet.`
            : block);
        return { exitCode: 0 };
    }
    catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        return fail(message, `trajectory failed: ${message}`);
    }
}
|
|
1194
|
-
/**
 * ONE-BUTTON auto-evolve: learn → hints → propose(--agent) → gate → promote, in
 * one motion. Auto-applies the gate-passing winner per target onto the canonical
 * template (no per-change human approval), honoring the per-target switch + the
 * oracle/gate freeze, and snapshotting every write for rollback.
 *
 * Exported + fully injectable (proposer spawn, clock, io) so it is unit-testable
 * without a real `claude` binary.
 *
 * @param {object} args - `changeNames` (array), plus optional `auto`, `json`,
 *   `evolveTarget`, `freezeTarget`, `thresholdKey`, `minOccurrences`, `requireProven`.
 * @param {object} opts - `repoRoot`, plus optional `stdout`, `stderr`, `now`,
 *   `proposerSpawn`, `proposerBinary`.
 * @returns {Promise<object>} the run report (`exitCode`, `changeNames`, `loss`,
 *   `hintCount`, `hintsPaths`, `proposed`, `gated`, `promoted`, `skipped`,
 *   `healthPenalty`).
 */
export async function runAutoEvolve(args, opts) {
    const stdout = opts.stdout ?? ((l) => console.log(l));
    const stderr = opts.stderr ?? ((l) => console.error(l));
    const now = opts.now ?? (() => new Date());
    const autoPromote = args.auto !== false; // default true
    const describeError = (err) => (err instanceof Error ? err.message : String(err));
    const meanOrNull = (values) => values.length > 0
        ? values.reduce((a, b) => a + b, 0) / values.length
        : null;
    const layout = resolveCandidateRepo(opts.repoRoot);
    const policy = resolveTargetEvolutionPolicy({
        config: readProjectConfig(opts.repoRoot),
        evolveTarget: args.evolveTarget,
        freezeTarget: args.freezeTarget,
    });
    const report = {
        exitCode: 0,
        changeNames: args.changeNames,
        loss: null,
        hintCount: 0,
        hintsPaths: [],
        proposed: [],
        gated: [],
        promoted: [],
        skipped: [],
    };
    // Stage 1 — LEARN (backward pass) over EACH change: a single change is enough,
    // and several aggregate a recurring signal across them. Each change's hints are
    // persisted; a failed change is skipped, not fatal.
    const losses = [];
    const healthSignals = [];
    const hintsPaths = [];
    let totalHints = 0;
    for (const changeName of args.changeNames) {
        let learnReport;
        try {
            learnReport = await generateLearnReport({ projectRoot: opts.repoRoot, changeName });
        }
        catch (err) {
            report.skipped.push({
                candidateId: `learn:${changeName}`,
                reason: `learn failed: ${describeError(err)}`,
            });
            continue;
        }
        const l = learnReport.fitnessSample?.loss?.loss;
        if (typeof l === 'number')
            losses.push(l);
        const h = learnReport.fitnessSample?.healthSignal;
        if (typeof h === 'number')
            healthSignals.push(h);
        const hints = generateEvolutionHints(learnReport, policy);
        totalHints += hints.length;
        if (hints.length === 0)
            continue;
        // Persisting is part of the per-change work, so a failure here skips this
        // change (and is reported) instead of aborting the whole run.
        try {
            hintsPaths.push(await persistLearnHints({ projectRoot: opts.repoRoot, changeName, hints, now }));
        }
        catch (err) {
            const reason = `persisting hints failed: ${describeError(err)}`;
            report.skipped.push({ candidateId: `learn:${changeName}`, reason });
            stderr(`learn:${changeName}: ${reason}`);
        }
    }
    report.loss = meanOrNull(losses);
    // Mean RAW health signal across the change(s); null when none were measured
    // (stub source / no signal) ⇒ the health gate below cannot fire.
    const meanHealth = meanOrNull(healthSignals);
    report.healthPenalty = meanHealth;
    report.hintCount = totalHints;
    report.hintsPaths = hintsPaths;
    if (hintsPaths.length === 0) {
        finishAutoEvolve(report, args.json, stdout, stderr, 'No evolution hints from the change(s) — nothing to evolve.');
        return report;
    }
    // Only needed by the promote decision below, so it is read after the
    // nothing-to-evolve early return.
    const healthBaseline = await readHealthBaseline(opts.repoRoot);
    // Stage 2 — PROPOSE real candidate diffs via the proposer agent.
    // A single change is enough (minOccurrences defaults to 1). Raise it to require
    // a signal to RECUR across the provided changes before it evolves — neither
    // single-change nor multi-change is forced.
    const minOcc = args.minOccurrences && args.minOccurrences > 0 ? args.minOccurrences : 1;
    const proposeResult = await runProposeCanonical({
        fromLearn: hintsPaths,
        agent: true,
        thresholdKey: args.thresholdKey,
        aggregationOptions: {
            criticalMinOccurrences: minOcc,
            highMinOccurrences: minOcc,
            mediumOrLowMinOccurrences: minOcc,
            minDiversityForLowSeverity: minOcc,
        },
        evolveTarget: args.evolveTarget,
        freezeTarget: args.freezeTarget,
    }, {
        repoRoot: opts.repoRoot,
        now,
        proposerSpawn: opts.proposerSpawn,
        proposerBinary: opts.proposerBinary,
        // The nested propose step's stdout is discarded; only its stderr is forwarded.
        stdout: () => { },
        stderr: (l) => stderr(l),
    });
    report.proposed = proposeResult.proposed.map((p) => p.candidateId);
    // Stage 4 + 5 — GATE then SELECT/PROMOTE, one target at a time.
    const handledTargets = new Set();
    for (const candidateId of report.proposed) {
        let gate;
        try {
            gate = await runStaticCandidateGate(layout, candidateId, {
                applyTransition: true,
                targetPolicy: policy,
            });
        }
        catch (err) {
            report.skipped.push({
                candidateId,
                reason: `gate error: ${describeError(err)}`,
            });
            continue;
        }
        report.gated.push({ candidateId, passed: gate.passed });
        if (!gate.passed) {
            const reason = 'static gate failed (placeholder/no-op diff or frozen target)';
            report.skipped.push({ candidateId, reason });
            // The verdict record is advisory; a failed fitness read must not abort
            // the run, so the loss is simply recorded as null in that case.
            let lossAtDecision = null;
            try {
                lossAtDecision = (await readCandidateFitness(layout, candidateId)).meanLoss;
            }
            catch {
                // best-effort: keep null.
            }
            await recordVerdictBestEffort(layout, candidateId, {
                verdict: 'gate-failed',
                at: now().toISOString(),
                reason,
                lossAtDecision,
                decidedBy: 'static-gate',
            });
            continue;
        }
        let pkg;
        try {
            pkg = await readCandidatePackage(layout, candidateId);
        }
        catch (err) {
            report.skipped.push({
                candidateId,
                reason: `read failed: ${describeError(err)}`,
            });
            continue;
        }
        const targetId = pkg.candidate.targetIds[0];
        if (!targetId) {
            report.skipped.push({ candidateId, reason: 'candidate has no canonical target id' });
            continue;
        }
        if (handledTargets.has(targetId)) {
            report.skipped.push({ candidateId, reason: `target ${targetId} already handled this run` });
            continue;
        }
        // The target is marked handled as soon as its first gate-passing candidate
        // is considered, whatever the promote decision turns out to be.
        handledTargets.add(targetId);
        let fitness;
        try {
            fitness = await readCandidateFitness(layout, candidateId);
        }
        catch (err) {
            report.skipped.push({
                candidateId,
                reason: `fitness read failed: ${describeError(err)}`,
            });
            continue;
        }
        const decision = shouldAutoPromote({
            gatePassed: true,
            targetEvolvable: isCanonicalTargetEvolvable(targetId, policy),
            accumulatedCount: fitness.count,
            meanLoss: fitness.meanLoss,
            baselineLoss: report.loss,
            requireProvenImprovement: args.requireProven === true,
            healthPenalty: meanHealth,
            baselineHealthPenalty: healthBaseline?.healthPenalty ?? null,
        });
        if (!autoPromote) {
            report.skipped.push({
                candidateId,
                reason: `--no-auto: gated & ready (would promote: ${decision.promote ? 'yes' : `no — ${decision.reason}`})`,
            });
            continue;
        }
        if (!decision.promote) {
            report.skipped.push({ candidateId, reason: decision.reason });
            await recordVerdictBestEffort(layout, candidateId, {
                verdict: 'declined',
                at: now().toISOString(),
                reason: decision.reason,
                lossAtDecision: fitness.meanLoss,
                baselineLoss: report.loss,
                decidedBy: 'auto-evolve',
            });
            continue;
        }
        try {
            const applied = await applyCandidatePromotion(layout, candidateId, {
                repoRoot: opts.repoRoot,
                policy,
            });
            report.promoted.push({
                candidateId,
                targetIds: applied.targetIds,
                files: applied.appliedFiles.map((f) => f.file),
            });
            await recordVerdictBestEffort(layout, candidateId, {
                verdict: 'promoted',
                at: now().toISOString(),
                reason: `auto-evolve: ${decision.reason}`,
                lossAtDecision: fitness.meanLoss,
                baselineLoss: report.loss,
                decidedBy: 'auto-evolve',
            });
        }
        catch (err) {
            report.skipped.push({
                candidateId,
                reason: `promote failed: ${describeError(err)}`,
            });
        }
    }
    // Record the accepted health as the new per-repo baseline (best-effort) when
    // this run promoted something and had a real health signal. The next run's
    // health gate compares against this value.
    if (report.promoted.length > 0 && meanHealth != null) {
        try {
            await writeHealthBaseline(opts.repoRoot, {
                healthPenalty: meanHealth,
                updatedAt: now().toISOString(),
                sourceChange: args.changeNames.join(','),
            });
        }
        catch (err) {
            // Promotions are already applied; losing the report over a baseline
            // write would hide them, so the failure is only surfaced on stderr.
            stderr(`health baseline not updated: ${describeError(err)}`);
        }
    }
    finishAutoEvolve(report, args.json, stdout, stderr);
    return report;
}
|
|
1418
|
-
/**
 * Emit the final auto-evolve output for a run report.
 *
 * - `json` truthy: the whole report is printed as one JSON document on stdout.
 * - `report.error` set: a single failure line goes to stderr.
 * - otherwise: a text summary (header, loss, hint count) followed by either the
 *   optional `note` (which ends the output) or the proposed/gated/promoted/skipped
 *   breakdown and, when something was promoted, the follow-up instructions.
 *
 * @param {object} report - run report (`changeNames`, `loss`, `hintCount`,
 *   `proposed`, `gated`, `promoted`, `skipped`, optional `error`).
 * @param {boolean} json - print the report as JSON instead of text.
 * @param {(line: string) => void} stdout - sink for normal output lines.
 * @param {(line: string) => void} stderr - sink for the failure line.
 * @param {string} [note] - short-circuit message printed instead of the breakdown.
 * @returns {void}
 */
function finishAutoEvolve(report, json, stdout, stderr, note) {
    if (json) {
        stdout(JSON.stringify(report, null, 2));
        return;
    }
    if (report.error) {
        stderr(`auto-evolve failed: ${report.error}`);
        return;
    }
    // Any non-number loss (null OR undefined) renders as n/a; calling toFixed on
    // undefined would throw.
    const lossText = typeof report.loss === 'number' ? report.loss.toFixed(3) : 'n/a';
    stdout(`Auto-evolve: ${report.changeNames.join(', ')}`);
    stdout(` loss (functional⊕health): ${lossText}`);
    stdout(` hints: ${report.hintCount}`);
    if (note) {
        stdout(` ${note}`);
        return;
    }
    const gatePassCount = report.gated.filter((g) => g.passed).length;
    stdout(` proposed: ${report.proposed.length} | gate-pass: ${gatePassCount} | PROMOTED: ${report.promoted.length} | skipped: ${report.skipped.length}`);
    for (const p of report.promoted) {
        stdout(` ✓ PROMOTED ${p.candidateId} -> [${p.targetIds.join(', ')}]: ${p.files.join(', ')}`);
    }
    for (const s of report.skipped) {
        stdout(` · skipped ${s.candidateId}: ${s.reason}`);
    }
    if (report.promoted.length > 0) {
        stdout(' Rebuild + republish to ship the evolved template(s).');
        stdout(' Revert any promotion with: self-evolution promote <id> --rollback');
    }
}
|
|
1446
1006
|
/**
|
|
1447
1007
|
* HOST-AUTHORED one-button evolve. The single non-interactive
|
|
1448
1008
|
* host-authored-edit → gate → observed-verified promote command.
|
|
@@ -1504,7 +1064,7 @@ export async function runEvolveFromEdits(args, opts) {
|
|
|
1504
1064
|
}
|
|
1505
1065
|
return report;
|
|
1506
1066
|
};
|
|
1507
|
-
// Non-interactive contract: --yes is required (
|
|
1067
|
+
// Non-interactive contract: --yes is required (one-button host-authored
|
|
1508
1068
|
// confirmation), and --agent is REFUSED (this path is host-authored, never spawns).
|
|
1509
1069
|
if (args.agent) {
|
|
1510
1070
|
return fail(2, 'error-bad-input', '--agent is not allowed: evolve-from-edits is host-authored and never spawns the proposer.', false);
|
|
@@ -2122,173 +1682,6 @@ export async function runPromotionReportCommand(args, opts) {
|
|
|
2122
1682
|
}
|
|
2123
1683
|
return { exitCode: 0, report };
|
|
2124
1684
|
}
|
|
2125
|
-
/**
 * Programmatic entrypoint for `self-evolution evolve` — the GA outer loop.
 *
 * Chains the previously-inert pieces into one live pass:
 *   groupCandidatesByTarget → (optional replay scoring that APPENDS fitness)
 *   → rankCandidatesForTarget → select best → human-gated promotion report.
 *
 * Invariants: frozen targets (per the resolved policy) are skipped; promotion
 * is NEVER applied here (the report keeps its human-review gate); the oracle is
 * never touched (replay only runs tests).
 *
 * @param {object} args - optional `target`, `replay`, `changeIds`, `runChange`,
 *   `write`, `markOutcompeted`, `at`, `json`, `evolveTarget`, `freezeTarget`.
 * @param {object} opts - `repoRoot`, plus optional `stdout` / `stderr` line sinks.
 * @returns {Promise<{exitCode: number, targets: object[], error?: string}>}
 *   per-target summaries; exit code 1 only when candidates cannot be grouped.
 */
export async function runEvolveOuterLoopCommand(args, opts) {
    const stdout = opts.stdout ?? ((line) => console.log(line));
    const stderr = opts.stderr ?? ((line) => console.error(line));
    // In --json mode stdout must carry exactly one JSON document, so progress
    // lines are diverted to stderr there; in text mode they stay on stdout.
    const progress = args.json ? stderr : stdout;
    const layout = resolveCandidateRepo(opts.repoRoot);
    const config = readProjectConfig(opts.repoRoot);
    const policy = resolveTargetEvolutionPolicy({
        config,
        evolveTarget: args.evolveTarget,
        freezeTarget: args.freezeTarget,
    });
    const at = args.at ?? new Date().toISOString();
    let byTarget;
    try {
        byTarget = await groupCandidatesByTarget(layout);
    }
    catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        if (args.json)
            stdout(JSON.stringify({ error: message }, null, 2));
        else
            stderr(message);
        return { exitCode: 1, targets: [], error: message };
    }
    let targetIds = [...byTarget.keys()].sort();
    if (args.target)
        targetIds = targetIds.filter((t) => t === args.target);
    const replayMode = args.replay === true && (args.changeIds?.length ?? 0) > 0;
    if (args.replay === true && !replayMode) {
        stderr('--replay requires at least one --change <id>; falling back to accumulated fitness');
    }
    // The replay runner (and the metric source behind it) is only built when
    // replay scoring will actually run.
    const runChange = replayMode
        ? (args.runChange ??
            makeReplayRunChange({ repoRoot: opts.repoRoot, healthSource: resolveMetricSource(config) }))
        : undefined;
    const summaries = [];
    for (const targetId of targetIds) {
        const candidateIds = byTarget.get(targetId) ?? [];
        if (!isCanonicalTargetEvolvable(targetId, policy)) {
            progress(`target ${targetId}: frozen by policy — skipped (${candidateIds.length} candidate(s))`);
            summaries.push({ targetId, candidateIds, frozen: true, ranked: [], best: null });
            continue;
        }
        let scored;
        if (replayMode) {
            scored = await scoreCandidatesByReplay({
                layout,
                candidateIds,
                changeIds: args.changeIds,
                runChange,
                at,
                log: (line) => progress(` ${line}`),
            });
        }
        const ranked = await rankCandidatesForTarget(layout, candidateIds, targetId);
        const best = ranked.length > 0 ? ranked[0].candidateId : null;
        let promotionReportPath;
        if (best && args.write) {
            // Human-gated: writes the report + records its path; NEVER transitions
            // status / promotes. The report retains its "human review required" gate.
            await generatePromotionReport(layout, best, { write: true });
            promotionReportPath = path.join(layout.baseDir, best, 'promotion-report.md');
        }
        // Mark sibling-variant losers `outcompeted` (advisory metadata; status is
        // never changed). A loser is a PROVEN candidate (meanLoss !== null) in the
        // SAME variantGroup as `best` with a strictly higher meanLoss. The
        // optimization-trajectory block then renders them as negative examples.
        let outcompeted;
        if (args.markOutcompeted && best) {
            const bestRow = ranked.find((r) => r.candidateId === best);
            if (bestRow && bestRow.meanLoss !== null) {
                const metas = await listCandidates(layout, { targetId });
                const vgById = new Map(metas.map((c) => [c.id, c.variantGroup]));
                const bestVg = vgById.get(best);
                if (bestVg) {
                    outcompeted = [];
                    for (const r of ranked) {
                        if (r.candidateId === best || r.meanLoss === null)
                            continue;
                        if (vgById.get(r.candidateId) !== bestVg)
                            continue;
                        if (r.meanLoss > bestRow.meanLoss) {
                            await recordVerdictBestEffort(layout, r.candidateId, {
                                verdict: 'outcompeted',
                                at,
                                reason: `lost GA ranking to ${best} (meanLoss ${r.meanLoss.toFixed(3)} vs ${bestRow.meanLoss.toFixed(3)})`,
                                lossAtDecision: r.meanLoss,
                                baselineLoss: bestRow.meanLoss,
                                decidedBy: 'evolve-outer-loop',
                            });
                            outcompeted.push(r.candidateId);
                        }
                    }
                }
            }
        }
        summaries.push({
            targetId,
            candidateIds,
            frozen: false,
            scored,
            ranked,
            best,
            promotionReportPath,
            // `outcompeted` is only present when marking actually ran for this target.
            ...(outcompeted ? { outcompeted } : {}),
        });
    }
    if (args.json) {
        stdout(JSON.stringify({ exitCode: 0, targets: summaries }, null, 2));
    }
    else {
        stdout(renderEvolveOuterLoopSummary(summaries, { replayMode, write: args.write === true }));
    }
    return { exitCode: 0, targets: summaries };
}
|
|
2248
|
-
/**
 * Render the text summary of a GA outer-loop pass.
 *
 * Output is a newline-joined document: a title, a mode line, then one `##`
 * section per target listing (when present) replay scoring results, every
 * ranked candidate (the best one starred), and the best candidate with its
 * promotion-report path and any candidates marked outcompeted.
 *
 * @param {object[]} targets - per-target summaries (`targetId`, `candidateIds`,
 *   `frozen`, optional `scored`, `ranked`, `best`, optional `promotionReportPath`,
 *   optional `outcompeted`).
 * @param {{replayMode: boolean, write: boolean}} ctx - how the pass was run.
 * @returns {string} the rendered summary.
 */
function renderEvolveOuterLoopSummary(targets, ctx) {
    const modeLabel = ctx.replayMode ? 'replay (corpus scoring)' : 'accumulated fitness';
    const lines = [
        '# GA outer loop',
        `mode: ${modeLabel} · promotion: human-gated (never auto-promoted)`,
    ];
    if (targets.length === 0) {
        lines.push('', 'No candidates found.');
        return lines.join('\n');
    }
    for (const t of targets) {
        lines.push('', `## ${t.targetId} (${t.candidateIds.length} candidate(s))`);
        if (t.frozen) {
            lines.push('- frozen by policy — skipped');
            continue;
        }
        if (t.scored) {
            // One pass: entries carrying an `error` were skipped, the rest were scored.
            const failed = t.scored.filter((s) => s.error);
            lines.push(`- replay: scored ${t.scored.length - failed.length}, skipped ${failed.length}`);
            for (const s of failed) {
                lines.push(` - skipped ${s.candidateId}: ${s.error}`);
            }
        }
        for (const r of t.ranked) {
            // A candidate without any fitness record has meanLoss === null.
            const detail = r.meanLoss !== null
                ? `meanLoss=${r.meanLoss.toFixed(4)} meanPass=${(r.meanPassRate ?? 0).toFixed(4)} n=${r.count} trend=${r.trend}`
                : 'unproven (no fitness records yet)';
            const marker = r.candidateId === t.best ? '★' : ' ';
            lines.push(`- ${marker} ${r.candidateId}: ${detail}`);
        }
        if (!t.best) {
            lines.push('- best: (none)');
            continue;
        }
        lines.push(`- best: ${t.best}`);
        if (ctx.write && t.promotionReportPath) {
            lines.push(`- promotion report (human review required): ${t.promotionReportPath}`);
        }
        if (t.outcompeted && t.outcompeted.length > 0) {
            lines.push(`- marked outcompeted: ${t.outcompeted.join(', ')}`);
        }
    }
    return lines.join('\n');
}
|
|
2292
1685
|
function parseRequireDiff(value) {
|
|
2293
1686
|
if (value === undefined)
|
|
2294
1687
|
return true;
|