synergyspec-selfevolving 2.1.2 → 2.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/learn.js +13 -3
- package/dist/commands/self-evolution-episode.d.ts +6 -1
- package/dist/commands/self-evolution-episode.js +8 -1
- package/dist/commands/self-evolution.d.ts +2 -2
- package/dist/commands/self-evolution.js +10 -10
- package/dist/commands/workflow/status.js +5 -0
- package/dist/core/change-readiness.d.ts +1 -1
- package/dist/core/change-readiness.js +66 -11
- package/dist/core/fitness/test-metrics.d.ts +33 -0
- package/dist/core/fitness/test-metrics.js +67 -0
- package/dist/core/learn.js +11 -2
- package/dist/core/project-config.d.ts +3 -0
- package/dist/core/project-config.js +7 -1
- package/dist/core/self-evolution/critic-agent.js +13 -5
- package/dist/core/self-evolution/edits-contract.d.ts +15 -5
- package/dist/core/self-evolution/edits-contract.js +26 -16
- package/dist/core/self-evolution/episode-orchestrator.d.ts +16 -9
- package/dist/core/self-evolution/episode-orchestrator.js +126 -35
- package/dist/core/self-evolution/episode-store.d.ts +34 -11
- package/dist/core/self-evolution/episode-store.js +45 -10
- package/dist/core/self-evolution/evolving-agent.d.ts +12 -12
- package/dist/core/self-evolution/evolving-agent.js +46 -48
- package/dist/core/self-evolution/host-harness.d.ts +68 -2
- package/dist/core/self-evolution/host-harness.js +208 -21
- package/dist/core/self-evolution/policy/policy-store.d.ts +8 -6
- package/dist/core/self-evolution/policy/policy-store.js +124 -24
- package/dist/core/self-evolution/proposer-slice.d.ts +4 -3
- package/dist/core/self-evolution/reward-agent.d.ts +11 -1
- package/dist/core/self-evolution/reward-agent.js +53 -20
- package/dist/core/self-evolution/reward-aggregator.d.ts +18 -0
- package/dist/core/self-evolution/reward-aggregator.js +53 -3
- package/dist/core/self-evolution/reward-deepread.d.ts +64 -0
- package/dist/core/self-evolution/reward-deepread.js +112 -0
- package/dist/core/templates/workflows/learn.js +3 -2
- package/dist/core/templates/workflows/self-evolving.js +5 -2
- package/dist/core/trajectory/facts.d.ts +69 -2
- package/dist/core/trajectory/facts.js +179 -10
- package/dist/core/trajectory/skeleton.d.ts +10 -0
- package/dist/core/trajectory/skeleton.js +24 -3
- package/package.json +4 -3
- package/schemas/spec-driven/templates/design.md +2 -1
package/dist/commands/learn.js
CHANGED
|
@@ -5,7 +5,7 @@ import { readProjectConfig } from '../core/project-config.js';
|
|
|
5
5
|
import { assembleTrajectoryContext, } from '../core/learn/trajectory-assembler.js';
|
|
6
6
|
import { findTranscriptsForChange, resolveChangeDir, validateExplicitTrajectoryHandle, } from '../core/learn/trajectory-discovery.js';
|
|
7
7
|
import { getTrajectoryForChange } from '../core/trajectory/registry.js';
|
|
8
|
-
import { toTrajectoryFacts, describeRunnerResults } from '../core/trajectory/facts.js';
|
|
8
|
+
import { toTrajectoryFacts, describeRunnerResults, extractExpectedTestPaths } from '../core/trajectory/facts.js';
|
|
9
9
|
import { toActionSkeleton } from '../core/trajectory/skeleton.js';
|
|
10
10
|
import { resolveHostHarness, resolveHostHarnessForRepo } from '../core/self-evolution/host-harness.js';
|
|
11
11
|
import { mineSuccessSignals } from '../core/self-evolution/success-channel.js';
|
|
@@ -301,13 +301,23 @@ export function registerLearnCommand(program, deps = {}) {
|
|
|
301
301
|
process.env.SYNERGYSPEC_SELFEVOLVING_SESSION_ID = opts.sessionId;
|
|
302
302
|
try {
|
|
303
303
|
const adapterTrajectory = await getTrajectoryForChange(projectRoot, change);
|
|
304
|
+
// Change-scope guard input so debug-trajectory's facts + per-runner
|
|
305
|
+
// detail reflect the same scope demotion the loop uses (surfaces a
|
|
306
|
+
// green-but-out-of-scope graded run instead of hiding it).
|
|
307
|
+
const adapterExpectedTestPaths = extractExpectedTestPaths(await (await import('node:fs/promises'))
|
|
308
|
+
.readFile(path.join(projectRoot, 'synergyspec-selfevolving', 'changes', change, 'spec-tests.md'), 'utf8')
|
|
309
|
+
.catch(() => undefined));
|
|
304
310
|
payload.adapter = {
|
|
305
311
|
resolvedHarness: resolveHostHarness(),
|
|
306
312
|
sessionId: adapterTrajectory?.sessionId ?? null,
|
|
307
313
|
turns: adapterTrajectory?.turns.length ?? 0,
|
|
308
314
|
sourcePaths: adapterTrajectory ? [...new Set(adapterTrajectory.sourcePaths)] : [],
|
|
309
|
-
facts: toTrajectoryFacts(adapterTrajectory, change
|
|
310
|
-
|
|
315
|
+
facts: toTrajectoryFacts(adapterTrajectory, change, {
|
|
316
|
+
expectedTestPaths: adapterExpectedTestPaths,
|
|
317
|
+
}),
|
|
318
|
+
runnerResults: describeRunnerResults(adapterTrajectory, {
|
|
319
|
+
expectedTestPaths: adapterExpectedTestPaths,
|
|
320
|
+
}),
|
|
311
321
|
// Bounded play-by-play projection (file edits / test runs /
|
|
312
322
|
// commands) so a wrong skeleton is visible in one command.
|
|
313
323
|
steps: toActionSkeleton(adapterTrajectory),
|
|
@@ -89,7 +89,12 @@ export interface RunEpisodeCommandResult {
|
|
|
89
89
|
exitCode: number;
|
|
90
90
|
/** Present when the episode ran (not busy / not an error). */
|
|
91
91
|
result?: RunEpisodeResult;
|
|
92
|
-
/**
|
|
92
|
+
/**
|
|
93
|
+
* Present when the target's in-flight slot was already held by another
|
|
94
|
+
* episode. The command emits the EXACT machine outcome literal
|
|
95
|
+
* `busy-in-flight` (lowercase, hyphenated, NOT error-prefixed): a TRANSIENT,
|
|
96
|
+
* self-healing concurrency deferral, NEVER an `error-...` stop.
|
|
97
|
+
*/
|
|
93
98
|
busy?: RunEpisodeBusy;
|
|
94
99
|
error?: string;
|
|
95
100
|
}
|
|
@@ -157,7 +157,7 @@ export async function runEpisodeCommand(args, opts) {
|
|
|
157
157
|
// `.synergyspec-selfevolving/host-harness.json`, so even when the
|
|
158
158
|
// orchestrator's reward/evolving agents later spawn from an env-less Task
|
|
159
159
|
// subagent they read the seeded harness instead of defaulting to the
|
|
160
|
-
// 'claude' binary (the ydata
|
|
160
|
+
// 'claude' binary (the ydata 演进智能体 EVOLVING AGENT spawn failure).
|
|
161
161
|
const harness = await resolveHostHarnessForRepo(opts.repoRoot);
|
|
162
162
|
const episodeOptions = {
|
|
163
163
|
repoRoot: opts.repoRoot,
|
|
@@ -190,6 +190,13 @@ export async function runEpisodeCommand(args, opts) {
|
|
|
190
190
|
stdout(JSON.stringify({ exitCode: 0, busy: outcome }, null, 2));
|
|
191
191
|
}
|
|
192
192
|
else {
|
|
193
|
+
// Emit the EXACT machine outcome literal so the runner skill COPIES it
|
|
194
|
+
// verbatim into its '## Episode Verdict' block instead of INFERRING an
|
|
195
|
+
// 'error-in-flight' from prose. busy-in-flight is a TRANSIENT, self-healing
|
|
196
|
+
// concurrency deferral (another in-flight episode holds the SAME 策略
|
|
197
|
+
// POLICY target) — it is NOT error-prefixed and must never be classified as
|
|
198
|
+
// an error. The lock self-heals; recommend WAIT-AND-RETRY.
|
|
199
|
+
stdout('Outcome: busy-in-flight');
|
|
193
200
|
stdout(`Episode not started for ${targetId}: ${outcome.reason}`);
|
|
194
201
|
}
|
|
195
202
|
return { exitCode: 0, busy: outcome };
|
|
@@ -6,7 +6,7 @@ export declare function registerSelfEvolutionCommand(program: Command): void;
|
|
|
6
6
|
* Candidate edits authored by the HOST code agent (the one running the learn
|
|
7
7
|
* skill, with full repo context) and handed to the CLI via `--from-edits`. The
|
|
8
8
|
* host GENERATES the new file contents; the CLI re-validates them against the
|
|
9
|
-
* target's frozen + scoped files exactly as the headless
|
|
9
|
+
* target's frozen + scoped files exactly as the headless 演进智能体 EVOLVING AGENT path does,
|
|
10
10
|
* then packages them. This is the preferred path; `--agent` is the no-host
|
|
11
11
|
* fallback.
|
|
12
12
|
*/
|
|
@@ -225,7 +225,7 @@ export interface EvolveFromEditsReport {
|
|
|
225
225
|
* HOST-AUTHORED one-button evolve. The single non-interactive
|
|
226
226
|
* host-authored-edit → gate → observed-verified promote command.
|
|
227
227
|
*
|
|
228
|
-
* Flow (NEVER spawns
|
|
228
|
+
* Flow (NEVER spawns an agent):
|
|
229
229
|
* 1. Read `--from-edits` (path or '-') into a {@link HostEditsInput}.
|
|
230
230
|
* 2. {@link runProposeCanonical} with single-change aggregation + the host
|
|
231
231
|
* `editsInput` to PACKAGE the host candidate (proposal-only). Take
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import * as fs from 'node:fs';
|
|
2
2
|
import * as path from 'node:path';
|
|
3
3
|
import fastGlob from 'fast-glob';
|
|
4
|
-
import { aggregateLearnEvolutionHints, applyCandidatePromotion, rollbackCandidatePromotion, shouldAutoPromote, isEvidenceComplete, readCandidateFitness, readHealthBaseline, writeHealthBaseline, readCandidatePackage, resolveTargetLocalFiles, CANONICAL_CANDIDATE_SOURCES, CANONICAL_TARGETS, collectArchiveExperiences, EVOLVABLE_PART_DESCRIPTIONS, EVOLVABLE_PARTS, evaluateTaskDecompositionForChange, evaluateToolEvolutionCandidate, generateCandidateId, generatePromotionReport, readPromotedBaselineLoss, checkLossRegression, recordVerdictBestEffort, updateCandidateStatus, isEvolutionPartEnabled, findSimilarArchiveExperiences, listCanonicalTargets, lookupCanonicalTarget, validateCandidateEdits, renderUnifiedDiff,
|
|
4
|
+
import { aggregateLearnEvolutionHints, applyCandidatePromotion, rollbackCandidatePromotion, shouldAutoPromote, isEvidenceComplete, readCandidateFitness, readHealthBaseline, writeHealthBaseline, readCandidatePackage, resolveTargetLocalFiles, CANONICAL_CANDIDATE_SOURCES, CANONICAL_TARGETS, collectArchiveExperiences, EVOLVABLE_PART_DESCRIPTIONS, EVOLVABLE_PARTS, evaluateTaskDecompositionForChange, evaluateToolEvolutionCandidate, generateCandidateId, generatePromotionReport, readPromotedBaselineLoss, checkLossRegression, recordVerdictBestEffort, updateCandidateStatus, isEvolutionPartEnabled, findSimilarArchiveExperiences, listCanonicalTargets, lookupCanonicalTarget, validateCandidateEdits, renderUnifiedDiff, EvolvingAgentNoOp, resolveTargetEvolutionPolicy, resolveKindOnlyPinTarget, detectUnbindableHintObservations, isCanonicalTargetEvolvable, parseEvolutionSwitchOptions, renderAlignmentReport, renderArchiveExperienceBlock, renderStaticGateSummary, renderToolEvolutionGuardReport, renderEvolutionSwitches, requireCanonicalTarget, resolveCandidateRepo, runStaticCandidateGate, shouldTriggerCandidate, validateLearnEvolutionHint, writeCandidatePackage, verifySpecCodeAlignmentForChange, } from '../core/self-evolution/index.js';
|
|
5
5
|
import { generateLearnReport } from '../core/learn.js';
|
|
6
6
|
import { validateExplicitTrajectoryHandle } from '../core/learn/trajectory-discovery.js';
|
|
7
7
|
import { validateChangeExists } from './workflow/shared.js';
|
|
@@ -330,7 +330,7 @@ export function registerSelfEvolutionCommand(program) {
|
|
|
330
330
|
});
|
|
331
331
|
cmd
|
|
332
332
|
.command('evolve-from-edits')
|
|
333
|
-
.description('HOST-AUTHORED one-button evolve: package edits the host code agent already wrote (--from-edits) for ONE learn signal, run the static gate, and auto-promote ONLY when the change\'s learn report carries an OBSERVED-VERIFIED green signal (a real test run was seen) onto the canonical LOCAL file. Never spawns
|
|
333
|
+
.description('HOST-AUTHORED one-button evolve: package edits the host code agent already wrote (--from-edits) for ONE learn signal, run the static gate, and auto-promote ONLY when the change\'s learn report carries an OBSERVED-VERIFIED green signal (a real test run was seen) onto the canonical LOCAL file. Never spawns an agent; --agent is refused.')
|
|
334
334
|
.requiredOption('--from-learn <hints.json>', 'the change\'s learn hints.json to aggregate (one signal)')
|
|
335
335
|
.requiredOption('--evolve-target <targetId>', 'the single canonical target id to evolve')
|
|
336
336
|
.requiredOption('--from-edits <file>', "JSON the host agent wrote ({ targetId?, rationale?, edits: [{relPath, content}] }; '-' reads stdin)")
|
|
@@ -339,7 +339,7 @@ export function registerSelfEvolutionCommand(program) {
|
|
|
339
339
|
.option('--require-proven', 'only promote on a MEASURED fitness improvement (refuse unproven candidates)')
|
|
340
340
|
.option('--transcript <path>', 'Explicit transcript .jsonl to grade (bypasses change-window discovery; Claude transcript store only)')
|
|
341
341
|
.option('--session-id <id>', 'Explicit Claude session id to grade (bypasses change-window discovery; Claude transcript store only)')
|
|
342
|
-
.option('--agent', 'REFUSED: this path is host-authored and never spawns
|
|
342
|
+
.option('--agent', 'REFUSED: this path is host-authored and never spawns an agent')
|
|
343
343
|
.option('--yes', 'required: confirm the non-interactive auto-promote')
|
|
344
344
|
.option('--json', 'output the full EvolveFromEditsReport JSON')
|
|
345
345
|
.action(async (options) => {
|
|
@@ -409,11 +409,11 @@ export function registerSelfEvolutionCommand(program) {
|
|
|
409
409
|
}
|
|
410
410
|
/**
|
|
411
411
|
* Validate host-authored candidate edits (the `--from-edits` path) and turn them
|
|
412
|
-
* into the same {@link CanonicalProposeOutput} shape the
|
|
413
|
-
* returns. Reuses {@link validateCandidateEdits} (frozen + target-scope checks)
|
|
412
|
+
* into the same {@link CanonicalProposeOutput} shape the 演进智能体 EVOLVING
|
|
413
|
+
* AGENT returns. Reuses {@link validateCandidateEdits} (frozen + target-scope checks)
|
|
414
414
|
* and {@link renderUnifiedDiff}, so the host path and the agent path are
|
|
415
415
|
* byte-identical in what they accept and how they package. Throws
|
|
416
|
-
* {@link
|
|
416
|
+
* {@link EvolvingAgentNoOp} when the edits change nothing.
|
|
417
417
|
*/
|
|
418
418
|
function packageHostEdits(editsInput, allowedFiles, currentFiles, group, targetId) {
|
|
419
419
|
if (editsInput.targetId && editsInput.targetId !== targetId) {
|
|
@@ -425,7 +425,7 @@ function packageHostEdits(editsInput, allowedFiles, currentFiles, group, targetI
|
|
|
425
425
|
// nothing to evolve — surface it as a no-op (placeholder), like the agent path.
|
|
426
426
|
const changesSomething = validated.some((e) => (oldByPath.get(e.relPath) ?? '') !== e.content);
|
|
427
427
|
if (!changesSomething) {
|
|
428
|
-
throw new
|
|
428
|
+
throw new EvolvingAgentNoOp();
|
|
429
429
|
}
|
|
430
430
|
const diffPatch = validated
|
|
431
431
|
.map((e) => renderUnifiedDiff(e.relPath, oldByPath.get(e.relPath) ?? '', e.content))
|
|
@@ -1007,7 +1007,7 @@ export async function runRejectCommand(args, opts) {
|
|
|
1007
1007
|
* HOST-AUTHORED one-button evolve. The single non-interactive
|
|
1008
1008
|
* host-authored-edit → gate → observed-verified promote command.
|
|
1009
1009
|
*
|
|
1010
|
-
* Flow (NEVER spawns
|
|
1010
|
+
* Flow (NEVER spawns an agent):
|
|
1011
1011
|
* 1. Read `--from-edits` (path or '-') into a {@link HostEditsInput}.
|
|
1012
1012
|
* 2. {@link runProposeCanonical} with single-change aggregation + the host
|
|
1013
1013
|
* `editsInput` to PACKAGE the host candidate (proposal-only). Take
|
|
@@ -1067,7 +1067,7 @@ export async function runEvolveFromEdits(args, opts) {
|
|
|
1067
1067
|
// Non-interactive contract: --yes is required (one-button host-authored
|
|
1068
1068
|
// confirmation), and --agent is REFUSED (this path is host-authored, never spawns).
|
|
1069
1069
|
if (args.agent) {
|
|
1070
|
-
return fail(2, 'error-bad-input', '--agent is not allowed: evolve-from-edits is host-authored and never spawns
|
|
1070
|
+
return fail(2, 'error-bad-input', '--agent is not allowed: evolve-from-edits is host-authored and never spawns an agent.', false);
|
|
1071
1071
|
}
|
|
1072
1072
|
if (!args.yes) {
|
|
1073
1073
|
return fail(2, 'error-bad-input', '--yes is required: evolve-from-edits promotes onto your local files non-interactively.', false);
|
|
@@ -1375,7 +1375,7 @@ function renderProposalMd(group, expectedBenefit) {
|
|
|
1375
1375
|
lines.push(expectedBenefit);
|
|
1376
1376
|
lines.push('');
|
|
1377
1377
|
lines.push('## Status');
|
|
1378
|
-
lines.push('- diff.patch is intentionally empty. Apply the candidate change manually (or via a future automated
|
|
1378
|
+
lines.push('- diff.patch is intentionally empty. Apply the candidate change manually (or via a future automated agent) before invoking the static gate.');
|
|
1379
1379
|
return lines.join('\n') + '\n';
|
|
1380
1380
|
}
|
|
1381
1381
|
function renderRationaleMd(group) {
|
|
@@ -67,6 +67,11 @@ export function printStatusText(status, readiness) {
|
|
|
67
67
|
else if (evolution.status === 'refused' || evolution.status === 'error' || evolution.status === 'promoted') {
|
|
68
68
|
console.log(chalk.yellow(`Evolution: ${evolution.status}${evolution.reason ? ` — ${evolution.reason}` : ''}`));
|
|
69
69
|
}
|
|
70
|
+
else if (evolution.status === 'busy') {
|
|
71
|
+
// A transient concurrency deferral (another in-flight episode holds the
|
|
72
|
+
// 策略 POLICY target). NOT a failure and NOT 'not-run' — self-heals; retry.
|
|
73
|
+
console.log(chalk.yellow(`Evolution: busy${evolution.reason ? ` — ${evolution.reason}` : ''} (another episode is in flight; retry shortly)`));
|
|
74
|
+
}
|
|
70
75
|
else {
|
|
71
76
|
// Hyphenated to match the machine enum ('not-run', change-readiness.ts)
|
|
72
77
|
// and the critic skill's verbatim "status shows `Evolution: not-run`".
|
|
@@ -8,7 +8,7 @@ export type TaskReadinessStatus = 'no-tasks' | 'complete' | 'in-progress';
|
|
|
8
8
|
* surfaced for visibility only — it does NOT gate `isArchiveReady` (a safe refusal
|
|
9
9
|
* must not block archiving a finished change).
|
|
10
10
|
*/
|
|
11
|
-
export type EvolutionOutcomeStatus = 'not-run' | 'promoted' | 'refused' | 'error';
|
|
11
|
+
export type EvolutionOutcomeStatus = 'not-run' | 'promoted' | 'refused' | 'busy' | 'error';
|
|
12
12
|
export interface ArtifactStatusSummary {
|
|
13
13
|
done: number;
|
|
14
14
|
ready: number;
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { promises as fs } from 'fs';
|
|
2
2
|
import path from 'path';
|
|
3
3
|
import { formatChangeStatus, loadChangeContext, } from './artifact-graph/index.js';
|
|
4
|
+
import { listEpisodes } from './self-evolution/episode-store.js';
|
|
4
5
|
const TASK_PATTERN = /^[-*]\s+\[([\sx])\]\s*(.*)$/i;
|
|
5
6
|
const REQUIRED_EVIDENCE_FILES = [
|
|
6
7
|
['specTests', 'spec-tests.md'],
|
|
@@ -45,7 +46,7 @@ export async function getChangeReadiness(projectRoot, changeName, schemaName) {
|
|
|
45
46
|
const artifactStatus = deriveArtifactWorkflowStatus(artifactGraph);
|
|
46
47
|
const taskReadiness = await readTaskReadiness(context.changeDir);
|
|
47
48
|
const evidence = await readEvidenceReadiness(context.changeDir);
|
|
48
|
-
const evolution = await readEvolutionOutcome(context.changeDir);
|
|
49
|
+
const evolution = await readEvolutionOutcome(projectRoot, context.changeDir, changeName);
|
|
49
50
|
const status = deriveChangeReadinessStatus(artifactStatus, taskReadiness.total, taskReadiness.completed);
|
|
50
51
|
return {
|
|
51
52
|
changeName,
|
|
@@ -141,29 +142,40 @@ async function readEvidenceReadiness(changeDir) {
|
|
|
141
142
|
};
|
|
142
143
|
}
|
|
143
144
|
/**
|
|
144
|
-
* Read the CLI-written evolution outcome for the change, if any.
|
|
145
|
-
*
|
|
146
|
-
*
|
|
145
|
+
* Read the CLI-written evolution outcome for the change, if any. When the manual
|
|
146
|
+
* evolution-result file is absent, fall back to the durable loop-v2 episode store
|
|
147
|
+
* so a failed `learn --apply` / self-evolution episode is not mislabeled
|
|
148
|
+
* `not-run`. Defensive: parse errors / unknown outcomes degrade to `'not-run'`
|
|
149
|
+
* (forward compatible and never throws), so `status` can always render an
|
|
150
|
+
* Evolution line.
|
|
147
151
|
*/
|
|
148
|
-
async function readEvolutionOutcome(changeDir) {
|
|
152
|
+
async function readEvolutionOutcome(projectRoot, changeDir, changeName) {
|
|
149
153
|
const notRun = { status: 'not-run', promoted: false, promotedFiles: [] };
|
|
150
154
|
let raw;
|
|
151
155
|
try {
|
|
152
156
|
raw = await fs.readFile(path.join(changeDir, 'evolution-result.json'), 'utf-8');
|
|
153
157
|
}
|
|
154
158
|
catch {
|
|
155
|
-
return notRun;
|
|
159
|
+
return (await readLatestEpisodeOutcome(projectRoot, changeDir, changeName)) ?? notRun;
|
|
156
160
|
}
|
|
157
161
|
try {
|
|
158
162
|
const record = JSON.parse(raw);
|
|
159
163
|
const outcome = typeof record.outcome === 'string' ? record.outcome : '';
|
|
164
|
+
// `busy-in-flight` is a TRANSIENT, self-healing concurrency deferral (another
|
|
165
|
+
// in-flight episode holds the SAME 策略 POLICY target) — NOT error-prefixed
|
|
166
|
+
// and NOT a defect. It is classified as a distinct non-error 'busy' status so
|
|
167
|
+
// a reader never mistakes it for an `error-...` stop. The in-flight lock
|
|
168
|
+
// self-heals (re-acquired once the holder finishes or the stale window
|
|
169
|
+
// elapses), so the recommended posture is wait-and-retry.
|
|
160
170
|
const status = outcome === 'promoted'
|
|
161
171
|
? 'promoted'
|
|
162
|
-
: outcome
|
|
163
|
-
? '
|
|
164
|
-
: outcome.startsWith('
|
|
165
|
-
? '
|
|
166
|
-
: '
|
|
172
|
+
: outcome === 'busy-in-flight'
|
|
173
|
+
? 'busy'
|
|
174
|
+
: outcome.startsWith('refused-')
|
|
175
|
+
? 'refused'
|
|
176
|
+
: outcome.startsWith('error-')
|
|
177
|
+
? 'error'
|
|
178
|
+
: 'not-run';
|
|
167
179
|
if (status === 'not-run')
|
|
168
180
|
return notRun;
|
|
169
181
|
return {
|
|
@@ -181,6 +193,49 @@ async function readEvolutionOutcome(changeDir) {
|
|
|
181
193
|
return notRun;
|
|
182
194
|
}
|
|
183
195
|
}
|
|
196
|
+
async function readLatestEpisodeOutcome(projectRoot, changeDir, changeName) {
|
|
197
|
+
let episodes;
|
|
198
|
+
try {
|
|
199
|
+
episodes = await listEpisodes(projectRoot);
|
|
200
|
+
}
|
|
201
|
+
catch {
|
|
202
|
+
return null;
|
|
203
|
+
}
|
|
204
|
+
const resolvedChangeDir = path.resolve(changeDir);
|
|
205
|
+
const episode = episodes.find((ep) => ep.changeName === changeName || path.resolve(ep.changeDirPath) === resolvedChangeDir);
|
|
206
|
+
if (!episode)
|
|
207
|
+
return null;
|
|
208
|
+
if (episode.stage === 'errored') {
|
|
209
|
+
return {
|
|
210
|
+
status: 'error',
|
|
211
|
+
reason: episode.terminalError,
|
|
212
|
+
targetId: episode.targetId,
|
|
213
|
+
promoted: false,
|
|
214
|
+
promotedFiles: [],
|
|
215
|
+
timestamp: episode.updatedAt,
|
|
216
|
+
};
|
|
217
|
+
}
|
|
218
|
+
if (episode.stage === 'evolution-refused') {
|
|
219
|
+
return {
|
|
220
|
+
status: 'refused',
|
|
221
|
+
reason: 'evolution refused',
|
|
222
|
+
targetId: episode.targetId,
|
|
223
|
+
promoted: false,
|
|
224
|
+
promotedFiles: [],
|
|
225
|
+
timestamp: episode.updatedAt,
|
|
226
|
+
};
|
|
227
|
+
}
|
|
228
|
+
if (episode.stage === 'evolved') {
|
|
229
|
+
return {
|
|
230
|
+
status: 'promoted',
|
|
231
|
+
targetId: episode.targetId,
|
|
232
|
+
promoted: true,
|
|
233
|
+
promotedFiles: [],
|
|
234
|
+
timestamp: episode.updatedAt,
|
|
235
|
+
};
|
|
236
|
+
}
|
|
237
|
+
return null;
|
|
238
|
+
}
|
|
184
239
|
async function testReportRequiresPlan(testReportPath) {
|
|
185
240
|
try {
|
|
186
241
|
const content = await fs.readFile(testReportPath, 'utf-8');
|
|
@@ -31,4 +31,37 @@ export interface TestMetrics {
|
|
|
31
31
|
* Returns null when no recognized summary is found.
|
|
32
32
|
*/
|
|
33
33
|
export declare function parseTestMetrics(reportText: string): TestMetrics | null;
|
|
34
|
+
/**
|
|
35
|
+
* What a runner actually COLLECTED, independent of how many passed. A green
|
|
36
|
+
* SUMMARY line ("46 passed") says nothing about WHICH tests ran — a default
|
|
37
|
+
* `pytest` can pass 46 unrelated tests while a conftest `collect_ignore`
|
|
38
|
+
* excludes the change's own tests entirely. This is the collection-scope
|
|
39
|
+
* signal the change-scope guard reads so a passing-but-irrelevant run cannot be
|
|
40
|
+
* certified as a verified success arm.
|
|
41
|
+
*/
|
|
42
|
+
export interface TestCollection {
|
|
43
|
+
/**
|
|
44
|
+
* Number of tests COLLECTED for execution. For pytest this is `collected N`
|
|
45
|
+
* minus `deselected M` (i.e. the SELECTED count); for vitest the number of
|
|
46
|
+
* matched test files. null when no collection line was recognized.
|
|
47
|
+
*/
|
|
48
|
+
collected: number | null;
|
|
49
|
+
/**
|
|
50
|
+
* Test FILE paths the runner reported collecting / running, lowercased and
|
|
51
|
+
* forward-slashed, deduped. Empty when none were itemized (a collected COUNT
|
|
52
|
+
* with no path list still populates {@link collected}). Used to intersect
|
|
53
|
+
* against the change's expected test paths.
|
|
54
|
+
*/
|
|
55
|
+
paths: string[];
|
|
56
|
+
}
|
|
57
|
+
/**
|
|
58
|
+
* Parse a runner's COLLECTION scope from its output. Pure + dependency-free.
|
|
59
|
+
*
|
|
60
|
+
* Returns null ("no collection signal") when no recognized collection line is
|
|
61
|
+
* present — callers MUST treat that as unknown scope (no gate), never as zero.
|
|
62
|
+
* A recognized collection line with count 0 (pytest "collected 0 items",
|
|
63
|
+
* vitest "no test files found") returns `{ collected: 0, paths: [] }` — an
|
|
64
|
+
* affirmative empty-scope signal.
|
|
65
|
+
*/
|
|
66
|
+
export declare function parseTestCollection(reportText: string): TestCollection | null;
|
|
34
67
|
//# sourceMappingURL=test-metrics.d.ts.map
|
|
@@ -61,4 +61,71 @@ export function parseTestMetrics(reportText) {
|
|
|
61
61
|
}
|
|
62
62
|
return result;
|
|
63
63
|
}
|
|
64
|
+
// pytest: "collected 46 items" / "collected 46 items / 3 deselected" /
|
|
65
|
+
// "46 deselected" / "12 selected" (after a deselect).
|
|
66
|
+
const PYTEST_COLLECTED_RE = /\bcollected\s+(\d+)\s+items?\b/i;
|
|
67
|
+
const PYTEST_DESELECTED_RE = /(\d+)\s+deselected\b/i;
|
|
68
|
+
const PYTEST_SELECTED_RE = /(\d+)\s+selected\b/i;
|
|
69
|
+
// vitest: "no test files found" — an explicit zero-collection signal.
|
|
70
|
+
const VITEST_NO_FILES_RE = /\bno\s+test\s+files?\s+found\b/i;
|
|
71
|
+
// A test FILE path token: any *.py / *.ts / *.js / *.tsx / *.spec.* style path.
|
|
72
|
+
// Matched globally on a line so itemized collection output yields every path.
|
|
73
|
+
const TEST_PATH_RE = /(?:^|[\s"'(])([\w./\\-]*\b(?:tests?|spec|specs)\b[\w./\\-]*\.(?:py|tsx?|jsx?))/gi;
|
|
74
|
+
function normPath(p) {
|
|
75
|
+
return p.replace(/\\/g, '/').toLowerCase().replace(/^\.\//, '');
|
|
76
|
+
}
|
|
77
|
+
/**
|
|
78
|
+
* Parse a runner's COLLECTION scope from its output. Pure + dependency-free.
|
|
79
|
+
*
|
|
80
|
+
* Returns null ("no collection signal") when no recognized collection line is
|
|
81
|
+
* present — callers MUST treat that as unknown scope (no gate), never as zero.
|
|
82
|
+
* A recognized collection line with count 0 (pytest "collected 0 items",
|
|
83
|
+
* vitest "no test files found") returns `{ collected: 0, paths: [] }` — an
|
|
84
|
+
* affirmative empty-scope signal.
|
|
85
|
+
*/
|
|
86
|
+
export function parseTestCollection(reportText) {
|
|
87
|
+
if (!reportText)
|
|
88
|
+
return null;
|
|
89
|
+
const text = reportText.replace(ANSI_SGR, '');
|
|
90
|
+
let collected = null;
|
|
91
|
+
const paths = new Set();
|
|
92
|
+
let sawSignal = false;
|
|
93
|
+
for (const raw of text.split(/\r?\n/)) {
|
|
94
|
+
const line = raw.trim();
|
|
95
|
+
if (!line)
|
|
96
|
+
continue;
|
|
97
|
+
if (VITEST_NO_FILES_RE.test(line)) {
|
|
98
|
+
collected = 0;
|
|
99
|
+
sawSignal = true;
|
|
100
|
+
}
|
|
101
|
+
const collectedN = count(line, PYTEST_COLLECTED_RE);
|
|
102
|
+
if (collectedN !== null) {
|
|
103
|
+
const deselected = count(line, PYTEST_DESELECTED_RE) ?? 0;
|
|
104
|
+
collected = Math.max(0, collectedN - deselected);
|
|
105
|
+
sawSignal = true;
|
|
106
|
+
}
|
|
107
|
+
else {
|
|
108
|
+
// A standalone "N selected" / "N deselected" line refines a prior count.
|
|
109
|
+
const selected = count(line, PYTEST_SELECTED_RE);
|
|
110
|
+
if (selected !== null) {
|
|
111
|
+
collected = selected;
|
|
112
|
+
sawSignal = true;
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
// Harvest itemized test file paths from any line (collection listing,
|
|
116
|
+
// per-file vitest report, or a pytest rootdir/test path echo).
|
|
117
|
+
TEST_PATH_RE.lastIndex = 0;
|
|
118
|
+
let m;
|
|
119
|
+
while ((m = TEST_PATH_RE.exec(line)) !== null) {
|
|
120
|
+
const p = normPath(m[1]);
|
|
121
|
+
if (p) {
|
|
122
|
+
paths.add(p);
|
|
123
|
+
sawSignal = true;
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
if (!sawSignal)
|
|
128
|
+
return null;
|
|
129
|
+
return { collected, paths: [...paths] };
|
|
130
|
+
}
|
|
64
131
|
//# sourceMappingURL=test-metrics.js.map
|
package/dist/core/learn.js
CHANGED
|
@@ -9,7 +9,7 @@ import { buildLLMSummaryCandidates, } from './learn/llm-summary.js';
|
|
|
9
9
|
import { parseTestMetrics, computePerChangeLoss, measureHealthReport, resolveMetricSource, } from './fitness/index.js';
|
|
10
10
|
import { readProjectConfig } from './project-config.js';
|
|
11
11
|
import { getTrajectoryForChange } from './trajectory/registry.js';
|
|
12
|
-
import { toTrajectoryFacts } from './trajectory/facts.js';
|
|
12
|
+
import { toTrajectoryFacts, extractExpectedTestPaths } from './trajectory/facts.js';
|
|
13
13
|
import { toActionSkeleton, renderActionSkeleton } from './trajectory/skeleton.js';
|
|
14
14
|
import { walkCreditPath } from './learn/credit-path.js';
|
|
15
15
|
const PRIMARY_ARTIFACTS = [
|
|
@@ -74,7 +74,16 @@ export async function generateLearnReport(args = {}) {
|
|
|
74
74
|
const trajectory = args.trajectorySource
|
|
75
75
|
? await args.trajectorySource.getTrajectory(resolved.changeName).catch(() => null)
|
|
76
76
|
: await getTrajectoryForChange(projectRoot, resolved.changeName);
|
|
77
|
-
|
|
77
|
+
// Change-scope guard input: the change's own expected test paths (from its
|
|
78
|
+
// spec-tests.md mapping). Lets toTrajectoryFacts DEMOTE a green-but-irrelevant
|
|
79
|
+
// run (a default `pytest` that collected ZERO of the change's tests) to
|
|
80
|
+
// unverified so the 奖励智能体 REWARD AGENT abstains instead of certifying a
|
|
81
|
+
// false-GREEN. Absent/empty ⇒ no gate (byte-identical baseline).
|
|
82
|
+
const specTestsForScope = artifacts.evidence.find((f) => /(?:^|[\\/])spec-tests\.md$/i.test(f.relativePath));
|
|
83
|
+
const expectedTestPaths = extractExpectedTestPaths(specTestsForScope?.content);
|
|
84
|
+
const trajectoryFacts = toTrajectoryFacts(trajectory, resolved.changeName, {
|
|
85
|
+
expectedTestPaths,
|
|
86
|
+
});
|
|
78
87
|
// "Trust the trajectory": when a real runner was observed, its pass rate wins
|
|
79
88
|
// over the authored test-report; otherwise the report stands but is flagged
|
|
80
89
|
// unverified (observe-only soft penalty — `unverifiedWeight` defaults to 0, so
|
|
@@ -42,6 +42,9 @@ export declare const ProjectConfigSchema: z.ZodObject<{
|
|
|
42
42
|
flag: "flag";
|
|
43
43
|
route: "route";
|
|
44
44
|
}>>;
|
|
45
|
+
deepReadGradient: z.ZodOptional<z.ZodBoolean>;
|
|
46
|
+
deepReadMaxChunks: z.ZodOptional<z.ZodNumber>;
|
|
47
|
+
deepReadMaxChunkChars: z.ZodOptional<z.ZodNumber>;
|
|
45
48
|
}, z.core.$strip>>;
|
|
46
49
|
critic: z.ZodOptional<z.ZodObject<{
|
|
47
50
|
baselineMode: z.ZodOptional<z.ZodEnum<{
|
|
@@ -91,6 +91,12 @@ export const ProjectConfigSchema = z.object({
|
|
|
91
91
|
// confidently prefers the worse-pass-rate arm (the complement to
|
|
92
92
|
// gate-not-blend), never on a legitimate health/verbosity override.
|
|
93
93
|
divergenceCheck: z.enum(['flag', 'route']).optional(),
|
|
94
|
+
// M6 POST-SCORE deep read of the full transcript → enrich the textual
|
|
95
|
+
// GRADIENT only (never the sealed scalar). Off by default; stochastic,
|
|
96
|
+
// so it is confined to the advisory gradient and runs after scoring.
|
|
97
|
+
deepReadGradient: z.boolean().optional(),
|
|
98
|
+
deepReadMaxChunks: z.number().optional(),
|
|
99
|
+
deepReadMaxChunkChars: z.number().optional(),
|
|
94
100
|
})
|
|
95
101
|
.optional(),
|
|
96
102
|
// Loop v2 — CRITIC AGENT(基线智能体 baseline agent)baseline construction.
|
|
@@ -319,7 +325,7 @@ export function readProjectConfig(projectRoot) {
|
|
|
319
325
|
else if (rawSE.reward !== undefined) {
|
|
320
326
|
console.warn(`Invalid 'selfEvolution.reward' in config (samples/noiseFloor numbers, ` +
|
|
321
327
|
`orderSwap/requireCorrectnessGate booleans, tamperCheck off|flag|block, ` +
|
|
322
|
-
`divergenceCheck flag|route), ignoring`);
|
|
328
|
+
`divergenceCheck flag|route, deepReadGradient boolean), ignoring`);
|
|
323
329
|
}
|
|
324
330
|
// Loop v2 — CRITIC AGENT knobs. Resilient: a bad value is dropped with a
|
|
325
331
|
// warning (the critic default 're-do' then applies). Omitted ⇒ undefined
|
|
@@ -47,7 +47,7 @@ import { readProjectConfig } from '../project-config.js';
|
|
|
47
47
|
import { claudeProjectsDir } from '../learn/trajectory-discovery.js';
|
|
48
48
|
import { claudeSourceFactory } from '../trajectory/adapters/claude.js';
|
|
49
49
|
import { toActionSkeleton } from '../trajectory/skeleton.js';
|
|
50
|
-
import { runHeadlessAgent,
|
|
50
|
+
import { runHeadlessAgent, resolveAgentTimeoutMs, } from './host-harness.js';
|
|
51
51
|
import { currentPolicyVersion, readPolicyLedger, readPolicySnapshotFiles, } from './policy/index.js';
|
|
52
52
|
import { advanceEpisodeStage, writeArmCapture } from './episode-store.js';
|
|
53
53
|
/** Error thrown when the worktree could not be created (git AND copy fallback failed). */
|
|
@@ -212,7 +212,7 @@ const GIT_TIMEOUT_MS = 60_000;
|
|
|
212
212
|
export async function runCriticAgent(opts) {
|
|
213
213
|
const repoRoot = path.resolve(opts.repoRoot);
|
|
214
214
|
const spawnImpl = opts.spawn ?? nodeSpawn;
|
|
215
|
-
const timeoutMs = opts.timeoutMs ??
|
|
215
|
+
const timeoutMs = opts.timeoutMs ?? resolveAgentTimeoutMs(opts.harness);
|
|
216
216
|
const gitTimeoutMs = opts.gitTimeoutMs ?? GIT_TIMEOUT_MS;
|
|
217
217
|
const homeDir = opts.homeDir ?? os.homedir();
|
|
218
218
|
const baselineMode = opts.baselineMode ?? 're-do';
|
|
@@ -271,10 +271,18 @@ export async function runCriticAgent(opts) {
|
|
|
271
271
|
homeDir,
|
|
272
272
|
runStartMs: runStart,
|
|
273
273
|
});
|
|
274
|
+
// Local import keeps the facts derivation in one place (learn uses the same
|
|
275
|
+
// function); imported lazily to avoid a top-level cycle hazard.
|
|
276
|
+
const { toTrajectoryFacts, extractExpectedTestPaths } = await import('../trajectory/facts.js');
|
|
277
|
+
// Change-scope guard input for the baseline arm: the change's expected test
|
|
278
|
+
// paths from its spec-tests.md (the worktree carries the change dir), so a
|
|
279
|
+
// green-but-out-of-scope baseline run is demoted symmetrically with the main
|
|
280
|
+
// arm and advantage stays scope-consistent.
|
|
281
|
+
const expectedTestPaths = extractExpectedTestPaths(await (await import('node:fs/promises'))
|
|
282
|
+
.readFile(path.join(worktreePath, 'synergyspec-selfevolving', 'changes', opts.changeName, 'spec-tests.md'), 'utf8')
|
|
283
|
+
.catch(() => undefined));
|
|
274
284
|
const facts = trajectory
|
|
275
|
-
? // Local import keeps the facts derivation in one place (learn uses the
|
|
276
|
-
// same function); imported lazily to avoid a top-level cycle hazard.
|
|
277
|
-
(await import('../trajectory/facts.js')).toTrajectoryFacts(trajectory, opts.changeName)
|
|
285
|
+
? toTrajectoryFacts(trajectory, opts.changeName, { expectedTestPaths })
|
|
278
286
|
: null;
|
|
279
287
|
// Honesty: prefer the OBSERVED pass rate (a real runner ran), else the
|
|
280
288
|
// stdout-parsed summary; null when neither parsed (never fabricated).
|
|
@@ -1,14 +1,24 @@
|
|
|
1
|
-
export declare class CanonicalProposerOutputInvalid extends Error {
|
|
1
|
+
export declare class EvolvingAgentOutputInvalid extends Error {
|
|
2
2
|
constructor(message: string);
|
|
3
3
|
}
|
|
4
4
|
/** The model declined to edit anything (empty edits). Not an error — a no-op. */
|
|
5
|
-
export declare class CanonicalProposerNoOp extends Error {
|
|
5
|
+
export declare class EvolvingAgentNoOp extends Error {
|
|
6
6
|
constructor();
|
|
7
7
|
}
|
|
8
8
|
/** The headless agent invocation itself failed (crash / empty output). */
|
|
9
|
-
export declare class CanonicalProposerInvocationError extends Error {
|
|
9
|
+
export declare class EvolvingAgentInvocationError extends Error {
|
|
10
10
|
constructor(stderr: string);
|
|
11
11
|
}
|
|
12
|
+
/**
|
|
13
|
+
* @deprecated v2.0.0 removed the GA "canonical proposer"; these names are
|
|
14
|
+
* retained only as transitional aliases for any external importer. Use the
|
|
15
|
+
* `EvolvingAgent*` classes — they are the same constructors.
|
|
16
|
+
*/
|
|
17
|
+
export declare const CanonicalProposerOutputInvalid: typeof EvolvingAgentOutputInvalid;
|
|
18
|
+
/** @deprecated alias of {@link EvolvingAgentNoOp}. */
|
|
19
|
+
export declare const CanonicalProposerNoOp: typeof EvolvingAgentNoOp;
|
|
20
|
+
/** @deprecated alias of {@link EvolvingAgentInvocationError}. */
|
|
21
|
+
export declare const CanonicalProposerInvocationError: typeof EvolvingAgentInvocationError;
|
|
12
22
|
/**
|
|
13
23
|
* The packaged result of one validated candidate edit set: the human-readable
|
|
14
24
|
* unified diff, the POSIX paths actually edited (a subset of the target's
|
|
@@ -39,8 +49,8 @@ export interface CanonicalProposeOutput {
|
|
|
39
49
|
* the loop-v2 演进智能体 EVOLVING AGENT call this so their safety contract is
|
|
40
50
|
* byte-identical. relPaths are normalized to POSIX separators.
|
|
41
51
|
*
|
|
42
|
-
* Throws {@link CanonicalProposerNoOp} when `rawEdits` is empty and
|
|
43
|
-
* {@link CanonicalProposerOutputInvalid} for any shape / frozen / scope
|
|
52
|
+
* Throws {@link EvolvingAgentNoOp} when `rawEdits` is empty and
|
|
53
|
+
* {@link EvolvingAgentOutputInvalid} for any shape / frozen / scope
|
|
44
54
|
* violation. Path traversal and absolute paths are rejected transitively: they
|
|
45
55
|
* can never be a member of `allowedFiles`, so they fail the scope check.
|
|
46
56
|
*/
|