synergyspec-selfevolving 2.1.2 → 2.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/dist/commands/learn.js +13 -3
  2. package/dist/commands/self-evolution-episode.d.ts +6 -1
  3. package/dist/commands/self-evolution-episode.js +8 -1
  4. package/dist/commands/self-evolution.d.ts +2 -2
  5. package/dist/commands/self-evolution.js +10 -10
  6. package/dist/commands/workflow/status.js +5 -0
  7. package/dist/core/change-readiness.d.ts +1 -1
  8. package/dist/core/change-readiness.js +66 -11
  9. package/dist/core/fitness/test-metrics.d.ts +33 -0
  10. package/dist/core/fitness/test-metrics.js +67 -0
  11. package/dist/core/learn.js +11 -2
  12. package/dist/core/project-config.d.ts +3 -0
  13. package/dist/core/project-config.js +7 -1
  14. package/dist/core/self-evolution/critic-agent.js +13 -5
  15. package/dist/core/self-evolution/edits-contract.d.ts +15 -5
  16. package/dist/core/self-evolution/edits-contract.js +26 -16
  17. package/dist/core/self-evolution/episode-orchestrator.d.ts +16 -9
  18. package/dist/core/self-evolution/episode-orchestrator.js +126 -35
  19. package/dist/core/self-evolution/episode-store.d.ts +34 -11
  20. package/dist/core/self-evolution/episode-store.js +45 -10
  21. package/dist/core/self-evolution/evolving-agent.d.ts +12 -12
  22. package/dist/core/self-evolution/evolving-agent.js +46 -48
  23. package/dist/core/self-evolution/host-harness.d.ts +68 -2
  24. package/dist/core/self-evolution/host-harness.js +208 -21
  25. package/dist/core/self-evolution/policy/policy-store.d.ts +8 -6
  26. package/dist/core/self-evolution/policy/policy-store.js +124 -24
  27. package/dist/core/self-evolution/proposer-slice.d.ts +4 -3
  28. package/dist/core/self-evolution/reward-agent.d.ts +11 -1
  29. package/dist/core/self-evolution/reward-agent.js +53 -20
  30. package/dist/core/self-evolution/reward-aggregator.d.ts +18 -0
  31. package/dist/core/self-evolution/reward-aggregator.js +53 -3
  32. package/dist/core/self-evolution/reward-deepread.d.ts +64 -0
  33. package/dist/core/self-evolution/reward-deepread.js +112 -0
  34. package/dist/core/templates/workflows/learn.js +3 -2
  35. package/dist/core/templates/workflows/self-evolving.js +5 -2
  36. package/dist/core/trajectory/facts.d.ts +69 -2
  37. package/dist/core/trajectory/facts.js +179 -10
  38. package/dist/core/trajectory/skeleton.d.ts +10 -0
  39. package/dist/core/trajectory/skeleton.js +24 -3
  40. package/package.json +4 -3
  41. package/schemas/spec-driven/templates/design.md +2 -1
@@ -5,7 +5,7 @@ import { readProjectConfig } from '../core/project-config.js';
5
5
  import { assembleTrajectoryContext, } from '../core/learn/trajectory-assembler.js';
6
6
  import { findTranscriptsForChange, resolveChangeDir, validateExplicitTrajectoryHandle, } from '../core/learn/trajectory-discovery.js';
7
7
  import { getTrajectoryForChange } from '../core/trajectory/registry.js';
8
- import { toTrajectoryFacts, describeRunnerResults } from '../core/trajectory/facts.js';
8
+ import { toTrajectoryFacts, describeRunnerResults, extractExpectedTestPaths } from '../core/trajectory/facts.js';
9
9
  import { toActionSkeleton } from '../core/trajectory/skeleton.js';
10
10
  import { resolveHostHarness, resolveHostHarnessForRepo } from '../core/self-evolution/host-harness.js';
11
11
  import { mineSuccessSignals } from '../core/self-evolution/success-channel.js';
@@ -301,13 +301,23 @@ export function registerLearnCommand(program, deps = {}) {
301
301
  process.env.SYNERGYSPEC_SELFEVOLVING_SESSION_ID = opts.sessionId;
302
302
  try {
303
303
  const adapterTrajectory = await getTrajectoryForChange(projectRoot, change);
304
+ // Change-scope guard input so debug-trajectory's facts + per-runner
305
+ // detail reflect the same scope demotion the loop uses (surfaces a
306
+ // green-but-out-of-scope graded run instead of hiding it).
307
+ const adapterExpectedTestPaths = extractExpectedTestPaths(await (await import('node:fs/promises'))
308
+ .readFile(path.join(projectRoot, 'synergyspec-selfevolving', 'changes', change, 'spec-tests.md'), 'utf8')
309
+ .catch(() => undefined));
304
310
  payload.adapter = {
305
311
  resolvedHarness: resolveHostHarness(),
306
312
  sessionId: adapterTrajectory?.sessionId ?? null,
307
313
  turns: adapterTrajectory?.turns.length ?? 0,
308
314
  sourcePaths: adapterTrajectory ? [...new Set(adapterTrajectory.sourcePaths)] : [],
309
- facts: toTrajectoryFacts(adapterTrajectory, change),
310
- runnerResults: describeRunnerResults(adapterTrajectory),
315
+ facts: toTrajectoryFacts(adapterTrajectory, change, {
316
+ expectedTestPaths: adapterExpectedTestPaths,
317
+ }),
318
+ runnerResults: describeRunnerResults(adapterTrajectory, {
319
+ expectedTestPaths: adapterExpectedTestPaths,
320
+ }),
311
321
  // Bounded play-by-play projection (file edits / test runs /
312
322
  // commands) so a wrong skeleton is visible in one command.
313
323
  steps: toActionSkeleton(adapterTrajectory),
@@ -89,7 +89,12 @@ export interface RunEpisodeCommandResult {
89
89
  exitCode: number;
90
90
  /** Present when the episode ran (not busy / not an error). */
91
91
  result?: RunEpisodeResult;
92
- /** Present when the target's in-flight slot was already held. */
92
+ /**
93
+ * Present when the target's in-flight slot was already held by another
94
+ * episode. The command emits the EXACT machine outcome literal
95
+ * `busy-in-flight` (lowercase, hyphenated, NOT error-prefixed): a TRANSIENT,
96
+ * self-healing concurrency deferral, NEVER an `error-...` stop.
97
+ */
93
98
  busy?: RunEpisodeBusy;
94
99
  error?: string;
95
100
  }
@@ -157,7 +157,7 @@ export async function runEpisodeCommand(args, opts) {
157
157
  // `.synergyspec-selfevolving/host-harness.json`, so even when the
158
158
  // orchestrator's reward/evolving agents later spawn from an env-less Task
159
159
  // subagent they read the seeded harness instead of defaulting to the
160
- // 'claude' binary (the ydata proposer-spawn failure).
160
+ // 'claude' binary (the ydata 演进智能体 EVOLVING AGENT spawn failure).
161
161
  const harness = await resolveHostHarnessForRepo(opts.repoRoot);
162
162
  const episodeOptions = {
163
163
  repoRoot: opts.repoRoot,
@@ -190,6 +190,13 @@ export async function runEpisodeCommand(args, opts) {
190
190
  stdout(JSON.stringify({ exitCode: 0, busy: outcome }, null, 2));
191
191
  }
192
192
  else {
193
+ // Emit the EXACT machine outcome literal so the runner skill COPIES it
194
+ // verbatim into its '## Episode Verdict' block instead of INFERRING an
195
+ // 'error-in-flight' from prose. busy-in-flight is a TRANSIENT, self-healing
196
+ // concurrency deferral (another in-flight episode holds the SAME 策略
197
+ // POLICY target) — it is NOT error-prefixed and must never be classified as
198
+ // an error. The lock self-heals; recommend WAIT-AND-RETRY.
199
+ stdout('Outcome: busy-in-flight');
193
200
  stdout(`Episode not started for ${targetId}: ${outcome.reason}`);
194
201
  }
195
202
  return { exitCode: 0, busy: outcome };
@@ -6,7 +6,7 @@ export declare function registerSelfEvolutionCommand(program: Command): void;
6
6
  * Candidate edits authored by the HOST code agent (the one running the learn
7
7
  * skill, with full repo context) and handed to the CLI via `--from-edits`. The
8
8
  * host GENERATES the new file contents; the CLI re-validates them against the
9
- * target's frozen + scoped files exactly as the headless proposer path does,
9
+ * target's frozen + scoped files exactly as the headless 演进智能体 EVOLVING AGENT path does,
10
10
  * then packages them. This is the preferred path; `--agent` is the no-host
11
11
  * fallback.
12
12
  */
@@ -225,7 +225,7 @@ export interface EvolveFromEditsReport {
225
225
  * HOST-AUTHORED one-button evolve. The single non-interactive
226
226
  * host-authored-edit → gate → observed-verified promote command.
227
227
  *
228
- * Flow (NEVER spawns the proposer):
228
+ * Flow (NEVER spawns an agent):
229
229
  * 1. Read `--from-edits` (path or '-') into a {@link HostEditsInput}.
230
230
  * 2. {@link runProposeCanonical} with single-change aggregation + the host
231
231
  * `editsInput` to PACKAGE the host candidate (proposal-only). Take
@@ -1,7 +1,7 @@
1
1
  import * as fs from 'node:fs';
2
2
  import * as path from 'node:path';
3
3
  import fastGlob from 'fast-glob';
4
- import { aggregateLearnEvolutionHints, applyCandidatePromotion, rollbackCandidatePromotion, shouldAutoPromote, isEvidenceComplete, readCandidateFitness, readHealthBaseline, writeHealthBaseline, readCandidatePackage, resolveTargetLocalFiles, CANONICAL_CANDIDATE_SOURCES, CANONICAL_TARGETS, collectArchiveExperiences, EVOLVABLE_PART_DESCRIPTIONS, EVOLVABLE_PARTS, evaluateTaskDecompositionForChange, evaluateToolEvolutionCandidate, generateCandidateId, generatePromotionReport, readPromotedBaselineLoss, checkLossRegression, recordVerdictBestEffort, updateCandidateStatus, isEvolutionPartEnabled, findSimilarArchiveExperiences, listCanonicalTargets, lookupCanonicalTarget, validateCandidateEdits, renderUnifiedDiff, CanonicalProposerNoOp, resolveTargetEvolutionPolicy, resolveKindOnlyPinTarget, detectUnbindableHintObservations, isCanonicalTargetEvolvable, parseEvolutionSwitchOptions, renderAlignmentReport, renderArchiveExperienceBlock, renderStaticGateSummary, renderToolEvolutionGuardReport, renderEvolutionSwitches, requireCanonicalTarget, resolveCandidateRepo, runStaticCandidateGate, shouldTriggerCandidate, validateLearnEvolutionHint, writeCandidatePackage, verifySpecCodeAlignmentForChange, } from '../core/self-evolution/index.js';
4
+ import { aggregateLearnEvolutionHints, applyCandidatePromotion, rollbackCandidatePromotion, shouldAutoPromote, isEvidenceComplete, readCandidateFitness, readHealthBaseline, writeHealthBaseline, readCandidatePackage, resolveTargetLocalFiles, CANONICAL_CANDIDATE_SOURCES, CANONICAL_TARGETS, collectArchiveExperiences, EVOLVABLE_PART_DESCRIPTIONS, EVOLVABLE_PARTS, evaluateTaskDecompositionForChange, evaluateToolEvolutionCandidate, generateCandidateId, generatePromotionReport, readPromotedBaselineLoss, checkLossRegression, recordVerdictBestEffort, updateCandidateStatus, isEvolutionPartEnabled, findSimilarArchiveExperiences, listCanonicalTargets, lookupCanonicalTarget, validateCandidateEdits, renderUnifiedDiff, EvolvingAgentNoOp, resolveTargetEvolutionPolicy, resolveKindOnlyPinTarget, detectUnbindableHintObservations, isCanonicalTargetEvolvable, parseEvolutionSwitchOptions, renderAlignmentReport, renderArchiveExperienceBlock, renderStaticGateSummary, renderToolEvolutionGuardReport, renderEvolutionSwitches, requireCanonicalTarget, resolveCandidateRepo, runStaticCandidateGate, shouldTriggerCandidate, validateLearnEvolutionHint, writeCandidatePackage, verifySpecCodeAlignmentForChange, } from '../core/self-evolution/index.js';
5
5
  import { generateLearnReport } from '../core/learn.js';
6
6
  import { validateExplicitTrajectoryHandle } from '../core/learn/trajectory-discovery.js';
7
7
  import { validateChangeExists } from './workflow/shared.js';
@@ -330,7 +330,7 @@ export function registerSelfEvolutionCommand(program) {
330
330
  });
331
331
  cmd
332
332
  .command('evolve-from-edits')
333
- .description('HOST-AUTHORED one-button evolve: package edits the host code agent already wrote (--from-edits) for ONE learn signal, run the static gate, and auto-promote ONLY when the change\'s learn report carries an OBSERVED-VERIFIED green signal (a real test run was seen) onto the canonical LOCAL file. Never spawns the proposer; --agent is refused.')
333
+ .description('HOST-AUTHORED one-button evolve: package edits the host code agent already wrote (--from-edits) for ONE learn signal, run the static gate, and auto-promote ONLY when the change\'s learn report carries an OBSERVED-VERIFIED green signal (a real test run was seen) onto the canonical LOCAL file. Never spawns an agent; --agent is refused.')
334
334
  .requiredOption('--from-learn <hints.json>', 'the change\'s learn hints.json to aggregate (one signal)')
335
335
  .requiredOption('--evolve-target <targetId>', 'the single canonical target id to evolve')
336
336
  .requiredOption('--from-edits <file>', "JSON the host agent wrote ({ targetId?, rationale?, edits: [{relPath, content}] }; '-' reads stdin)")
@@ -339,7 +339,7 @@ export function registerSelfEvolutionCommand(program) {
339
339
  .option('--require-proven', 'only promote on a MEASURED fitness improvement (refuse unproven candidates)')
340
340
  .option('--transcript <path>', 'Explicit transcript .jsonl to grade (bypasses change-window discovery; Claude transcript store only)')
341
341
  .option('--session-id <id>', 'Explicit Claude session id to grade (bypasses change-window discovery; Claude transcript store only)')
342
- .option('--agent', 'REFUSED: this path is host-authored and never spawns the proposer')
342
+ .option('--agent', 'REFUSED: this path is host-authored and never spawns an agent')
343
343
  .option('--yes', 'required: confirm the non-interactive auto-promote')
344
344
  .option('--json', 'output the full EvolveFromEditsReport JSON')
345
345
  .action(async (options) => {
@@ -409,11 +409,11 @@ export function registerSelfEvolutionCommand(program) {
409
409
  }
410
410
  /**
411
411
  * Validate host-authored candidate edits (the `--from-edits` path) and turn them
412
- * into the same {@link CanonicalProposeOutput} shape the headless proposer
413
- * returns. Reuses {@link validateCandidateEdits} (frozen + target-scope checks)
412
+ * into the same {@link CanonicalProposeOutput} shape the 演进智能体 EVOLVING
413
+ * AGENT returns. Reuses {@link validateCandidateEdits} (frozen + target-scope checks)
414
414
  * and {@link renderUnifiedDiff}, so the host path and the agent path are
415
415
  * byte-identical in what they accept and how they package. Throws
416
- * {@link CanonicalProposerNoOp} when the edits change nothing.
416
+ * {@link EvolvingAgentNoOp} when the edits change nothing.
417
417
  */
418
418
  function packageHostEdits(editsInput, allowedFiles, currentFiles, group, targetId) {
419
419
  if (editsInput.targetId && editsInput.targetId !== targetId) {
@@ -425,7 +425,7 @@ function packageHostEdits(editsInput, allowedFiles, currentFiles, group, targetI
425
425
  // nothing to evolve — surface it as a no-op (placeholder), like the agent path.
426
426
  const changesSomething = validated.some((e) => (oldByPath.get(e.relPath) ?? '') !== e.content);
427
427
  if (!changesSomething) {
428
- throw new CanonicalProposerNoOp();
428
+ throw new EvolvingAgentNoOp();
429
429
  }
430
430
  const diffPatch = validated
431
431
  .map((e) => renderUnifiedDiff(e.relPath, oldByPath.get(e.relPath) ?? '', e.content))
@@ -1007,7 +1007,7 @@ export async function runRejectCommand(args, opts) {
1007
1007
  * HOST-AUTHORED one-button evolve. The single non-interactive
1008
1008
  * host-authored-edit → gate → observed-verified promote command.
1009
1009
  *
1010
- * Flow (NEVER spawns the proposer):
1010
+ * Flow (NEVER spawns an agent):
1011
1011
  * 1. Read `--from-edits` (path or '-') into a {@link HostEditsInput}.
1012
1012
  * 2. {@link runProposeCanonical} with single-change aggregation + the host
1013
1013
  * `editsInput` to PACKAGE the host candidate (proposal-only). Take
@@ -1067,7 +1067,7 @@ export async function runEvolveFromEdits(args, opts) {
1067
1067
  // Non-interactive contract: --yes is required (one-button host-authored
1068
1068
  // confirmation), and --agent is REFUSED (this path is host-authored, never spawns).
1069
1069
  if (args.agent) {
1070
- return fail(2, 'error-bad-input', '--agent is not allowed: evolve-from-edits is host-authored and never spawns the proposer.', false);
1070
+ return fail(2, 'error-bad-input', '--agent is not allowed: evolve-from-edits is host-authored and never spawns an agent.', false);
1071
1071
  }
1072
1072
  if (!args.yes) {
1073
1073
  return fail(2, 'error-bad-input', '--yes is required: evolve-from-edits promotes onto your local files non-interactively.', false);
@@ -1375,7 +1375,7 @@ function renderProposalMd(group, expectedBenefit) {
1375
1375
  lines.push(expectedBenefit);
1376
1376
  lines.push('');
1377
1377
  lines.push('## Status');
1378
- lines.push('- diff.patch is intentionally empty. Apply the candidate change manually (or via a future automated proposer) before invoking the static gate.');
1378
+ lines.push('- diff.patch is intentionally empty. Apply the candidate change manually (or via a future automated agent) before invoking the static gate.');
1379
1379
  return lines.join('\n') + '\n';
1380
1380
  }
1381
1381
  function renderRationaleMd(group) {
@@ -67,6 +67,11 @@ export function printStatusText(status, readiness) {
67
67
  else if (evolution.status === 'refused' || evolution.status === 'error' || evolution.status === 'promoted') {
68
68
  console.log(chalk.yellow(`Evolution: ${evolution.status}${evolution.reason ? ` — ${evolution.reason}` : ''}`));
69
69
  }
70
+ else if (evolution.status === 'busy') {
71
+ // A transient concurrency deferral (another in-flight episode holds the
72
+ // 策略 POLICY target). NOT a failure and NOT 'not-run' — self-heals; retry.
73
+ console.log(chalk.yellow(`Evolution: busy${evolution.reason ? ` — ${evolution.reason}` : ''} (another episode is in flight; retry shortly)`));
74
+ }
70
75
  else {
71
76
  // Hyphenated to match the machine enum ('not-run', change-readiness.ts)
72
77
  // and the critic skill's verbatim "status shows `Evolution: not-run`".
@@ -8,7 +8,7 @@ export type TaskReadinessStatus = 'no-tasks' | 'complete' | 'in-progress';
8
8
  * surfaced for visibility only — it does NOT gate `isArchiveReady` (a safe refusal
9
9
  * must not block archiving a finished change).
10
10
  */
11
- export type EvolutionOutcomeStatus = 'not-run' | 'promoted' | 'refused' | 'error';
11
+ export type EvolutionOutcomeStatus = 'not-run' | 'promoted' | 'refused' | 'busy' | 'error';
12
12
  export interface ArtifactStatusSummary {
13
13
  done: number;
14
14
  ready: number;
@@ -1,6 +1,7 @@
1
1
  import { promises as fs } from 'fs';
2
2
  import path from 'path';
3
3
  import { formatChangeStatus, loadChangeContext, } from './artifact-graph/index.js';
4
+ import { listEpisodes } from './self-evolution/episode-store.js';
4
5
  const TASK_PATTERN = /^[-*]\s+\[([\sx])\]\s*(.*)$/i;
5
6
  const REQUIRED_EVIDENCE_FILES = [
6
7
  ['specTests', 'spec-tests.md'],
@@ -45,7 +46,7 @@ export async function getChangeReadiness(projectRoot, changeName, schemaName) {
45
46
  const artifactStatus = deriveArtifactWorkflowStatus(artifactGraph);
46
47
  const taskReadiness = await readTaskReadiness(context.changeDir);
47
48
  const evidence = await readEvidenceReadiness(context.changeDir);
48
- const evolution = await readEvolutionOutcome(context.changeDir);
49
+ const evolution = await readEvolutionOutcome(projectRoot, context.changeDir, changeName);
49
50
  const status = deriveChangeReadinessStatus(artifactStatus, taskReadiness.total, taskReadiness.completed);
50
51
  return {
51
52
  changeName,
@@ -141,29 +142,40 @@ async function readEvidenceReadiness(changeDir) {
141
142
  };
142
143
  }
143
144
  /**
144
- * Read the CLI-written evolution outcome for the change, if any. Defensive: any
145
- * missing file / parse error / unknown outcome degrades to `'not-run'` (forward
146
- * compatible and never throws), so `status` can always render an Evolution line.
145
+ * Read the CLI-written evolution outcome for the change, if any. When the manual
146
+ * evolution-result file is absent, fall back to the durable loop-v2 episode store
147
+ * so a failed `learn --apply` / self-evolution episode is not mislabeled
148
+ * `not-run`. Defensive: parse errors / unknown outcomes degrade to `'not-run'`
149
+ * (forward compatible and never throws), so `status` can always render an
150
+ * Evolution line.
147
151
  */
148
- async function readEvolutionOutcome(changeDir) {
152
+ async function readEvolutionOutcome(projectRoot, changeDir, changeName) {
149
153
  const notRun = { status: 'not-run', promoted: false, promotedFiles: [] };
150
154
  let raw;
151
155
  try {
152
156
  raw = await fs.readFile(path.join(changeDir, 'evolution-result.json'), 'utf-8');
153
157
  }
154
158
  catch {
155
- return notRun;
159
+ return (await readLatestEpisodeOutcome(projectRoot, changeDir, changeName)) ?? notRun;
156
160
  }
157
161
  try {
158
162
  const record = JSON.parse(raw);
159
163
  const outcome = typeof record.outcome === 'string' ? record.outcome : '';
164
+ // `busy-in-flight` is a TRANSIENT, self-healing concurrency deferral (another
165
+ // in-flight episode holds the SAME 策略 POLICY target) — NOT error-prefixed
166
+ // and NOT a defect. It is classified as a distinct non-error 'busy' status so
167
+ // a reader never mistakes it for an `error-...` stop. The in-flight lock
168
+ // self-heals (re-acquired once the holder finishes or the stale window
169
+ // elapses), so the recommended posture is wait-and-retry.
160
170
  const status = outcome === 'promoted'
161
171
  ? 'promoted'
162
- : outcome.startsWith('refused-')
163
- ? 'refused'
164
- : outcome.startsWith('error-')
165
- ? 'error'
166
- : 'not-run';
172
+ : outcome === 'busy-in-flight'
173
+ ? 'busy'
174
+ : outcome.startsWith('refused-')
175
+ ? 'refused'
176
+ : outcome.startsWith('error-')
177
+ ? 'error'
178
+ : 'not-run';
167
179
  if (status === 'not-run')
168
180
  return notRun;
169
181
  return {
@@ -181,6 +193,49 @@ async function readEvolutionOutcome(changeDir) {
181
193
  return notRun;
182
194
  }
183
195
  }
196
+ async function readLatestEpisodeOutcome(projectRoot, changeDir, changeName) {
197
+ let episodes;
198
+ try {
199
+ episodes = await listEpisodes(projectRoot);
200
+ }
201
+ catch {
202
+ return null;
203
+ }
204
+ const resolvedChangeDir = path.resolve(changeDir);
205
+ const episode = episodes.find((ep) => ep.changeName === changeName || path.resolve(ep.changeDirPath) === resolvedChangeDir);
206
+ if (!episode)
207
+ return null;
208
+ if (episode.stage === 'errored') {
209
+ return {
210
+ status: 'error',
211
+ reason: episode.terminalError,
212
+ targetId: episode.targetId,
213
+ promoted: false,
214
+ promotedFiles: [],
215
+ timestamp: episode.updatedAt,
216
+ };
217
+ }
218
+ if (episode.stage === 'evolution-refused') {
219
+ return {
220
+ status: 'refused',
221
+ reason: 'evolution refused',
222
+ targetId: episode.targetId,
223
+ promoted: false,
224
+ promotedFiles: [],
225
+ timestamp: episode.updatedAt,
226
+ };
227
+ }
228
+ if (episode.stage === 'evolved') {
229
+ return {
230
+ status: 'promoted',
231
+ targetId: episode.targetId,
232
+ promoted: true,
233
+ promotedFiles: [],
234
+ timestamp: episode.updatedAt,
235
+ };
236
+ }
237
+ return null;
238
+ }
184
239
  async function testReportRequiresPlan(testReportPath) {
185
240
  try {
186
241
  const content = await fs.readFile(testReportPath, 'utf-8');
@@ -31,4 +31,37 @@ export interface TestMetrics {
31
31
  * Returns null when no recognized summary is found.
32
32
  */
33
33
  export declare function parseTestMetrics(reportText: string): TestMetrics | null;
34
+ /**
35
+ * What a runner actually COLLECTED, independent of how many passed. A green
36
+ * SUMMARY line ("46 passed") says nothing about WHICH tests ran — a default
37
+ * `pytest` can pass 46 unrelated tests while a conftest `collect_ignore`
38
+ * excludes the change's own tests entirely. This is the collection-scope
39
+ * signal the change-scope guard reads so a passing-but-irrelevant run cannot be
40
+ * certified as a verified success arm.
41
+ */
42
+ export interface TestCollection {
43
+ /**
44
+ * Number of tests COLLECTED for execution. For pytest this is `collected N`
45
+ * minus `deselected M` (i.e. the SELECTED count); for vitest the number of
46
+ * matched test files. null when no collection line was recognized.
47
+ */
48
+ collected: number | null;
49
+ /**
50
+ * Test FILE paths the runner reported collecting / running, lowercased and
51
+ * forward-slashed, deduped. Empty when none were itemized (a collected COUNT
52
+ * with no path list still populates {@link collected}). Used to intersect
53
+ * against the change's expected test paths.
54
+ */
55
+ paths: string[];
56
+ }
57
+ /**
58
+ * Parse a runner's COLLECTION scope from its output. Pure + dependency-free.
59
+ *
60
+ * Returns null ("no collection signal") when no recognized collection line is
61
+ * present — callers MUST treat that as unknown scope (no gate), never as zero.
62
+ * A recognized collection line with count 0 (pytest "collected 0 items",
63
+ * vitest "no test files found") returns `{ collected: 0, paths: [] }` — an
64
+ * affirmative empty-scope signal.
65
+ */
66
+ export declare function parseTestCollection(reportText: string): TestCollection | null;
34
67
  //# sourceMappingURL=test-metrics.d.ts.map
@@ -61,4 +61,71 @@ export function parseTestMetrics(reportText) {
61
61
  }
62
62
  return result;
63
63
  }
64
+ // pytest: "collected 46 items" / "collected 46 items / 3 deselected" /
65
+ // "46 deselected" / "12 selected" (after a deselect).
66
+ const PYTEST_COLLECTED_RE = /\bcollected\s+(\d+)\s+items?\b/i;
67
+ const PYTEST_DESELECTED_RE = /(\d+)\s+deselected\b/i;
68
+ const PYTEST_SELECTED_RE = /(\d+)\s+selected\b/i;
69
+ // vitest: "no test files found" — an explicit zero-collection signal.
70
+ const VITEST_NO_FILES_RE = /\bno\s+test\s+files?\s+found\b/i;
71
+ // A test FILE path token: any *.py / *.ts / *.js / *.tsx / *.spec.* style path.
72
+ // Matched globally on a line so itemized collection output yields every path.
73
+ const TEST_PATH_RE = /(?:^|[\s"'(])([\w./\\-]*\b(?:tests?|spec|specs)\b[\w./\\-]*\.(?:py|tsx?|jsx?))/gi;
74
+ function normPath(p) {
75
+ return p.replace(/\\/g, '/').toLowerCase().replace(/^\.\//, '');
76
+ }
77
+ /**
78
+ * Parse a runner's COLLECTION scope from its output. Pure + dependency-free.
79
+ *
80
+ * Returns null ("no collection signal") when no recognized collection line is
81
+ * present — callers MUST treat that as unknown scope (no gate), never as zero.
82
+ * A recognized collection line with count 0 (pytest "collected 0 items",
83
+ * vitest "no test files found") returns `{ collected: 0, paths: [] }` — an
84
+ * affirmative empty-scope signal.
85
+ */
86
+ export function parseTestCollection(reportText) {
87
+ if (!reportText)
88
+ return null;
89
+ const text = reportText.replace(ANSI_SGR, '');
90
+ let collected = null;
91
+ const paths = new Set();
92
+ let sawSignal = false;
93
+ for (const raw of text.split(/\r?\n/)) {
94
+ const line = raw.trim();
95
+ if (!line)
96
+ continue;
97
+ if (VITEST_NO_FILES_RE.test(line)) {
98
+ collected = 0;
99
+ sawSignal = true;
100
+ }
101
+ const collectedN = count(line, PYTEST_COLLECTED_RE);
102
+ if (collectedN !== null) {
103
+ const deselected = count(line, PYTEST_DESELECTED_RE) ?? 0;
104
+ collected = Math.max(0, collectedN - deselected);
105
+ sawSignal = true;
106
+ }
107
+ else {
108
+ // A standalone "N selected" / "N deselected" line refines a prior count.
109
+ const selected = count(line, PYTEST_SELECTED_RE);
110
+ if (selected !== null) {
111
+ collected = selected;
112
+ sawSignal = true;
113
+ }
114
+ }
115
+ // Harvest itemized test file paths from any line (collection listing,
116
+ // per-file vitest report, or a pytest rootdir/test path echo).
117
+ TEST_PATH_RE.lastIndex = 0;
118
+ let m;
119
+ while ((m = TEST_PATH_RE.exec(line)) !== null) {
120
+ const p = normPath(m[1]);
121
+ if (p) {
122
+ paths.add(p);
123
+ sawSignal = true;
124
+ }
125
+ }
126
+ }
127
+ if (!sawSignal)
128
+ return null;
129
+ return { collected, paths: [...paths] };
130
+ }
64
131
  //# sourceMappingURL=test-metrics.js.map
@@ -9,7 +9,7 @@ import { buildLLMSummaryCandidates, } from './learn/llm-summary.js';
9
9
  import { parseTestMetrics, computePerChangeLoss, measureHealthReport, resolveMetricSource, } from './fitness/index.js';
10
10
  import { readProjectConfig } from './project-config.js';
11
11
  import { getTrajectoryForChange } from './trajectory/registry.js';
12
- import { toTrajectoryFacts } from './trajectory/facts.js';
12
+ import { toTrajectoryFacts, extractExpectedTestPaths } from './trajectory/facts.js';
13
13
  import { toActionSkeleton, renderActionSkeleton } from './trajectory/skeleton.js';
14
14
  import { walkCreditPath } from './learn/credit-path.js';
15
15
  const PRIMARY_ARTIFACTS = [
@@ -74,7 +74,16 @@ export async function generateLearnReport(args = {}) {
74
74
  const trajectory = args.trajectorySource
75
75
  ? await args.trajectorySource.getTrajectory(resolved.changeName).catch(() => null)
76
76
  : await getTrajectoryForChange(projectRoot, resolved.changeName);
77
- const trajectoryFacts = toTrajectoryFacts(trajectory, resolved.changeName);
77
+ // Change-scope guard input: the change's own expected test paths (from its
78
+ // spec-tests.md mapping). Lets toTrajectoryFacts DEMOTE a green-but-irrelevant
79
+ // run (a default `pytest` that collected ZERO of the change's tests) to
80
+ // unverified so the 奖励智能体 REWARD AGENT abstains instead of certifying a
81
+ // false-GREEN. Absent/empty ⇒ no gate (byte-identical baseline).
82
+ const specTestsForScope = artifacts.evidence.find((f) => /(?:^|[\\/])spec-tests\.md$/i.test(f.relativePath));
83
+ const expectedTestPaths = extractExpectedTestPaths(specTestsForScope?.content);
84
+ const trajectoryFacts = toTrajectoryFacts(trajectory, resolved.changeName, {
85
+ expectedTestPaths,
86
+ });
78
87
  // "Trust the trajectory": when a real runner was observed, its pass rate wins
79
88
  // over the authored test-report; otherwise the report stands but is flagged
80
89
  // unverified (observe-only soft penalty — `unverifiedWeight` defaults to 0, so
@@ -42,6 +42,9 @@ export declare const ProjectConfigSchema: z.ZodObject<{
42
42
  flag: "flag";
43
43
  route: "route";
44
44
  }>>;
45
+ deepReadGradient: z.ZodOptional<z.ZodBoolean>;
46
+ deepReadMaxChunks: z.ZodOptional<z.ZodNumber>;
47
+ deepReadMaxChunkChars: z.ZodOptional<z.ZodNumber>;
45
48
  }, z.core.$strip>>;
46
49
  critic: z.ZodOptional<z.ZodObject<{
47
50
  baselineMode: z.ZodOptional<z.ZodEnum<{
@@ -91,6 +91,12 @@ export const ProjectConfigSchema = z.object({
91
91
  // confidently prefers the worse-pass-rate arm (the complement to
92
92
  // gate-not-blend), never on a legitimate health/verbosity override.
93
93
  divergenceCheck: z.enum(['flag', 'route']).optional(),
94
+ // M6 POST-SCORE deep read of the full transcript → enrich the textual
95
+ // GRADIENT only (never the sealed scalar). Off by default; stochastic,
96
+ // so it is confined to the advisory gradient and runs after scoring.
97
+ deepReadGradient: z.boolean().optional(),
98
+ deepReadMaxChunks: z.number().optional(),
99
+ deepReadMaxChunkChars: z.number().optional(),
94
100
  })
95
101
  .optional(),
96
102
  // Loop v2 — CRITIC AGENT(基线智能体 baseline agent)baseline construction.
@@ -319,7 +325,7 @@ export function readProjectConfig(projectRoot) {
319
325
  else if (rawSE.reward !== undefined) {
320
326
  console.warn(`Invalid 'selfEvolution.reward' in config (samples/noiseFloor numbers, ` +
321
327
  `orderSwap/requireCorrectnessGate booleans, tamperCheck off|flag|block, ` +
322
- `divergenceCheck flag|route), ignoring`);
328
+ `divergenceCheck flag|route, deepReadGradient boolean), ignoring`);
323
329
  }
324
330
  // Loop v2 — CRITIC AGENT knobs. Resilient: a bad value is dropped with a
325
331
  // warning (the critic default 're-do' then applies). Omitted ⇒ undefined
@@ -47,7 +47,7 @@ import { readProjectConfig } from '../project-config.js';
47
47
  import { claudeProjectsDir } from '../learn/trajectory-discovery.js';
48
48
  import { claudeSourceFactory } from '../trajectory/adapters/claude.js';
49
49
  import { toActionSkeleton } from '../trajectory/skeleton.js';
50
- import { runHeadlessAgent, DEFAULT_AGENT_TIMEOUT_MS } from './host-harness.js';
50
+ import { runHeadlessAgent, resolveAgentTimeoutMs, } from './host-harness.js';
51
51
  import { currentPolicyVersion, readPolicyLedger, readPolicySnapshotFiles, } from './policy/index.js';
52
52
  import { advanceEpisodeStage, writeArmCapture } from './episode-store.js';
53
53
  /** Error thrown when the worktree could not be created (git AND copy fallback failed). */
@@ -212,7 +212,7 @@ const GIT_TIMEOUT_MS = 60_000;
212
212
  export async function runCriticAgent(opts) {
213
213
  const repoRoot = path.resolve(opts.repoRoot);
214
214
  const spawnImpl = opts.spawn ?? nodeSpawn;
215
- const timeoutMs = opts.timeoutMs ?? DEFAULT_AGENT_TIMEOUT_MS;
215
+ const timeoutMs = opts.timeoutMs ?? resolveAgentTimeoutMs(opts.harness);
216
216
  const gitTimeoutMs = opts.gitTimeoutMs ?? GIT_TIMEOUT_MS;
217
217
  const homeDir = opts.homeDir ?? os.homedir();
218
218
  const baselineMode = opts.baselineMode ?? 're-do';
@@ -271,10 +271,18 @@ export async function runCriticAgent(opts) {
271
271
  homeDir,
272
272
  runStartMs: runStart,
273
273
  });
274
+ // Local import keeps the facts derivation in one place (learn uses the same
275
+ // function); imported lazily to avoid a top-level cycle hazard.
276
+ const { toTrajectoryFacts, extractExpectedTestPaths } = await import('../trajectory/facts.js');
277
+ // Change-scope guard input for the baseline arm: the change's expected test
278
+ // paths from its spec-tests.md (the worktree carries the change dir), so a
279
+ // green-but-out-of-scope baseline run is demoted symmetrically with the main
280
+ // arm and advantage stays scope-consistent.
281
+ const expectedTestPaths = extractExpectedTestPaths(await (await import('node:fs/promises'))
282
+ .readFile(path.join(worktreePath, 'synergyspec-selfevolving', 'changes', opts.changeName, 'spec-tests.md'), 'utf8')
283
+ .catch(() => undefined));
274
284
  const facts = trajectory
275
- ? // Local import keeps the facts derivation in one place (learn uses the
276
- // same function); imported lazily to avoid a top-level cycle hazard.
277
- (await import('../trajectory/facts.js')).toTrajectoryFacts(trajectory, opts.changeName)
285
+ ? toTrajectoryFacts(trajectory, opts.changeName, { expectedTestPaths })
278
286
  : null;
279
287
  // Honesty: prefer the OBSERVED pass rate (a real runner ran), else the
280
288
  // stdout-parsed summary; null when neither parsed (never fabricated).
@@ -1,14 +1,24 @@
1
- export declare class CanonicalProposerOutputInvalid extends Error {
1
+ export declare class EvolvingAgentOutputInvalid extends Error {
2
2
  constructor(message: string);
3
3
  }
4
4
  /** The model declined to edit anything (empty edits). Not an error — a no-op. */
5
- export declare class CanonicalProposerNoOp extends Error {
5
+ export declare class EvolvingAgentNoOp extends Error {
6
6
  constructor();
7
7
  }
8
8
  /** The headless agent invocation itself failed (crash / empty output). */
9
- export declare class CanonicalProposerInvocationError extends Error {
9
+ export declare class EvolvingAgentInvocationError extends Error {
10
10
  constructor(stderr: string);
11
11
  }
12
+ /**
13
+ * @deprecated v2.0.0 removed the GA "canonical proposer"; these names are
14
+ * retained only as transitional aliases for any external importer. Use the
15
+ * `EvolvingAgent*` classes — they are the same constructors.
16
+ */
17
+ export declare const CanonicalProposerOutputInvalid: typeof EvolvingAgentOutputInvalid;
18
+ /** @deprecated alias of {@link EvolvingAgentNoOp}. */
19
+ export declare const CanonicalProposerNoOp: typeof EvolvingAgentNoOp;
20
+ /** @deprecated alias of {@link EvolvingAgentInvocationError}. */
21
+ export declare const CanonicalProposerInvocationError: typeof EvolvingAgentInvocationError;
12
22
  /**
13
23
  * The packaged result of one validated candidate edit set: the human-readable
14
24
  * unified diff, the POSIX paths actually edited (a subset of the target's
@@ -39,8 +49,8 @@ export interface CanonicalProposeOutput {
39
49
  * the loop-v2 演进智能体 EVOLVING AGENT call this so their safety contract is
40
50
  * byte-identical. relPaths are normalized to POSIX separators.
41
51
  *
42
- * Throws {@link CanonicalProposerNoOp} when `rawEdits` is empty and
43
- * {@link CanonicalProposerOutputInvalid} for any shape / frozen / scope
52
+ * Throws {@link EvolvingAgentNoOp} when `rawEdits` is empty and
53
+ * {@link EvolvingAgentOutputInvalid} for any shape / frozen / scope
44
54
  * violation. Path traversal and absolute paths are rejected transitively: they
45
55
  * can never be a member of `allowedFiles`, so they fail the scope check.
46
56
  */