npm - synergyspec-selfevolving - Versions diffs - 2.1.2 → 2.1.4 - Mend

synergyspec-selfevolving 2.1.2 → 2.1.4

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (41)

package/dist/commands/learn.js +13 -3
package/dist/commands/self-evolution-episode.d.ts +6 -1
package/dist/commands/self-evolution-episode.js +8 -1
package/dist/commands/self-evolution.d.ts +2 -2
package/dist/commands/self-evolution.js +10 -10
package/dist/commands/workflow/status.js +5 -0
package/dist/core/change-readiness.d.ts +1 -1
package/dist/core/change-readiness.js +66 -11
package/dist/core/fitness/test-metrics.d.ts +33 -0
package/dist/core/fitness/test-metrics.js +67 -0
package/dist/core/learn.js +11 -2
package/dist/core/project-config.d.ts +3 -0
package/dist/core/project-config.js +7 -1
package/dist/core/self-evolution/critic-agent.js +13 -5
package/dist/core/self-evolution/edits-contract.d.ts +15 -5
package/dist/core/self-evolution/edits-contract.js +26 -16
package/dist/core/self-evolution/episode-orchestrator.d.ts +16 -9
package/dist/core/self-evolution/episode-orchestrator.js +126 -35
package/dist/core/self-evolution/episode-store.d.ts +34 -11
package/dist/core/self-evolution/episode-store.js +45 -10
package/dist/core/self-evolution/evolving-agent.d.ts +12 -12
package/dist/core/self-evolution/evolving-agent.js +46 -48
package/dist/core/self-evolution/host-harness.d.ts +68 -2
package/dist/core/self-evolution/host-harness.js +208 -21
package/dist/core/self-evolution/policy/policy-store.d.ts +8 -6
package/dist/core/self-evolution/policy/policy-store.js +124 -24
package/dist/core/self-evolution/proposer-slice.d.ts +4 -3
package/dist/core/self-evolution/reward-agent.d.ts +11 -1
package/dist/core/self-evolution/reward-agent.js +53 -20
package/dist/core/self-evolution/reward-aggregator.d.ts +18 -0
package/dist/core/self-evolution/reward-aggregator.js +53 -3
package/dist/core/self-evolution/reward-deepread.d.ts +64 -0
package/dist/core/self-evolution/reward-deepread.js +112 -0
package/dist/core/templates/workflows/learn.js +3 -2
package/dist/core/templates/workflows/self-evolving.js +5 -2
package/dist/core/trajectory/facts.d.ts +69 -2
package/dist/core/trajectory/facts.js +179 -10
package/dist/core/trajectory/skeleton.d.ts +10 -0
package/dist/core/trajectory/skeleton.js +24 -3
package/package.json +4 -3
package/schemas/spec-driven/templates/design.md +2 -1

package/dist/commands/learn.js CHANGED Viewed

@@ -5,7 +5,7 @@ import { readProjectConfig } from '../core/project-config.js';
 import { assembleTrajectoryContext, } from '../core/learn/trajectory-assembler.js';
 import { findTranscriptsForChange, resolveChangeDir, validateExplicitTrajectoryHandle, } from '../core/learn/trajectory-discovery.js';
 import { getTrajectoryForChange } from '../core/trajectory/registry.js';
-import { toTrajectoryFacts, describeRunnerResults } from '../core/trajectory/facts.js';
+import { toTrajectoryFacts, describeRunnerResults, extractExpectedTestPaths } from '../core/trajectory/facts.js';
 import { toActionSkeleton } from '../core/trajectory/skeleton.js';
 import { resolveHostHarness, resolveHostHarnessForRepo } from '../core/self-evolution/host-harness.js';
 import { mineSuccessSignals } from '../core/self-evolution/success-channel.js';
@@ -301,13 +301,23 @@ export function registerLearnCommand(program, deps = {}) {
                 process.env.SYNERGYSPEC_SELFEVOLVING_SESSION_ID = opts.sessionId;
             try {
                 const adapterTrajectory = await getTrajectoryForChange(projectRoot, change);
+                // Change-scope guard input so debug-trajectory's facts + per-runner
+                // detail reflect the same scope demotion the loop uses (surfaces a
+                // green-but-out-of-scope graded run instead of hiding it).
+                const adapterExpectedTestPaths = extractExpectedTestPaths(await (await import('node:fs/promises'))
+                    .readFile(path.join(projectRoot, 'synergyspec-selfevolving', 'changes', change, 'spec-tests.md'), 'utf8')
+                    .catch(() => undefined));
                 payload.adapter = {
                     resolvedHarness: resolveHostHarness(),
                     sessionId: adapterTrajectory?.sessionId ?? null,
                     turns: adapterTrajectory?.turns.length ?? 0,
                     sourcePaths: adapterTrajectory ? [...new Set(adapterTrajectory.sourcePaths)] : [],
-                    facts: toTrajectoryFacts(adapterTrajectory, change),
-                    runnerResults: describeRunnerResults(adapterTrajectory),
+                    facts: toTrajectoryFacts(adapterTrajectory, change, {
+                        expectedTestPaths: adapterExpectedTestPaths,
+                    }),
+                    runnerResults: describeRunnerResults(adapterTrajectory, {
+                        expectedTestPaths: adapterExpectedTestPaths,
+                    }),
                     // Bounded play-by-play projection (file edits / test runs /
                     // commands) so a wrong skeleton is visible in one command.
                     steps: toActionSkeleton(adapterTrajectory),

package/dist/commands/self-evolution-episode.d.ts CHANGED Viewed

@@ -89,7 +89,12 @@ export interface RunEpisodeCommandResult {
     exitCode: number;
     /** Present when the episode ran (not busy / not an error). */
     result?: RunEpisodeResult;
-    /** Present when the target's in-flight slot was already held. */
+    /**
+     * Present when the target's in-flight slot was already held by another
+     * episode. The command emits the EXACT machine outcome literal
+     * `busy-in-flight` (lowercase, hyphenated, NOT error-prefixed): a TRANSIENT,
+     * self-healing concurrency deferral, NEVER an `error-...` stop.
+     */
     busy?: RunEpisodeBusy;
     error?: string;
 }

package/dist/commands/self-evolution-episode.js CHANGED Viewed

@@ -157,7 +157,7 @@ export async function runEpisodeCommand(args, opts) {
         // `.synergyspec-selfevolving/host-harness.json`, so even when the
         // orchestrator's reward/evolving agents later spawn from an env-less Task
         // subagent they read the seeded harness instead of defaulting to the
-        // 'claude' binary (the ydata proposer-spawn failure).
+        // 'claude' binary (the ydata 演进智能体 EVOLVING AGENT spawn failure).
         const harness = await resolveHostHarnessForRepo(opts.repoRoot);
         const episodeOptions = {
             repoRoot: opts.repoRoot,
@@ -190,6 +190,13 @@ export async function runEpisodeCommand(args, opts) {
             stdout(JSON.stringify({ exitCode: 0, busy: outcome }, null, 2));
         }
         else {
+            // Emit the EXACT machine outcome literal so the runner skill COPIES it
+            // verbatim into its '## Episode Verdict' block instead of INFERRING an
+            // 'error-in-flight' from prose. busy-in-flight is a TRANSIENT, self-healing
+            // concurrency deferral (another in-flight episode holds the SAME 策略
+            // POLICY target) — it is NOT error-prefixed and must never be classified as
+            // an error. The lock self-heals; recommend WAIT-AND-RETRY.
+            stdout('Outcome: busy-in-flight');
             stdout(`Episode not started for ${targetId}: ${outcome.reason}`);
         }
         return { exitCode: 0, busy: outcome };

package/dist/commands/self-evolution.d.ts CHANGED Viewed

@@ -6,7 +6,7 @@ export declare function registerSelfEvolutionCommand(program: Command): void;
  * Candidate edits authored by the HOST code agent (the one running the learn
  * skill, with full repo context) and handed to the CLI via `--from-edits`. The
  * host GENERATES the new file contents; the CLI re-validates them against the
- * target's frozen + scoped files exactly as the headless proposer path does,
+ * target's frozen + scoped files exactly as the headless 演进智能体 EVOLVING AGENT path does,
  * then packages them. This is the preferred path; `--agent` is the no-host
  * fallback.
  */
@@ -225,7 +225,7 @@ export interface EvolveFromEditsReport {
  * HOST-AUTHORED one-button evolve. The single non-interactive
  * host-authored-edit → gate → observed-verified promote command.
  *
- * Flow (NEVER spawns the proposer):
+ * Flow (NEVER spawns an agent):
  *   1. Read `--from-edits` (path or '-') into a {@link HostEditsInput}.
  *   2. {@link runProposeCanonical} with single-change aggregation + the host
  *      `editsInput` to PACKAGE the host candidate (proposal-only). Take

package/dist/commands/self-evolution.js CHANGED Viewed

@@ -1,7 +1,7 @@
 import * as fs from 'node:fs';
 import * as path from 'node:path';
 import fastGlob from 'fast-glob';
-import { aggregateLearnEvolutionHints, applyCandidatePromotion, rollbackCandidatePromotion, shouldAutoPromote, isEvidenceComplete, readCandidateFitness, readHealthBaseline, writeHealthBaseline, readCandidatePackage, resolveTargetLocalFiles, CANONICAL_CANDIDATE_SOURCES, CANONICAL_TARGETS, collectArchiveExperiences, EVOLVABLE_PART_DESCRIPTIONS, EVOLVABLE_PARTS, evaluateTaskDecompositionForChange, evaluateToolEvolutionCandidate, generateCandidateId, generatePromotionReport, readPromotedBaselineLoss, checkLossRegression, recordVerdictBestEffort, updateCandidateStatus, isEvolutionPartEnabled, findSimilarArchiveExperiences, listCanonicalTargets, lookupCanonicalTarget, validateCandidateEdits, renderUnifiedDiff, CanonicalProposerNoOp, resolveTargetEvolutionPolicy, resolveKindOnlyPinTarget, detectUnbindableHintObservations, isCanonicalTargetEvolvable, parseEvolutionSwitchOptions, renderAlignmentReport, renderArchiveExperienceBlock, renderStaticGateSummary, renderToolEvolutionGuardReport, renderEvolutionSwitches, requireCanonicalTarget, resolveCandidateRepo, runStaticCandidateGate, shouldTriggerCandidate, validateLearnEvolutionHint, writeCandidatePackage, verifySpecCodeAlignmentForChange, } from '../core/self-evolution/index.js';
+import { aggregateLearnEvolutionHints, applyCandidatePromotion, rollbackCandidatePromotion, shouldAutoPromote, isEvidenceComplete, readCandidateFitness, readHealthBaseline, writeHealthBaseline, readCandidatePackage, resolveTargetLocalFiles, CANONICAL_CANDIDATE_SOURCES, CANONICAL_TARGETS, collectArchiveExperiences, EVOLVABLE_PART_DESCRIPTIONS, EVOLVABLE_PARTS, evaluateTaskDecompositionForChange, evaluateToolEvolutionCandidate, generateCandidateId, generatePromotionReport, readPromotedBaselineLoss, checkLossRegression, recordVerdictBestEffort, updateCandidateStatus, isEvolutionPartEnabled, findSimilarArchiveExperiences, listCanonicalTargets, lookupCanonicalTarget, validateCandidateEdits, renderUnifiedDiff, EvolvingAgentNoOp, resolveTargetEvolutionPolicy, resolveKindOnlyPinTarget, detectUnbindableHintObservations, isCanonicalTargetEvolvable, parseEvolutionSwitchOptions, renderAlignmentReport, renderArchiveExperienceBlock, renderStaticGateSummary, renderToolEvolutionGuardReport, renderEvolutionSwitches, requireCanonicalTarget, resolveCandidateRepo, runStaticCandidateGate, shouldTriggerCandidate, validateLearnEvolutionHint, writeCandidatePackage, verifySpecCodeAlignmentForChange, } from '../core/self-evolution/index.js';
 import { generateLearnReport } from '../core/learn.js';
 import { validateExplicitTrajectoryHandle } from '../core/learn/trajectory-discovery.js';
 import { validateChangeExists } from './workflow/shared.js';
@@ -330,7 +330,7 @@ export function registerSelfEvolutionCommand(program) {
     });
     cmd
         .command('evolve-from-edits')
-        .description('HOST-AUTHORED one-button evolve: package edits the host code agent already wrote (--from-edits) for ONE learn signal, run the static gate, and auto-promote ONLY when the change\'s learn report carries an OBSERVED-VERIFIED green signal (a real test run was seen) onto the canonical LOCAL file. Never spawns the proposer; --agent is refused.')
+        .description('HOST-AUTHORED one-button evolve: package edits the host code agent already wrote (--from-edits) for ONE learn signal, run the static gate, and auto-promote ONLY when the change\'s learn report carries an OBSERVED-VERIFIED green signal (a real test run was seen) onto the canonical LOCAL file. Never spawns an agent; --agent is refused.')
         .requiredOption('--from-learn <hints.json>', 'the change\'s learn hints.json to aggregate (one signal)')
         .requiredOption('--evolve-target <targetId>', 'the single canonical target id to evolve')
         .requiredOption('--from-edits <file>', "JSON the host agent wrote ({ targetId?, rationale?, edits: [{relPath, content}] }; '-' reads stdin)")
@@ -339,7 +339,7 @@ export function registerSelfEvolutionCommand(program) {
         .option('--require-proven', 'only promote on a MEASURED fitness improvement (refuse unproven candidates)')
         .option('--transcript <path>', 'Explicit transcript .jsonl to grade (bypasses change-window discovery; Claude transcript store only)')
         .option('--session-id <id>', 'Explicit Claude session id to grade (bypasses change-window discovery; Claude transcript store only)')
-        .option('--agent', 'REFUSED: this path is host-authored and never spawns the proposer')
+        .option('--agent', 'REFUSED: this path is host-authored and never spawns an agent')
         .option('--yes', 'required: confirm the non-interactive auto-promote')
         .option('--json', 'output the full EvolveFromEditsReport JSON')
         .action(async (options) => {
@@ -409,11 +409,11 @@ export function registerSelfEvolutionCommand(program) {
 }
 /**
  * Validate host-authored candidate edits (the `--from-edits` path) and turn them
- * into the same {@link CanonicalProposeOutput} shape the headless proposer
- * returns. Reuses {@link validateCandidateEdits} (frozen + target-scope checks)
+ * into the same {@link CanonicalProposeOutput} shape the 演进智能体 EVOLVING
+ * AGENT returns. Reuses {@link validateCandidateEdits} (frozen + target-scope checks)
  * and {@link renderUnifiedDiff}, so the host path and the agent path are
  * byte-identical in what they accept and how they package. Throws
- * {@link CanonicalProposerNoOp} when the edits change nothing.
+ * {@link EvolvingAgentNoOp} when the edits change nothing.
  */
 function packageHostEdits(editsInput, allowedFiles, currentFiles, group, targetId) {
     if (editsInput.targetId && editsInput.targetId !== targetId) {
@@ -425,7 +425,7 @@ function packageHostEdits(editsInput, allowedFiles, currentFiles, group, targetI
     // nothing to evolve — surface it as a no-op (placeholder), like the agent path.
     const changesSomething = validated.some((e) => (oldByPath.get(e.relPath) ?? '') !== e.content);
     if (!changesSomething) {
-        throw new CanonicalProposerNoOp();
+        throw new EvolvingAgentNoOp();
     }
     const diffPatch = validated
         .map((e) => renderUnifiedDiff(e.relPath, oldByPath.get(e.relPath) ?? '', e.content))
@@ -1007,7 +1007,7 @@ export async function runRejectCommand(args, opts) {
  * HOST-AUTHORED one-button evolve. The single non-interactive
  * host-authored-edit → gate → observed-verified promote command.
  *
- * Flow (NEVER spawns the proposer):
+ * Flow (NEVER spawns an agent):
  *   1. Read `--from-edits` (path or '-') into a {@link HostEditsInput}.
  *   2. {@link runProposeCanonical} with single-change aggregation + the host
  *      `editsInput` to PACKAGE the host candidate (proposal-only). Take
@@ -1067,7 +1067,7 @@ export async function runEvolveFromEdits(args, opts) {
     // Non-interactive contract: --yes is required (one-button host-authored
     // confirmation), and --agent is REFUSED (this path is host-authored, never spawns).
     if (args.agent) {
-        return fail(2, 'error-bad-input', '--agent is not allowed: evolve-from-edits is host-authored and never spawns the proposer.', false);
+        return fail(2, 'error-bad-input', '--agent is not allowed: evolve-from-edits is host-authored and never spawns an agent.', false);
     }
     if (!args.yes) {
         return fail(2, 'error-bad-input', '--yes is required: evolve-from-edits promotes onto your local files non-interactively.', false);
@@ -1375,7 +1375,7 @@ function renderProposalMd(group, expectedBenefit) {
     lines.push(expectedBenefit);
     lines.push('');
     lines.push('## Status');
-    lines.push('- diff.patch is intentionally empty. Apply the candidate change manually (or via a future automated proposer) before invoking the static gate.');
+    lines.push('- diff.patch is intentionally empty. Apply the candidate change manually (or via a future automated agent) before invoking the static gate.');
     return lines.join('\n') + '\n';
 }
 function renderRationaleMd(group) {

package/dist/commands/workflow/status.js CHANGED Viewed

@@ -67,6 +67,11 @@ export function printStatusText(status, readiness) {
         else if (evolution.status === 'refused' || evolution.status === 'error' || evolution.status === 'promoted') {
             console.log(chalk.yellow(`Evolution: ${evolution.status}${evolution.reason ? ` — ${evolution.reason}` : ''}`));
         }
+        else if (evolution.status === 'busy') {
+            // A transient concurrency deferral (another in-flight episode holds the
+            // 策略 POLICY target). NOT a failure and NOT 'not-run' — self-heals; retry.
+            console.log(chalk.yellow(`Evolution: busy${evolution.reason ? ` — ${evolution.reason}` : ''} (another episode is in flight; retry shortly)`));
+        }
         else {
             // Hyphenated to match the machine enum ('not-run', change-readiness.ts)
             // and the critic skill's verbatim "status shows `Evolution: not-run`".

package/dist/core/change-readiness.d.ts CHANGED Viewed

@@ -8,7 +8,7 @@ export type TaskReadinessStatus = 'no-tasks' | 'complete' | 'in-progress';
  * surfaced for visibility only — it does NOT gate `isArchiveReady` (a safe refusal
  * must not block archiving a finished change).
  */
-export type EvolutionOutcomeStatus = 'not-run' | 'promoted' | 'refused' | 'error';
+export type EvolutionOutcomeStatus = 'not-run' | 'promoted' | 'refused' | 'busy' | 'error';
 export interface ArtifactStatusSummary {
     done: number;
     ready: number;

package/dist/core/change-readiness.js CHANGED Viewed

@@ -1,6 +1,7 @@
 import { promises as fs } from 'fs';
 import path from 'path';
 import { formatChangeStatus, loadChangeContext, } from './artifact-graph/index.js';
+import { listEpisodes } from './self-evolution/episode-store.js';
 const TASK_PATTERN = /^[-*]\s+\[([\sx])\]\s*(.*)$/i;
 const REQUIRED_EVIDENCE_FILES = [
     ['specTests', 'spec-tests.md'],
@@ -45,7 +46,7 @@ export async function getChangeReadiness(projectRoot, changeName, schemaName) {
     const artifactStatus = deriveArtifactWorkflowStatus(artifactGraph);
     const taskReadiness = await readTaskReadiness(context.changeDir);
     const evidence = await readEvidenceReadiness(context.changeDir);
-    const evolution = await readEvolutionOutcome(context.changeDir);
+    const evolution = await readEvolutionOutcome(projectRoot, context.changeDir, changeName);
     const status = deriveChangeReadinessStatus(artifactStatus, taskReadiness.total, taskReadiness.completed);
     return {
         changeName,
@@ -141,29 +142,40 @@ async function readEvidenceReadiness(changeDir) {
     };
 }
 /**
- * Read the CLI-written evolution outcome for the change, if any. Defensive: any
- * missing file / parse error / unknown outcome degrades to `'not-run'` (forward
- * compatible and never throws), so `status` can always render an Evolution line.
+ * Read the CLI-written evolution outcome for the change, if any. When the manual
+ * evolution-result file is absent, fall back to the durable loop-v2 episode store
+ * so a failed `learn --apply` / self-evolution episode is not mislabeled
+ * `not-run`. Defensive: parse errors / unknown outcomes degrade to `'not-run'`
+ * (forward compatible and never throws), so `status` can always render an
+ * Evolution line.
  */
-async function readEvolutionOutcome(changeDir) {
+async function readEvolutionOutcome(projectRoot, changeDir, changeName) {
     const notRun = { status: 'not-run', promoted: false, promotedFiles: [] };
     let raw;
     try {
         raw = await fs.readFile(path.join(changeDir, 'evolution-result.json'), 'utf-8');
     }
     catch {
-        return notRun;
+        return (await readLatestEpisodeOutcome(projectRoot, changeDir, changeName)) ?? notRun;
     }
     try {
         const record = JSON.parse(raw);
         const outcome = typeof record.outcome === 'string' ? record.outcome : '';
+        // `busy-in-flight` is a TRANSIENT, self-healing concurrency deferral (another
+        // in-flight episode holds the SAME 策略 POLICY target) — NOT error-prefixed
+        // and NOT a defect. It is classified as a distinct non-error 'busy' status so
+        // a reader never mistakes it for an `error-...` stop. The in-flight lock
+        // self-heals (re-acquired once the holder finishes or the stale window
+        // elapses), so the recommended posture is wait-and-retry.
         const status = outcome === 'promoted'
             ? 'promoted'
-            : outcome.startsWith('refused-')
-                ? 'refused'
-                : outcome.startsWith('error-')
-                    ? 'error'
-                    : 'not-run';
+            : outcome === 'busy-in-flight'
+                ? 'busy'
+                : outcome.startsWith('refused-')
+                    ? 'refused'
+                    : outcome.startsWith('error-')
+                        ? 'error'
+                        : 'not-run';
         if (status === 'not-run')
             return notRun;
         return {
@@ -181,6 +193,49 @@ async function readEvolutionOutcome(changeDir) {
         return notRun;
     }
 }
+async function readLatestEpisodeOutcome(projectRoot, changeDir, changeName) {
+    let episodes;
+    try {
+        episodes = await listEpisodes(projectRoot);
+    }
+    catch {
+        return null;
+    }
+    const resolvedChangeDir = path.resolve(changeDir);
+    const episode = episodes.find((ep) => ep.changeName === changeName || path.resolve(ep.changeDirPath) === resolvedChangeDir);
+    if (!episode)
+        return null;
+    if (episode.stage === 'errored') {
+        return {
+            status: 'error',
+            reason: episode.terminalError,
+            targetId: episode.targetId,
+            promoted: false,
+            promotedFiles: [],
+            timestamp: episode.updatedAt,
+        };
+    }
+    if (episode.stage === 'evolution-refused') {
+        return {
+            status: 'refused',
+            reason: 'evolution refused',
+            targetId: episode.targetId,
+            promoted: false,
+            promotedFiles: [],
+            timestamp: episode.updatedAt,
+        };
+    }
+    if (episode.stage === 'evolved') {
+        return {
+            status: 'promoted',
+            targetId: episode.targetId,
+            promoted: true,
+            promotedFiles: [],
+            timestamp: episode.updatedAt,
+        };
+    }
+    return null;
+}
 async function testReportRequiresPlan(testReportPath) {
     try {
         const content = await fs.readFile(testReportPath, 'utf-8');

package/dist/core/fitness/test-metrics.d.ts CHANGED Viewed

@@ -31,4 +31,37 @@ export interface TestMetrics {
  * Returns null when no recognized summary is found.
  */
 export declare function parseTestMetrics(reportText: string): TestMetrics | null;
+/**
+ * What a runner actually COLLECTED, independent of how many passed. A green
+ * SUMMARY line ("46 passed") says nothing about WHICH tests ran — a default
+ * `pytest` can pass 46 unrelated tests while a conftest `collect_ignore`
+ * excludes the change's own tests entirely. This is the collection-scope
+ * signal the change-scope guard reads so a passing-but-irrelevant run cannot be
+ * certified as a verified success arm.
+ */
+export interface TestCollection {
+    /**
+     * Number of tests COLLECTED for execution. For pytest this is `collected N`
+     * minus `deselected M` (i.e. the SELECTED count); for vitest the number of
+     * matched test files. null when no collection line was recognized.
+     */
+    collected: number | null;
+    /**
+     * Test FILE paths the runner reported collecting / running, lowercased and
+     * forward-slashed, deduped. Empty when none were itemized (a collected COUNT
+     * with no path list still populates {@link collected}). Used to intersect
+     * against the change's expected test paths.
+     */
+    paths: string[];
+}
+/**
+ * Parse a runner's COLLECTION scope from its output. Pure + dependency-free.
+ *
+ * Returns null ("no collection signal") when no recognized collection line is
+ * present — callers MUST treat that as unknown scope (no gate), never as zero.
+ * A recognized collection line with count 0 (pytest "collected 0 items",
+ * vitest "no test files found") returns `{ collected: 0, paths: [] }` — an
+ * affirmative empty-scope signal.
+ */
+export declare function parseTestCollection(reportText: string): TestCollection | null;
 //# sourceMappingURL=test-metrics.d.ts.map

package/dist/core/fitness/test-metrics.js CHANGED Viewed

@@ -61,4 +61,71 @@ export function parseTestMetrics(reportText) {
     }
     return result;
 }
+// pytest: "collected 46 items" / "collected 46 items / 3 deselected" /
+//         "46 deselected" / "12 selected" (after a deselect).
+const PYTEST_COLLECTED_RE = /\bcollected\s+(\d+)\s+items?\b/i;
+const PYTEST_DESELECTED_RE = /(\d+)\s+deselected\b/i;
+const PYTEST_SELECTED_RE = /(\d+)\s+selected\b/i;
+// vitest: "no test files found" — an explicit zero-collection signal.
+const VITEST_NO_FILES_RE = /\bno\s+test\s+files?\s+found\b/i;
+// A test FILE path token: any *.py / *.ts / *.js / *.tsx / *.spec.* style path.
+// Matched globally on a line so itemized collection output yields every path.
+const TEST_PATH_RE = /(?:^|[\s"'(])([\w./\\-]*\b(?:tests?|spec|specs)\b[\w./\\-]*\.(?:py|tsx?|jsx?))/gi;
+function normPath(p) {
+    return p.replace(/\\/g, '/').toLowerCase().replace(/^\.\//, '');
+}
+/**
+ * Parse a runner's COLLECTION scope from its output. Pure + dependency-free.
+ *
+ * Returns null ("no collection signal") when no recognized collection line is
+ * present — callers MUST treat that as unknown scope (no gate), never as zero.
+ * A recognized collection line with count 0 (pytest "collected 0 items",
+ * vitest "no test files found") returns `{ collected: 0, paths: [] }` — an
+ * affirmative empty-scope signal.
+ */
+export function parseTestCollection(reportText) {
+    if (!reportText)
+        return null;
+    const text = reportText.replace(ANSI_SGR, '');
+    let collected = null;
+    const paths = new Set();
+    let sawSignal = false;
+    for (const raw of text.split(/\r?\n/)) {
+        const line = raw.trim();
+        if (!line)
+            continue;
+        if (VITEST_NO_FILES_RE.test(line)) {
+            collected = 0;
+            sawSignal = true;
+        }
+        const collectedN = count(line, PYTEST_COLLECTED_RE);
+        if (collectedN !== null) {
+            const deselected = count(line, PYTEST_DESELECTED_RE) ?? 0;
+            collected = Math.max(0, collectedN - deselected);
+            sawSignal = true;
+        }
+        else {
+            // A standalone "N selected" / "N deselected" line refines a prior count.
+            const selected = count(line, PYTEST_SELECTED_RE);
+            if (selected !== null) {
+                collected = selected;
+                sawSignal = true;
+            }
+        }
+        // Harvest itemized test file paths from any line (collection listing,
+        // per-file vitest report, or a pytest rootdir/test path echo).
+        TEST_PATH_RE.lastIndex = 0;
+        let m;
+        while ((m = TEST_PATH_RE.exec(line)) !== null) {
+            const p = normPath(m[1]);
+            if (p) {
+                paths.add(p);
+                sawSignal = true;
+            }
+        }
+    }
+    if (!sawSignal)
+        return null;
+    return { collected, paths: [...paths] };
+}
 //# sourceMappingURL=test-metrics.js.map

package/dist/core/learn.js CHANGED Viewed

@@ -9,7 +9,7 @@ import { buildLLMSummaryCandidates, } from './learn/llm-summary.js';
 import { parseTestMetrics, computePerChangeLoss, measureHealthReport, resolveMetricSource, } from './fitness/index.js';
 import { readProjectConfig } from './project-config.js';
 import { getTrajectoryForChange } from './trajectory/registry.js';
-import { toTrajectoryFacts } from './trajectory/facts.js';
+import { toTrajectoryFacts, extractExpectedTestPaths } from './trajectory/facts.js';
 import { toActionSkeleton, renderActionSkeleton } from './trajectory/skeleton.js';
 import { walkCreditPath } from './learn/credit-path.js';
 const PRIMARY_ARTIFACTS = [
@@ -74,7 +74,16 @@ export async function generateLearnReport(args = {}) {
     const trajectory = args.trajectorySource
         ? await args.trajectorySource.getTrajectory(resolved.changeName).catch(() => null)
         : await getTrajectoryForChange(projectRoot, resolved.changeName);
-    const trajectoryFacts = toTrajectoryFacts(trajectory, resolved.changeName);
+    // Change-scope guard input: the change's own expected test paths (from its
+    // spec-tests.md mapping). Lets toTrajectoryFacts DEMOTE a green-but-irrelevant
+    // run (a default `pytest` that collected ZERO of the change's tests) to
+    // unverified so the 奖励智能体 REWARD AGENT abstains instead of certifying a
+    // false-GREEN. Absent/empty ⇒ no gate (byte-identical baseline).
+    const specTestsForScope = artifacts.evidence.find((f) => /(?:^|[\\/])spec-tests\.md$/i.test(f.relativePath));
+    const expectedTestPaths = extractExpectedTestPaths(specTestsForScope?.content);
+    const trajectoryFacts = toTrajectoryFacts(trajectory, resolved.changeName, {
+        expectedTestPaths,
+    });
     // "Trust the trajectory": when a real runner was observed, its pass rate wins
     // over the authored test-report; otherwise the report stands but is flagged
     // unverified (observe-only soft penalty — `unverifiedWeight` defaults to 0, so

package/dist/core/project-config.d.ts CHANGED Viewed

@@ -42,6 +42,9 @@ export declare const ProjectConfigSchema: z.ZodObject<{
                 flag: "flag";
                 route: "route";
             }>>;
+            deepReadGradient: z.ZodOptional<z.ZodBoolean>;
+            deepReadMaxChunks: z.ZodOptional<z.ZodNumber>;
+            deepReadMaxChunkChars: z.ZodOptional<z.ZodNumber>;
         }, z.core.$strip>>;
         critic: z.ZodOptional<z.ZodObject<{
             baselineMode: z.ZodOptional<z.ZodEnum<{

package/dist/core/project-config.js CHANGED Viewed

@@ -91,6 +91,12 @@ export const ProjectConfigSchema = z.object({
             //   confidently prefers the worse-pass-rate arm (the complement to
             //   gate-not-blend), never on a legitimate health/verbosity override.
             divergenceCheck: z.enum(['flag', 'route']).optional(),
+            // M6 POST-SCORE deep read of the full transcript → enrich the textual
+            //   GRADIENT only (never the sealed scalar). Off by default; stochastic,
+            //   so it is confined to the advisory gradient and runs after scoring.
+            deepReadGradient: z.boolean().optional(),
+            deepReadMaxChunks: z.number().optional(),
+            deepReadMaxChunkChars: z.number().optional(),
         })
             .optional(),
         // Loop v2 — CRITIC AGENT（基线智能体 baseline agent）baseline construction.
@@ -319,7 +325,7 @@ export function readProjectConfig(projectRoot) {
                 else if (rawSE.reward !== undefined) {
                     console.warn(`Invalid 'selfEvolution.reward' in config (samples/noiseFloor numbers, ` +
                         `orderSwap/requireCorrectnessGate booleans, tamperCheck off|flag|block, ` +
-                        `divergenceCheck flag|route), ignoring`);
+                        `divergenceCheck flag|route, deepReadGradient boolean), ignoring`);
                 }
                 // Loop v2 — CRITIC AGENT knobs. Resilient: a bad value is dropped with a
                 // warning (the critic default 're-do' then applies). Omitted ⇒ undefined

package/dist/core/self-evolution/critic-agent.js CHANGED Viewed

@@ -47,7 +47,7 @@ import { readProjectConfig } from '../project-config.js';
 import { claudeProjectsDir } from '../learn/trajectory-discovery.js';
 import { claudeSourceFactory } from '../trajectory/adapters/claude.js';
 import { toActionSkeleton } from '../trajectory/skeleton.js';
-import { runHeadlessAgent, DEFAULT_AGENT_TIMEOUT_MS } from './host-harness.js';
+import { runHeadlessAgent, resolveAgentTimeoutMs, } from './host-harness.js';
 import { currentPolicyVersion, readPolicyLedger, readPolicySnapshotFiles, } from './policy/index.js';
 import { advanceEpisodeStage, writeArmCapture } from './episode-store.js';
 /** Error thrown when the worktree could not be created (git AND copy fallback failed). */
@@ -212,7 +212,7 @@ const GIT_TIMEOUT_MS = 60_000;
 export async function runCriticAgent(opts) {
     const repoRoot = path.resolve(opts.repoRoot);
     const spawnImpl = opts.spawn ?? nodeSpawn;
-    const timeoutMs = opts.timeoutMs ?? DEFAULT_AGENT_TIMEOUT_MS;
+    const timeoutMs = opts.timeoutMs ?? resolveAgentTimeoutMs(opts.harness);
     const gitTimeoutMs = opts.gitTimeoutMs ?? GIT_TIMEOUT_MS;
     const homeDir = opts.homeDir ?? os.homedir();
     const baselineMode = opts.baselineMode ?? 're-do';
@@ -271,10 +271,18 @@ export async function runCriticAgent(opts) {
             homeDir,
             runStartMs: runStart,
         });
+        // Local import keeps the facts derivation in one place (learn uses the same
+        // function); imported lazily to avoid a top-level cycle hazard.
+        const { toTrajectoryFacts, extractExpectedTestPaths } = await import('../trajectory/facts.js');
+        // Change-scope guard input for the baseline arm: the change's expected test
+        // paths from its spec-tests.md (the worktree carries the change dir), so a
+        // green-but-out-of-scope baseline run is demoted symmetrically with the main
+        // arm and advantage stays scope-consistent.
+        const expectedTestPaths = extractExpectedTestPaths(await (await import('node:fs/promises'))
+            .readFile(path.join(worktreePath, 'synergyspec-selfevolving', 'changes', opts.changeName, 'spec-tests.md'), 'utf8')
+            .catch(() => undefined));
         const facts = trajectory
-            ? // Local import keeps the facts derivation in one place (learn uses the
-                // same function); imported lazily to avoid a top-level cycle hazard.
-                (await import('../trajectory/facts.js')).toTrajectoryFacts(trajectory, opts.changeName)
+            ? toTrajectoryFacts(trajectory, opts.changeName, { expectedTestPaths })
             : null;
         // Honesty: prefer the OBSERVED pass rate (a real runner ran), else the
         // stdout-parsed summary; null when neither parsed (never fabricated).

package/dist/core/self-evolution/edits-contract.d.ts CHANGED Viewed

@@ -1,14 +1,24 @@
-export declare class CanonicalProposerOutputInvalid extends Error {
+export declare class EvolvingAgentOutputInvalid extends Error {
     constructor(message: string);
 }
 /** The model declined to edit anything (empty edits). Not an error — a no-op. */
-export declare class CanonicalProposerNoOp extends Error {
+export declare class EvolvingAgentNoOp extends Error {
     constructor();
 }
 /** The headless agent invocation itself failed (crash / empty output). */
-export declare class CanonicalProposerInvocationError extends Error {
+export declare class EvolvingAgentInvocationError extends Error {
     constructor(stderr: string);
 }
+/**
+ * @deprecated v2.0.0 removed the GA "canonical proposer"; these names are
+ * retained only as transitional aliases for any external importer. Use the
+ * `EvolvingAgent*` classes — they are the same constructors.
+ */
+export declare const CanonicalProposerOutputInvalid: typeof EvolvingAgentOutputInvalid;
+/** @deprecated alias of {@link EvolvingAgentNoOp}. */
+export declare const CanonicalProposerNoOp: typeof EvolvingAgentNoOp;
+/** @deprecated alias of {@link EvolvingAgentInvocationError}. */
+export declare const CanonicalProposerInvocationError: typeof EvolvingAgentInvocationError;
 /**
  * The packaged result of one validated candidate edit set: the human-readable
  * unified diff, the POSIX paths actually edited (a subset of the target's
@@ -39,8 +49,8 @@ export interface CanonicalProposeOutput {
  * the loop-v2 演进智能体 EVOLVING AGENT call this so their safety contract is
  * byte-identical. relPaths are normalized to POSIX separators.
  *
- * Throws {@link CanonicalProposerNoOp} when `rawEdits` is empty and
- * {@link CanonicalProposerOutputInvalid} for any shape / frozen / scope
+ * Throws {@link EvolvingAgentNoOp} when `rawEdits` is empty and
+ * {@link EvolvingAgentOutputInvalid} for any shape / frozen / scope
  * violation. Path traversal and absolute paths are rejected transitively: they
  * can never be a member of `allowedFiles`, so they fail the scope check.
  */