npm - synergyspec-selfevolving - Versions diffs - 1.4.0 → 2.1.0 - Mend

synergyspec-selfevolving 1.4.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (93)

package/README.md +31 -18
package/dist/commands/learn.d.ts +12 -1
package/dist/commands/learn.js +158 -11
package/dist/commands/self-evolution-episode.d.ts +177 -0
package/dist/commands/self-evolution-episode.js +431 -0
package/dist/commands/self-evolution.d.ts +12 -190
package/dist/commands/self-evolution.js +114 -866
package/dist/core/archive.d.ts +0 -1
package/dist/core/archive.js +0 -58
package/dist/core/artifact-graph/instruction-loader.d.ts +2 -4
package/dist/core/artifact-graph/instruction-loader.js +3 -31
package/dist/core/fitness/loss.d.ts +5 -5
package/dist/core/fitness/loss.js +4 -4
package/dist/core/fitness/test-failures.js +10 -2
package/dist/core/project-config.d.ts +19 -0
package/dist/core/project-config.js +96 -0
package/dist/core/self-evolution/candidate-fitness.d.ts +23 -1
package/dist/core/self-evolution/candidate-fitness.js +31 -5
package/dist/core/self-evolution/candidates.d.ts +0 -9
package/dist/core/self-evolution/critic-agent.d.ts +192 -0
package/dist/core/self-evolution/critic-agent.js +568 -0
package/dist/core/self-evolution/edits-contract.d.ts +53 -0
package/dist/core/self-evolution/edits-contract.js +89 -0
package/dist/core/self-evolution/episode-orchestrator.d.ts +234 -0
package/dist/core/self-evolution/episode-orchestrator.js +681 -0
package/dist/core/self-evolution/episode-store.d.ts +266 -0
package/dist/core/self-evolution/episode-store.js +573 -0
package/dist/core/self-evolution/evolution-switches.d.ts +1 -1
package/dist/core/self-evolution/evolution-switches.js +5 -10
package/dist/core/self-evolution/evolving-agent.d.ts +208 -0
package/dist/core/self-evolution/evolving-agent.js +535 -0
package/dist/core/self-evolution/host-harness.d.ts +14 -15
package/dist/core/self-evolution/host-harness.js +48 -23
package/dist/core/self-evolution/index.d.ts +11 -6
package/dist/core/self-evolution/index.js +20 -6
package/dist/core/self-evolution/line-diff.d.ts +60 -0
package/dist/core/self-evolution/line-diff.js +130 -0
package/dist/core/self-evolution/policy/fs-safe.d.ts +19 -0
package/dist/core/self-evolution/policy/fs-safe.js +89 -0
package/dist/core/self-evolution/policy/index.d.ts +13 -0
package/dist/core/self-evolution/policy/index.js +13 -0
package/dist/core/self-evolution/policy/policy-store.d.ts +217 -0
package/dist/core/self-evolution/policy/policy-store.js +774 -0
package/dist/core/self-evolution/policy/prediction-reconcile.d.ts +54 -0
package/dist/core/self-evolution/policy/prediction-reconcile.js +191 -0
package/dist/core/self-evolution/policy/reject-buffer.d.ts +55 -0
package/dist/core/self-evolution/policy/reject-buffer.js +170 -0
package/dist/core/self-evolution/promote.d.ts +1 -1
package/dist/core/self-evolution/promote.js +6 -33
package/dist/core/self-evolution/promotion.js +1 -2
package/dist/core/self-evolution/reward-agent.d.ts +379 -0
package/dist/core/self-evolution/reward-agent.js +940 -0
package/dist/core/self-evolution/reward-aggregator.d.ts +59 -0
package/dist/core/self-evolution/reward-aggregator.js +262 -0
package/dist/core/self-evolution/scope-gate.d.ts +66 -0
package/dist/core/self-evolution/scope-gate.js +107 -0
package/dist/core/self-evolution/success-channel.js +2 -2
package/dist/core/self-evolution/tamper-check.d.ts +24 -0
package/dist/core/self-evolution/tamper-check.js +236 -0
package/dist/core/self-evolution/tool-evolution.js +2 -13
package/dist/core/self-evolution/verdict.d.ts +8 -5
package/dist/core/self-evolution/verdict.js +4 -7
package/dist/core/templates/workflows/gen-tests.js +1 -1
package/dist/core/templates/workflows/learn.d.ts +3 -2
package/dist/core/templates/workflows/learn.js +21 -18
package/dist/core/templates/workflows/self-evolving.d.ts +6 -4
package/dist/core/templates/workflows/self-evolving.js +62 -172
package/dist/core/trajectory/scrub.d.ts +27 -0
package/dist/core/trajectory/scrub.js +79 -0
package/dist/core/trajectory/skeleton.d.ts +27 -1
package/dist/core/trajectory/skeleton.js +152 -8
package/dist/dashboard/data.d.ts +25 -51
package/dist/dashboard/data.js +68 -180
package/dist/dashboard/react-client.js +458 -503
package/dist/dashboard/react-styles.js +3 -3
package/dist/dashboard/server.js +23 -17
package/dist/ui/ascii-patterns.d.ts +7 -15
package/dist/ui/ascii-patterns.js +123 -54
package/dist/ui/welcome-screen.d.ts +0 -14
package/dist/ui/welcome-screen.js +16 -35
package/package.json +1 -1
package/dist/core/self-evolution/ga-selection.d.ts +0 -94
package/dist/core/self-evolution/ga-selection.js +0 -153
package/dist/core/self-evolution/proposer-agent.d.ts +0 -182
package/dist/core/self-evolution/proposer-agent.js +0 -326
package/dist/core/self-evolution/replay-runner.d.ts +0 -100
package/dist/core/self-evolution/replay-runner.js +0 -170
package/dist/core/self-evolution/replay.d.ts +0 -45
package/dist/core/self-evolution/replay.js +0 -56
package/dist/core/self-evolution/template-variants.d.ts +0 -62
package/dist/core/self-evolution/template-variants.js +0 -171
package/dist/core/self-evolution/trajectory.d.ts +0 -65
package/dist/core/self-evolution/trajectory.js +0 -185

package/README.md CHANGED

@@ -133,7 +133,7 @@ Now tell your AI: `/synspec:propose <what-you-want-to-build>`
 → **[Workflows](docs/workflows.md)**: combos and patterns<br>
 → **[Commands](docs/commands.md)**: slash commands & skills<br>
 → **[CLI](docs/cli.md)**: terminal reference<br>
-→ **[Evaluation Lab](docs/evaluation-lab.md)**: advanced self-evolution harness<br>
+→ **[Self-Evolution Loop](docs/evaluation-lab.md)**: the loop-v2 in-context RL surface<br>
 → **[Supported Tools](docs/supported-tools.md)**: tool integrations & install paths<br>
 → **[Concepts](docs/concepts.md)**: how it all fits<br>
 → **[Multi-Language](docs/multi-language.md)**: multi-language support<br>
@@ -199,9 +199,9 @@ The self-evolution machinery lives in-tree under `src/core/fitness` and
 `src/core/self-evolution`, exposed through the `synergyspec-selfevolving
 self-evolution` subcommands (not as an ordinary day-to-day skill). It treats the
 spec workflow as a learnable system — a change is a forward pass through the
-artifact templates; `learn` runs the backward pass (a per-change loss); and a
-genetic-algorithm outer loop selects among competing candidate template
-variants.
+artifact templates; `learn` runs the backward pass (a per-change loss); and an
+in-context-RL episode improves one design 策略 POLICY lineage from a graded
+advantage, with a rollback that fires *before* any new edit.
 What actually works today:
@@ -233,21 +233,31 @@ What actually works today:
   session: an unresolvable flag is an up-front error (exit non-zero), while a
   missing env handle fails closed — no trajectory, and the observed-verified
   gate refuses to promote.
-- **Code-health gate** (auto-evolve / `evolve-from-edits`): a measured code-health
-  regression vs the last accepted state blocks auto-promotion (and surfaces a
-  loud `health-signal-unavailable` observation if a configured analyzer can't
-  run). No health signal ⇒ no gate, so the loop is never blocked on a missing
+- **Code-health gate** (the episode's evolving agent / `evolve-from-edits`): a
+  measured code-health regression vs the last accepted state blocks promotion (and
+  surfaces a loud `health-signal-unavailable` observation if a configured analyzer
+  can't run). No health signal ⇒ no gate, so the loop is never blocked on a missing
   measurement.
 - **Candidate proposals** (`self-evolution propose-canonical`): turns aggregated
   `learn` hints into human-gated candidate packages under
   `.synergyspec-selfevolving/self-evolution/candidates/`. Proposal-only — no
   canonical file is modified, and the frozen gen-test/run-test oracle is never
   touched.
-- **GA outer loop** (`self-evolution evolve`): groups candidates by canonical
-  target, scores them by accumulated fitness (or `--replay <corpus>` to re-run a
-  change corpus through baseline vs. candidate), ranks them, and generates a
-  **human-gated** promotion report. It never auto-promotes, and a frozen target
-  (per the per-target evolution policy) is skipped.
+- **In-context-RL episode** (`self-evolution episode --change <name>`): runs ONE
+  completed change through a fixed, code-spawned pipeline. A two-arm forward grades
+  the **主智能体 MAIN AGENT** (the frozen current 策略 POLICY) from the change's
+  `learn` report against a **CRITIC AGENT（基线智能体 baseline agent）** that reruns
+  the prior policy on the same change; a **奖励智能体 REWARD AGENT** computes
+  **advantage ＝ reward(主臂) − reward(基线臂)** and names the gap; on a bad advantage
+  the orchestrator rolls the 策略 POLICY back *before* the **演进智能体 EVOLVING
+  AGENT** is called; the evolving agent then makes ONE bounded edit (or refuses)
+  after the static / observed-GREEN / 范围⊆诊断 gates pass, writing the next policy
+  version onto your LOCAL files — no candidate dir, no republish. A frozen target
+  (per the per-target evolution policy) is skipped. Inspect the lineage with
+  `self-evolution policy show [--target <id>]`, re-enter a partial run with
+  `self-evolution episode resume <id>`, and manually restore a version with
+  `self-evolution policy rollback --target <id> --yes`. See
+  [docs/evaluation-lab.md](docs/evaluation-lab.md).
 - **Per-target evolution scope** (`selfEvolution:` in
   `synergyspec-selfevolving/config.yaml`): one switch decides which canonical
   targets may evolve, honored end-to-end by `learn` → `propose-canonical` → the
@@ -260,13 +270,16 @@ What actually works today:
 What is **not** built yet (don't infer it from the architecture diagrams):
-- No autonomous multi-generation **breeding** loop (mutate winners → next
-  generation). `evolve` runs a single generation over pre-existing candidates;
-  new candidates only come from the human-gated `propose-canonical` step.
+- No autonomous multi-episode **breeding** loop. Each `self-evolution episode`
+  improves the policy from one change's graded advantage; it never fans out
+  competing variants or runs unattended generations. New manual candidates only
+  come from the human-gated `propose-canonical` step.
 - No measured benchmark gains. The mechanisms are implemented and unit-tested,
   but the loop has not been run end-to-end to produce quantitative results.
-- The earlier out-of-tree `evolve/` DGM lab (and its `pnpm evolve` harness) has
-  been removed; the technique was internalized into `src/core`.
+- The earlier out-of-tree `evolve/` DGM lab (and its `pnpm evolve` harness) and
+  the genetic-algorithm outer loop (`self-evolution evolve` / `--replay`) have
+  been removed; the technique was internalized into `src/core` and re-homed on the
+  loop-v2 in-context-RL episode.
 ## Contributing

package/dist/commands/learn.d.ts CHANGED

@@ -1,3 +1,14 @@
 import { Command } from 'commander';
-export declare function registerLearnCommand(program: Command): void;
+import { type RunEpisodeOptions, type RunEpisodeResult, type RunEpisodeBusy } from '../core/self-evolution/episode-orchestrator.js';
+/**
+ * Injectable dependencies for {@link registerLearnCommand}. The ONLY seam today
+ * is `runEpisode` — the loop-v2 episode runner the autonomous `--apply` entrance
+ * invokes. It defaults to the real {@link runEpisode}; learn tests stub it so the
+ * evolve path is exercised WITHOUT spawning real agents (the orchestrator's three
+ * agents each own a `runHeadlessAgent` spawn, which the stub never reaches).
+ */
+export interface LearnCommandDeps {
+    runEpisode?: (opts: RunEpisodeOptions) => Promise<RunEpisodeResult | RunEpisodeBusy>;
+}
+export declare function registerLearnCommand(program: Command, deps?: LearnCommandDeps): void;
 //# sourceMappingURL=learn.d.ts.map

package/dist/commands/learn.js CHANGED

@@ -9,12 +9,14 @@ import { toTrajectoryFacts, describeRunnerResults } from '../core/trajectory/fac
 import { toActionSkeleton } from '../core/trajectory/skeleton.js';
 import { resolveHostHarness } from '../core/self-evolution/host-harness.js';
 import { mineSuccessSignals } from '../core/self-evolution/success-channel.js';
+import { captureMainArm, runEpisode, } from '../core/self-evolution/episode-orchestrator.js';
 import { buildLLMSummaryCandidates, ingestLearnHandoff, } from '../core/learn/llm-summary.js';
 function collect(value, previous) {
     previous.push(value);
     return previous;
 }
-export function registerLearnCommand(program) {
+export function registerLearnCommand(program, deps = {}) {
+    const runEpisodeImpl = deps.runEpisode ?? runEpisode;
     const learnCmd = program
         .command('learn [change]')
         .description('Review a completed change and extract reusable learning candidates')
@@ -157,13 +159,52 @@ export function registerLearnCommand(program) {
                     // side-write only; never fail learn over it.
                 }
             }
+            // LOOP-V2 AUTONOMOUS ENTRANCE (CS6-F): on an --apply run that opted into
+            // evolution, resolved EXACTLY ONE concrete evolvable target, and carries
+            // an observed-VERIFIED-GREEN signal, run one self-evolution episode
+            // in-process (rollback-before-evolution). A bare `learn <change>` preview
+            // can never reach this (it is neither --apply nor an evolving run), so the
+            // non-evolving path stays byte-identical to today. The runner is injected
+            // (`deps.runEpisode`) so tests exercise this path WITHOUT spawning real
+            // agents; it defaults to the real orchestrator.
+            let episodeOutcome;
+            const concreteEvolveTarget = resolveSingleConcreteTarget(evolutionPreview);
+            if (options.apply === true &&
+                isEvolvingRun(options) &&
+                concreteEvolveTarget !== undefined &&
+                concreteEvolveTarget.targetId !== null &&
+                reportIsObservedVerifiedGreen(report)) {
+                const mainArm = await captureMainArm({
+                    repoRoot: projectRoot,
+                    changeName: report.changeName,
+                    report,
+                });
+                // Thread the loop-v2 reward judge-quality config (samples / noiseFloor /
+                // orderSwap / tamperCheck). Omitted ⇒ the orchestrator's single-sample,
+                // flag-only default (no extra spawns).
+                const episodeConfig = readProjectConfig(projectRoot);
+                episodeOutcome = await runEpisodeImpl({
+                    repoRoot: projectRoot,
+                    targetId: concreteEvolveTarget.targetId,
+                    changeName: report.changeName,
+                    changeDirPath: report.changeDir,
+                    mainArm,
+                    ...(episodeConfig?.selfEvolution?.reward
+                        ? { reward: episodeConfig.selfEvolution.reward }
+                        : {}),
+                });
+            }
             if (options.json) {
-                printJson(report, applied, evolutionPreview, hintsPath);
+                printJson(report, applied, evolutionPreview, hintsPath, episodeOutcome);
                 return;
             }
             console.log(renderLearnReport(report, applied));
             console.log('');
             console.log(renderLearnTransparency(report, applied, evolutionPreview, hintsPath, options));
+            if (episodeOutcome) {
+                console.log('');
+                console.log(renderEpisodeOutcome(episodeOutcome));
+            }
             if (successSummary) {
                 console.log('');
                 console.log(successSummary);
@@ -515,7 +556,7 @@ function renderIngestHandoff(changeName, ingest, applied) {
     }
     return lines.join('\n');
 }
-function printJson(report, applied, evolutionPreview, hintsPath) {
+function printJson(report, applied, evolutionPreview, hintsPath, episodeOutcome) {
     // `mode` only tracks whether MEMORY candidates were applied (--apply). It does
     // NOT reflect that --persist-hints wrote a hints file, which is what made the
     // old `mode:"preview"` read as "nothing written". `wrote` makes every write this
@@ -540,6 +581,30 @@ function printJson(report, applied, evolutionPreview, hintsPath) {
             hintIds: target.hintIds,
         })),
     };
+    // Loop-v2 episode outcome (CS6-F): present only when the autonomous entrance
+    // ran one this --apply run; absent on every non-evolving / unverified run so
+    // their --json stays byte-identical to today.
+    const episode = episodeOutcome
+        ? 'busy' in episodeOutcome
+            ? { busy: true, reason: episodeOutcome.reason }
+            : {
+                episodeId: episodeOutcome.episodeId,
+                baselineSkipped: episodeOutcome.baselineSkipped,
+                advantage: episodeOutcome.advantage,
+                decision: episodeOutcome.decision,
+                evolution: episodeOutcome.evolution === null
+                    ? null
+                    : episodeOutcome.evolution.kind === 'evolved'
+                        ? {
+                            kind: 'evolved',
+                            version: episodeOutcome.evolution.ledgerEntry.version,
+                        }
+                        : episodeOutcome.evolution.kind === 'refused'
+                            ? { kind: 'refused', reason: episodeOutcome.evolution.reason }
+                            : { kind: 'not-spawned', reason: episodeOutcome.evolution.reason },
+                newPolicyVersion: episodeOutcome.newPolicyVersion,
+            }
+        : undefined;
     console.log(JSON.stringify({
         mode: applied ? 'apply' : 'preview',
         ...report,
@@ -548,6 +613,7 @@ function printJson(report, applied, evolutionPreview, hintsPath) {
         ...(hintsPath ? { hintsPath } : {}),
         wrote,
         evolution,
+        ...(episode ? { episode } : {}),
     }, null, 2));
 }
 async function buildEvolutionPreview(hints, targetPolicy, projectRoot) {
@@ -715,10 +781,18 @@ function renderLearnTransparency(report, applied, evolutionPreview, hintsPath, o
     // full new content for that target's resolved LOCAL file.
     const concreteTargets = evolutionPreview.targets.filter((target) => target.targetId !== null);
     const concreteTarget = concreteTargets.length > 0 ? concreteTargets[0] : undefined;
-    if (hintsPath && concreteTarget) {
+    if (hintsPath && concreteTarget && concreteTarget.targetId !== null) {
+        const concreteTargetId = concreteTarget.targetId;
         lines.push(`- Hints written: ${hintsPath}`);
+        // LOOP-V2 (autonomous, rollback-before-evolution): the in-context-RL episode
+        // is now the default autonomous path. `--apply` on a verified-green run runs
+        // it in-process; this line is the explicit re-runnable form.
+        lines.push(renderEpisodeNextStep(report.changeName, concreteTargetId, options));
         const localFile = concreteTarget.localFiles[0] ?? concreteTarget.files[0] ?? '<target file>';
-        lines.push(`- Evolve from your edits: synergyspec-selfevolving self-evolution evolve-from-edits --from-learn "${hintsPath}" --evolve-target ${concreteTarget.targetId} --from-edits <edits.json> --yes`);
+        // MANUAL channel (host agent authors the edit directly): kept for operators
+        // who want to hand evolve-from-edits a full new-file content themselves
+        // rather than spawn the loop-v2 EVOLVING AGENT.
+        lines.push(`- Manual: evolve from your own edits: synergyspec-selfevolving self-evolution evolve-from-edits --from-learn "${hintsPath}" --evolve-target ${concreteTargetId} --from-edits <edits.json> --yes`);
         lines.push(`  you (the host agent) author edits.json's full new file content for the target file shown above (${localFile}).`);
     }
     else if (hintsPath) {
@@ -754,26 +828,99 @@ function renderLearnTransparency(report, applied, evolutionPreview, hintsPath, o
         lines.push('- After reviewing or evolving, run /synspec:archive to close the change.');
     }
     else {
+        // A bare preview has no agent-in-the-loop signal, so point the human/cron
+        // operator at the LOOP-V2 autonomous entrance (in-context RL,
+        // rollback-before-evolution): `self-evolution episode`.
         lines.push('');
-        lines.push('headless fallback (no host agent):');
-        lines.push(`- One-button local evolve: synergyspec-selfevolving self-evolution auto-evolve --change "${report.changeName}"${renderTargetArgs(options)}`);
+        lines.push('autonomous loop-v2 (rollback-before-evolution):');
+        lines.push(`- One-button local evolve: ${renderEpisodeCommand(report.changeName, undefined, options)}`);
         lines.push('- After reviewing or evolving, run /synspec:archive to close the change.');
     }
     return lines.join('\n');
 }
+/**
+ * The loop-v2 autonomous entrance command (CS6-F):
+ * `self-evolution episode --change "<name>" [--target <id>] [--session-id <id>]`.
+ * Replaces the GA `auto-evolve` / autonomous `evolve-from-edits` suggestions as
+ * the loop-v2 path. The `--target` pin is included only when a concrete target
+ * id resolved; `--session-id` is threaded through when the operator pinned an
+ * explicit trajectory handle so the episode grades the SAME session learn did.
+ */
+function renderEpisodeCommand(changeName, targetId, options) {
+    const parts = [`synergyspec-selfevolving self-evolution episode --change "${changeName}"`];
+    if (targetId)
+        parts.push(`--target ${targetId}`);
+    if (options.sessionId)
+        parts.push(`--session-id ${options.sessionId}`);
+    return parts.join(' ');
+}
+/** The loop-v2 next-step line shown when a concrete target is pinned. */
+function renderEpisodeNextStep(changeName, targetId, options) {
+    return `- Autonomous loop-v2 episode: ${renderEpisodeCommand(changeName, targetId, options)}`;
+}
 /**
  * An "evolving run" is one where the operator opted into evolution
  * (`--apply` / `--persist-hints` / a named `--evolve-target`) — per the skill,
  * the bare CLI previews and only the skill/agent flow passes these flags, so
  * this is the agent-in-the-loop proxy (the same signal that gates the
- * unbindable-hint observations in the learn action). The headless
- * `auto-evolve` fallback (it spawns its proposer internally) is for runs with
- * NO agent in the loop; the skill forbids the headless proposer when an agent
- * IS the proposer, so the fallback is suppressed on evolving runs.
+ * unbindable-hint observations in the learn action). The loop-v2 autonomous
+ * fallback (the `self-evolution episode` next-step, which code-spawns the
+ * reward + evolving agents) is for runs with NO agent in the loop; the skill
+ * forbids surfacing that autonomous entrance when an agent IS already the
+ * proposer, so the fallback is suppressed on evolving runs.
  */
 function isEvolvingRun(options) {
     return (options.apply === true || options.persistHints === true || options.evolveTarget !== undefined);
 }
+/**
+ * The report carries an observed-VERIFIED-GREEN signal: a REAL test runner was
+ * observed (`verified`) AND it came back green (`observedStatus === 'success'`).
+ * This is the SAME condition the failure-evidence routing uses in core/learn.ts
+ * (a verified-green run yields an empty failure list); the loop-v2 episode is
+ * only auto-launched when the main arm actually has a measured, trusted outcome
+ * — never on an authored-only or unverified report.
+ */
+function reportIsObservedVerifiedGreen(report) {
+    const facts = report.fitnessSample?.trajectoryFacts;
+    return facts !== undefined && facts.verified === true && facts.observedStatus === 'success';
+}
+/**
+ * The ONE concrete evolvable target this learn run resolved, or `undefined` when
+ * there is not exactly one (zero, or a kind-only/ambiguous group that still
+ * needs a `--evolve-target` pin). Mirrors the `concreteTarget` the next-steps
+ * renderer picks; the loop-v2 entrance refuses to guess when more than one
+ * concrete target is in play.
+ */
+function resolveSingleConcreteTarget(preview) {
+    const concrete = preview.targets.filter((target) => target.targetId !== null);
+    return concrete.length === 1 ? concrete[0] : undefined;
+}
+/** Render the loop-v2 episode outcome for the human-readable transparency block. */
+function renderEpisodeOutcome(episode) {
+    if ('busy' in episode) {
+        return `- Loop-v2 episode: skipped — another in-flight episode holds the target (${episode.reason}).`;
+    }
+    const lines = [];
+    lines.push(`- Loop-v2 episode: ${episode.episodeId}`);
+    const advantage = episode.advantage === null ? 'n/a (baseline skipped or abstained)' : String(episode.advantage);
+    lines.push(`  advantage: ${advantage}; decision: ${episode.decision}`);
+    if (episode.evolution === null) {
+        lines.push('  evolution: not spawned');
+    }
+    else if (episode.evolution.kind === 'evolved') {
+        lines.push(`  evolution: evolved -> policy v${episode.evolution.ledgerEntry.version}`);
+    }
+    else if (episode.evolution.kind === 'refused') {
+        lines.push(`  evolution: refused (${episode.evolution.reason})`);
+    }
+    else {
+        lines.push(`  evolution: not spawned (${episode.evolution.reason})`);
+    }
+    if (episode.newPolicyVersion !== null) {
+        lines.push(`  policy head: v${episode.newPolicyVersion}`);
+    }
+    return lines.join('\n');
+}
 function renderExplicitPolicy(explicit) {
     if (explicit.length === 0)
         return '';

package/dist/commands/self-evolution-episode.d.ts ADDED

@@ -0,0 +1,177 @@
+/**
+ * Loop-v2 CLI commands (self-evolution as in-context RL).
+ *
+ * These commands drive ONE bounded episode of the loop documented in
+ * `src/core/self-evolution/episode-orchestrator.ts`:
+ *
+ *   - `episode`               — run ONE episode for a change: build the 主智能体
+ *                               MAIN AGENT (frozen actor, 策略 POLICY vN+1) arm
+ *                               from a learn report (the same grading the `learn`
+ *                               command uses), then run the orchestrator (CRITIC
+ *                               AGENT（基线智能体 baseline agent）arm + 奖励智能体
+ *                               REWARD AGENT scoring + rollback/keep decision +
+ *                               演进智能体 EVOLVING AGENT optimizer.step). Prints a
+ *                               human summary (advantage, decision, evolution
+ *                               kind, new policy version) or --json.
+ *   - `episode resume <id>`   — re-enter a partially-run episode at its recorded
+ *                               stage and finish the remaining steps.
+ *   - `policy show`           — READ-ONLY: print the 版本账本 ledger (versions,
+ *                               actions, Δ stats, predictions) + the 否决缓冲
+ *                               reject-buffer for the target(s). Replaces the
+ *                               read-only role of the (soon-removed) `trajectory`
+ *                               command.
+ *   - `policy rollback`       — manual snapshot rollback to the prior version +
+ *                               a `human-reject` 否决缓冲 entry; requires --yes.
+ *
+ * Like the rest of the self-evolution CLI surface, every programmatic entrypoint
+ * here is exported and fully INJECTABLE (the orchestrator spawn seam, the learn
+ * report generator, stdout/stderr, clock) so the commands are unit-testable
+ * without spawning a real `claude` binary.
+ */
+import { Command } from 'commander';
+import { captureMainArm as captureMainArmImpl, resumeEpisode as resumeEpisodeImpl, type RunEpisodeOptions, type RunEpisodeResult, type RunEpisodeBusy, type ResumeEpisodeResult, type PolicyLedgerEntry, type RejectBufferEntry } from '../core/self-evolution/index.js';
+import { type LearnReport } from '../core/learn.js';
+/**
+ * The options the `episode` command forwards to the runEpisode seam. A superset
+ * of the orchestrator's {@link RunEpisodeOptions} carrying the `--no-baseline`
+ * request as `skipBaseline`. The base orchestrator ignores the extra key (the
+ * CRITIC AGENT arm is gated by its own ledger read); a custom seam may read it.
+ */
+export type EpisodeRunOptions = RunEpisodeOptions & {
+    skipBaseline?: boolean;
+};
+/**
+ * The injectable orchestrator seam. Accepts the {@link EpisodeRunOptions}
+ * superset (the real {@link runEpisodeImpl}, typed for {@link RunEpisodeOptions},
+ * satisfies this because it accepts a subset of the fields).
+ */
+export type EpisodeRunner = (opts: EpisodeRunOptions) => Promise<RunEpisodeResult | RunEpisodeBusy>;
+/**
+ * Attach the loop-v2 `episode` + `policy` subcommands to the parent
+ * `self-evolution` command. Called once from {@link registerSelfEvolutionCommand}.
+ */
+export declare function attachSelfEvolutionEpisodeCommands(parent: Command): void;
+export interface RunEpisodeCommandArgs {
+    /** The completed change to run the episode for. */
+    changeName?: string;
+    /** Canonical target id to evolve. Defaults to the design artifact-template. */
+    target?: string;
+    /** Skip the CRITIC AGENT（基线智能体 baseline agent）arm for this episode. */
+    noBaseline?: boolean;
+    /** Explicit transcript handle (Claude transcript store only). */
+    transcript?: string;
+    /** Explicit Claude session id handle. */
+    sessionId?: string;
+    json?: boolean;
+}
+export interface RunEpisodeCommandOptions {
+    repoRoot: string;
+    stdout?: (l: string) => void;
+    stderr?: (l: string) => void;
+    /**
+     * Test seam: produce the change's learn report (the 主智能体 MAIN AGENT arm's
+     * grading). Defaults to {@link generateLearnReport}; tests inject a graded stub
+     * so the episode runs without a real agent trajectory.
+     */
+    generateReport?: (changeName: string) => Promise<LearnReport>;
+    /**
+     * Test seam: build the {@link MainArmCapture} from a learn report. Defaults to
+     * the orchestrator's {@link captureMainArmImpl}.
+     */
+    captureMainArm?: typeof captureMainArmImpl;
+    /**
+     * Test seam: the orchestrator that runs the three agents. Defaults to the real
+     * {@link runEpisodeImpl}; tests inject a fake so NO agents are spawned.
+     */
+    runEpisode?: EpisodeRunner;
+}
+export interface RunEpisodeCommandResult {
+    exitCode: number;
+    /** Present when the episode ran (not busy / not an error). */
+    result?: RunEpisodeResult;
+    /** Present when the target's in-flight slot was already held. */
+    busy?: RunEpisodeBusy;
+    error?: string;
+}
+/**
+ * Programmatic entrypoint for `self-evolution episode`. Exported so tests can
+ * drive the full episode flow with an injected orchestrator seam (no real agent
+ * spawn).
+ */
+export declare function runEpisodeCommand(args: RunEpisodeCommandArgs, opts: RunEpisodeCommandOptions): Promise<RunEpisodeCommandResult>;
+export interface RunResumeEpisodeCommandArgs {
+    episodeId: string;
+    json?: boolean;
+}
+export interface RunResumeEpisodeCommandResult {
+    exitCode: number;
+    result?: ResumeEpisodeResult;
+    error?: string;
+}
+/**
+ * Programmatic entrypoint for `self-evolution episode resume <id>`. Exported for
+ * tests; the orchestrator seam is injectable.
+ */
+export declare function runResumeEpisodeCommand(args: RunResumeEpisodeCommandArgs, opts: {
+    repoRoot: string;
+    stdout?: (l: string) => void;
+    stderr?: (l: string) => void;
+    resumeEpisode?: typeof resumeEpisodeImpl;
+}): Promise<RunResumeEpisodeCommandResult>;
+export interface RunPolicyShowCommandArgs {
+    /** Restrict to a single canonical target id. */
+    target?: string;
+    json?: boolean;
+}
+export interface PolicyShowTargetView {
+    targetId: string;
+    /** Lineage head version, or null when the lineage has not been initialized. */
+    head: number | null;
+    ledger: PolicyLedgerEntry[];
+    rejectBuffer: RejectBufferEntry[];
+}
+export interface RunPolicyShowCommandResult {
+    exitCode: number;
+    targets: PolicyShowTargetView[];
+    error?: string;
+}
+/**
+ * Programmatic entrypoint for `self-evolution policy show`. READ-ONLY: reads the
+ * 版本账本 ledger + 否决缓冲 reject-buffer for the target(s) and renders them.
+ * Never mutates anything.
+ */
+export declare function runPolicyShowCommand(args: RunPolicyShowCommandArgs, opts: {
+    repoRoot: string;
+    stdout?: (l: string) => void;
+    stderr?: (l: string) => void;
+}): Promise<RunPolicyShowCommandResult>;
+export interface RunPolicyRollbackCommandArgs {
+    target: string;
+    /** Why the version is being rejected (recorded on the 否决缓冲 entry). */
+    reason?: string;
+    /** Required confirmation. */
+    yes?: boolean;
+    json?: boolean;
+}
+export interface RunPolicyRollbackCommandResult {
+    exitCode: number;
+    /** The rollback ledger entry, present on success. */
+    entry?: PolicyLedgerEntry;
+    /** The version the lineage was restored TO. */
+    toVersion?: number;
+    error?: string;
+}
+/**
+ * Programmatic entrypoint for `self-evolution policy rollback`. Manually rolls
+ * the 策略 POLICY lineage back to the previous version (recorded as a NEW
+ * monotonic head, git-revert style) and appends a `human-reject` 否决缓冲
+ * reject-buffer entry so the next 演进智能体 EVOLVING AGENT step sees the rejected
+ * direction. Requires --yes.
+ */
+export declare function runPolicyRollbackCommand(args: RunPolicyRollbackCommandArgs, opts: {
+    repoRoot: string;
+    stdout?: (l: string) => void;
+    stderr?: (l: string) => void;
+    now?: () => Date;
+}): Promise<RunPolicyRollbackCommandResult>;
+//# sourceMappingURL=self-evolution-episode.d.ts.map