npm - synergyspec-selfevolving - Versions diffs - 1.4.0 → 2.1.0 - Mend

synergyspec-selfevolving 1.4.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (93)

package/README.md +31 -18
package/dist/commands/learn.d.ts +12 -1
package/dist/commands/learn.js +158 -11
package/dist/commands/self-evolution-episode.d.ts +177 -0
package/dist/commands/self-evolution-episode.js +431 -0
package/dist/commands/self-evolution.d.ts +12 -190
package/dist/commands/self-evolution.js +114 -866
package/dist/core/archive.d.ts +0 -1
package/dist/core/archive.js +0 -58
package/dist/core/artifact-graph/instruction-loader.d.ts +2 -4
package/dist/core/artifact-graph/instruction-loader.js +3 -31
package/dist/core/fitness/loss.d.ts +5 -5
package/dist/core/fitness/loss.js +4 -4
package/dist/core/fitness/test-failures.js +10 -2
package/dist/core/project-config.d.ts +19 -0
package/dist/core/project-config.js +96 -0
package/dist/core/self-evolution/candidate-fitness.d.ts +23 -1
package/dist/core/self-evolution/candidate-fitness.js +31 -5
package/dist/core/self-evolution/candidates.d.ts +0 -9
package/dist/core/self-evolution/critic-agent.d.ts +192 -0
package/dist/core/self-evolution/critic-agent.js +568 -0
package/dist/core/self-evolution/edits-contract.d.ts +53 -0
package/dist/core/self-evolution/edits-contract.js +89 -0
package/dist/core/self-evolution/episode-orchestrator.d.ts +234 -0
package/dist/core/self-evolution/episode-orchestrator.js +681 -0
package/dist/core/self-evolution/episode-store.d.ts +266 -0
package/dist/core/self-evolution/episode-store.js +573 -0
package/dist/core/self-evolution/evolution-switches.d.ts +1 -1
package/dist/core/self-evolution/evolution-switches.js +5 -10
package/dist/core/self-evolution/evolving-agent.d.ts +208 -0
package/dist/core/self-evolution/evolving-agent.js +535 -0
package/dist/core/self-evolution/host-harness.d.ts +14 -15
package/dist/core/self-evolution/host-harness.js +48 -23
package/dist/core/self-evolution/index.d.ts +11 -6
package/dist/core/self-evolution/index.js +20 -6
package/dist/core/self-evolution/line-diff.d.ts +60 -0
package/dist/core/self-evolution/line-diff.js +130 -0
package/dist/core/self-evolution/policy/fs-safe.d.ts +19 -0
package/dist/core/self-evolution/policy/fs-safe.js +89 -0
package/dist/core/self-evolution/policy/index.d.ts +13 -0
package/dist/core/self-evolution/policy/index.js +13 -0
package/dist/core/self-evolution/policy/policy-store.d.ts +217 -0
package/dist/core/self-evolution/policy/policy-store.js +774 -0
package/dist/core/self-evolution/policy/prediction-reconcile.d.ts +54 -0
package/dist/core/self-evolution/policy/prediction-reconcile.js +191 -0
package/dist/core/self-evolution/policy/reject-buffer.d.ts +55 -0
package/dist/core/self-evolution/policy/reject-buffer.js +170 -0
package/dist/core/self-evolution/promote.d.ts +1 -1
package/dist/core/self-evolution/promote.js +6 -33
package/dist/core/self-evolution/promotion.js +1 -2
package/dist/core/self-evolution/reward-agent.d.ts +379 -0
package/dist/core/self-evolution/reward-agent.js +940 -0
package/dist/core/self-evolution/reward-aggregator.d.ts +59 -0
package/dist/core/self-evolution/reward-aggregator.js +262 -0
package/dist/core/self-evolution/scope-gate.d.ts +66 -0
package/dist/core/self-evolution/scope-gate.js +107 -0
package/dist/core/self-evolution/success-channel.js +2 -2
package/dist/core/self-evolution/tamper-check.d.ts +24 -0
package/dist/core/self-evolution/tamper-check.js +236 -0
package/dist/core/self-evolution/tool-evolution.js +2 -13
package/dist/core/self-evolution/verdict.d.ts +8 -5
package/dist/core/self-evolution/verdict.js +4 -7
package/dist/core/templates/workflows/gen-tests.js +1 -1
package/dist/core/templates/workflows/learn.d.ts +3 -2
package/dist/core/templates/workflows/learn.js +21 -18
package/dist/core/templates/workflows/self-evolving.d.ts +6 -4
package/dist/core/templates/workflows/self-evolving.js +62 -172
package/dist/core/trajectory/scrub.d.ts +27 -0
package/dist/core/trajectory/scrub.js +79 -0
package/dist/core/trajectory/skeleton.d.ts +27 -1
package/dist/core/trajectory/skeleton.js +152 -8
package/dist/dashboard/data.d.ts +25 -51
package/dist/dashboard/data.js +68 -180
package/dist/dashboard/react-client.js +458 -503
package/dist/dashboard/react-styles.js +3 -3
package/dist/dashboard/server.js +23 -17
package/dist/ui/ascii-patterns.d.ts +7 -15
package/dist/ui/ascii-patterns.js +123 -54
package/dist/ui/welcome-screen.d.ts +0 -14
package/dist/ui/welcome-screen.js +16 -35
package/package.json +1 -1
package/dist/core/self-evolution/ga-selection.d.ts +0 -94
package/dist/core/self-evolution/ga-selection.js +0 -153
package/dist/core/self-evolution/proposer-agent.d.ts +0 -182
package/dist/core/self-evolution/proposer-agent.js +0 -326
package/dist/core/self-evolution/replay-runner.d.ts +0 -100
package/dist/core/self-evolution/replay-runner.js +0 -170
package/dist/core/self-evolution/replay.d.ts +0 -45
package/dist/core/self-evolution/replay.js +0 -56
package/dist/core/self-evolution/template-variants.d.ts +0 -62
package/dist/core/self-evolution/template-variants.js +0 -171
package/dist/core/self-evolution/trajectory.d.ts +0 -65
package/dist/core/self-evolution/trajectory.js +0 -185

package/dist/core/trajectory/skeleton.js CHANGED

@@ -15,8 +15,17 @@
  */
 import { parseTestMetrics } from '../fitness/test-metrics.js';
 import { commandText, inputLooksLikeRunner, isExecTool } from './facts.js';
+import { scrub } from './scrub.js';
 const MAX_SKELETON_EVENTS = 40;
 const MAX_COMMAND_CHARS = 120;
+/** Failed command/test-run events keep more of their command line so flags survive (P7). */
+const FAILED_COMMAND_CHARS = 240;
+/** Per-event cap on a kept error tail. */
+const MAX_ERROR_TAIL_CHARS = 400;
+/** Global budget across all kept error tails — bounds a flailing run's failures. */
+const MAX_TOTAL_ERROR_BODY_CHARS = 2000;
+/** A reasoning/text block at least this long counts as a "stated plan" (⑥). */
+const STATED_PLAN_MIN_CHARS = 120;
 /**
  * Matches the NAME of a file-mutating tool across harnesses — Claude
  * `Write`/`Edit`/`MultiEdit`/`NotebookEdit`; opencode `write`/`edit`/`patch`;
@@ -65,11 +74,16 @@ function editedFiles(input) {
     }
     return patchFilesFromPayload(input);
 }
-function capCommand(command) {
+function capCommand(command, max = MAX_COMMAND_CHARS) {
     if (!command)
         return undefined;
     const c = command.trim().replace(/\s+/g, ' ');
-    return c.length > MAX_COMMAND_CHARS ? `${c.slice(0, MAX_COMMAND_CHARS - 1)}…` : c;
+    return c.length > max ? `${c.slice(0, max - 1)}…` : c;
+}
+/** Last `max` chars of a (single-spaced) output — error summaries live at the end. */
+function tailExcerpt(text, max) {
+    const t = text.trimEnd();
+    return t.length > max ? `…${t.slice(t.length - (max - 1))}` : t;
 }
 /**
  * Project the bounded action skeleton from a normalized trajectory. One walk,
@@ -83,6 +97,14 @@ export function toActionSkeleton(trajectory) {
     let lastPending = null;
     let totalToolCalls = 0;
     const events = [];
+    // Raw (uncapped) command + raw failed-output, keyed by event reference, so the
+    // failed-command higher cap (P7) and error tails (P4) can be finalized AFTER
+    // salience truncation against only the KEPT events.
+    const rawCommand = new Map();
+    const rawErrorOutput = new Map();
+    // ⑥ Non-narrative plan signal: did the agent ever state a substantial plan?
+    let statedPlanPresent = false;
+    let sawNarration = false;
     const append = (event) => {
         // Per-file rollup: consecutive edits to the same file collapse.
         const prev = events[events.length - 1];
@@ -102,7 +124,8 @@ export function toActionSkeleton(trajectory) {
                 totalToolCalls++;
                 const exec = isExecTool(part.tool);
                 if (exec) {
-                    const command = capCommand(commandText(part.input));
+                    const rawCmd = commandText(part.input);
+                    const command = capCommand(rawCmd);
                     // A shell-driven apply_patch (codex heredoc style) carries
                     // `*** Update File: <path>` lines in its payload — that's a file
                     // edit, not a command. Markers only: an exec input's `path`-like
@@ -126,9 +149,14 @@ export function toActionSkeleton(trajectory) {
                         kind: inputLooksLikeRunner(part.input) ? 'test-run' : 'command',
                         ordinal: 0,
                         tool: part.tool,
-                        ...(command ? { command } : {}),
+                        // Scrub the command BEFORE it lands in the skeleton JSON the judge
+                        // reads — a `curl …?key=…` / `git clone https://user:pass@…` would
+                        // otherwise leak a credential verbatim into the judge's prompt.
+                        ...(command ? { command: scrub(command) } : {}),
                         ...(turn.sessionId ? { sessionId: turn.sessionId } : {}),
                     });
+                    if (rawCmd)
+                        rawCommand.set(event, rawCmd);
                     const pending = { event };
                     lastPending = pending;
                     if (part.callId)
@@ -166,30 +194,146 @@ export function toActionSkeleton(trajectory) {
                             e.failedCount = metrics.failed;
                         }
                     }
+                    // Failure flag (isError / nonzero exit / failed tests) — drives both
+                    // salience ranking and which events keep an error tail. Stash the raw
+                    // output so the tail is finalized later, against only kept events.
+                    const failed = part.isError === true ||
+                        (typeof e.exitCode === 'number' && e.exitCode > 0) ||
+                        (typeof e.failedCount === 'number' && e.failedCount > 0);
+                    if (failed) {
+                        e.isError = true;
+                        if (typeof part.output === 'string' && part.output.trim().length > 0) {
+                            rawErrorOutput.set(e, part.output);
+                        }
+                    }
                 }
                 lastPending = null;
             }
+            else if (part.kind === 'text' || part.kind === 'reasoning') {
+                // ⑥ A substantial assistant plan/reasoning block — presence only.
+                sawNarration = true;
+                if (part.text.trim().length >= STATED_PLAN_MIN_CHARS)
+                    statedPlanPresent = true;
+            }
         }
     }
-    // Stamp ordinals on the full (rolled-up) sequence, then middle-out truncate.
+    // Stamp ordinals on the full (rolled-up) sequence.
     events.forEach((e, i) => {
         e.ordinal = i;
     });
+    const preTruncationEventCount = events.length;
+    // ── P3: salience-ranked retention (replaces middle-out) ────────────────────
+    // Keep the most diagnostic events within the SAME hard cap, not the events in
+    // the most convenient POSITIONS. Position-based loss routinely discarded the
+    // failing-then-recovering MIDDLE; salience keeps failures, pass-rate
+    // transitions, and session endpoints, and degrades gracefully when a harness
+    // never parses a pass rate (failures are still ranked by isError/exit code).
     let bounded = events;
     let truncated = false;
     if (events.length > MAX_SKELETON_EVENTS) {
-        const head = Math.ceil(MAX_SKELETON_EVENTS / 2);
-        const tail = MAX_SKELETON_EVENTS - head;
-        bounded = [...events.slice(0, head), ...events.slice(events.length - tail)];
+        const transitions = transitionOrdinals(events);
+        const forced = forcedOrdinals(events);
+        const priority = (e) => (forced.has(e.ordinal) ? 1000 : 0) + salience(e, transitions.has(e.ordinal));
+        const kept = [...events]
+            .sort((a, b) => priority(b) - priority(a) || a.ordinal - b.ordinal)
+            .slice(0, MAX_SKELETON_EVENTS)
+            .sort((a, b) => a.ordinal - b.ordinal);
+        // Honest per-gap elision marker: how many ORIGINAL events were dropped in the
+        // contiguous run immediately before each kept event (so a non-contiguous
+        // record is never read as a continuous causal narrative).
+        let prevOrdinal = -1;
+        for (const e of kept) {
+            const gap = e.ordinal - prevOrdinal - 1;
+            if (gap > 0)
+                e.elidedBefore = gap;
+            prevOrdinal = e.ordinal;
+        }
+        bounded = kept;
         truncated = true;
     }
+    // ── P7 + P4: failed KEPT events keep a longer command line (flags survive)
+    // and a bounded, scrubbed error tail (the actionable message), within a global
+    // budget so a flailing run's failures cannot flood the prompt.
+    let errorBudget = MAX_TOTAL_ERROR_BODY_CHARS;
+    let errorBodiesElided = 0;
+    for (const e of bounded) {
+        if (!e.isError || (e.kind !== 'command' && e.kind !== 'test-run'))
+            continue;
+        const raw = rawCommand.get(e);
+        if (raw) {
+            const capped = capCommand(raw, FAILED_COMMAND_CHARS);
+            if (capped)
+                e.command = scrub(capped); // scrub the longer failed-command form too
+        }
+        const output = rawErrorOutput.get(e);
+        if (!output)
+            continue;
+        if (errorBudget <= 0) {
+            errorBodiesElided++;
+            continue;
+        }
+        // Hard cap AFTER scrub (a redaction marker could be marginally longer than
+        // what it replaced) so a kept tail never exceeds the per-event bound.
+        const tail = scrub(tailExcerpt(output, Math.min(MAX_ERROR_TAIL_CHARS, errorBudget))).slice(0, MAX_ERROR_TAIL_CHARS);
+        if (tail.length > 0) {
+            e.errorTail = tail;
+            errorBudget -= tail.length;
+        }
+    }
     return {
         harness: trajectory.harness,
         events: bounded,
         totalToolCalls,
         truncated,
+        preTruncationEventCount,
+        ...(errorBodiesElided > 0 ? { errorBodiesElided } : {}),
+        ...(sawNarration ? { statedPlanPresent } : {}),
     };
 }
+/**
+ * Ordinals of test-runs whose signal CHANGED from the previous test-run (or the
+ * first test-run, which establishes signal) — the moments credit/blame lives.
+ */
+function transitionOrdinals(events) {
+    const set = new Set();
+    let prev = null;
+    for (const e of events) {
+        if (e.kind !== 'test-run')
+            continue;
+        const cur = { passRate: e.passRate ?? null, failedCount: e.failedCount ?? null };
+        if (prev === null || cur.passRate !== prev.passRate || cur.failedCount !== prev.failedCount) {
+            set.add(e.ordinal);
+        }
+        prev = cur;
+    }
+    return set;
+}
+/** Always-retained ordinals: the global endpoints + the last test-run per session. */
+function forcedOrdinals(events) {
+    const set = new Set();
+    if (events.length > 0) {
+        set.add(events[0].ordinal);
+        set.add(events[events.length - 1].ordinal);
+    }
+    const lastTestRunBySession = new Map();
+    for (const e of events) {
+        if (e.kind === 'test-run')
+            lastTestRunBySession.set(e.sessionId ?? '', e.ordinal);
+    }
+    for (const ord of lastTestRunBySession.values())
+        set.add(ord);
+    return set;
+}
+/** Deterministic, code-derived salience: failures > transitions > runs > commands > edits. */
+function salience(e, isTransition) {
+    if (e.isError)
+        return 100;
+    if (e.kind === 'test-run')
+        return isTransition ? 90 : 70;
+    if (e.kind === 'command')
+        return 30;
+    return 20;
+}
 function basename(p) {
     const parts = p.split('/');
     return parts[parts.length - 1] || p;

package/dist/dashboard/data.d.ts CHANGED

@@ -1,3 +1,4 @@
+import { type EpisodeRecord, type PolicyLedgerEntry, type RejectBufferEntry } from '../core/self-evolution/index.js';
 export interface ProjectInfo {
     name: string;
     version: string;
@@ -32,39 +33,6 @@ export interface CliHistoryEvent {
     durationMs?: number;
     metadata?: Record<string, unknown>;
 }
-export type EvolveRunStatus = 'completed' | 'errored' | 'pending' | 'empty';
-export interface EvolveRunSummary {
-    schemaVersion?: number;
-    runId: string;
-    benchmarkId?: string;
-    harnessVariant?: string;
-    startedAt?: string;
-    finishedAt?: string;
-    taskCount?: number;
-    verdictCounts?: Record<string, number>;
-    passRate?: number;
-    totalCostUsd?: number;
-    totalWallTimeMs?: number;
-    interrupted?: boolean;
-    isolationMode?: string;
-    budget?: Record<string, unknown>;
-    status?: EvolveRunStatus;
-    failureReasonSummary?: string;
-}
-export interface EvolveArchive {
-    schemaVersion?: number;
-    createdAt?: string;
-    entries: Array<{
-        id: string;
-        parentId: string | null;
-        generation: number;
-        createdAt?: string;
-        snapshotPath?: string;
-        runs?: unknown[];
-        childCount?: number;
-    }>;
-    generations?: unknown[];
-}
 export interface ProjectOverview {
     project: ProjectInfo;
     changes: {
@@ -73,15 +41,25 @@ export interface ProjectOverview {
         inProgress: number;
     };
     evolve: {
-        runs: number;
-        lastRunAt: string | null;
-        lastVerdict: string | null;
+        episodes: number;
+        lastEpisodeAt: string | null;
+        lastStage: string | null;
+        headVersion: number | null;
     };
     cli: {
         totalEvents: number;
         recentFailures: number;
     };
 }
+export interface PolicyLineage {
+    targetId: string;
+    headVersion: number | null;
+    entries: PolicyLedgerEntry[];
+    evolveCount: number;
+    rollbackCount: number;
+    refusedCount: number;
+    lastAt: string | null;
+}
 export interface AgentInterfacePlan {
     schemaVersion: 1;
     generatedAt: string;
@@ -193,21 +171,17 @@ export declare function readProjectInfo(root: string): Promise<ProjectInfo>;
 export declare function readChange(root: string, id: string): Promise<ChangeSummary | null>;
 export declare function listChanges(root: string): Promise<ChangeSummary[]>;
 export declare function readCliHistory(root: string, limit?: number): Promise<CliHistoryEvent[]>;
-export interface EvolveRunDetail extends EvolveRunSummary {
-    tasks: Array<{
-        taskId: string;
-        verdict?: string;
-        wallTimeMs?: number;
-        totalCostUsd?: number;
-        reason?: string;
-    }>;
-    wrapperStderrTail?: string;
-    wrapperStdoutTail?: string;
-    fileListing?: string[];
-}
-export declare function listEvolveRuns(root: string): Promise<EvolveRunSummary[]>;
-export declare function readEvolveRun(root: string, runId: string): Promise<EvolveRunDetail | null>;
-export declare function readEvolveArchive(root: string): Promise<EvolveArchive | null>;
+/**
+ * Read the loop-v2 self-evolution surface: per-episode two-arm forward records,
+ * the policy version ledger grouped into per-target lineages, and the
+ * reject-buffer of rolled-back episodes. Each reader is independently guarded so
+ * a single missing/unreadable store yields an empty slice rather than throwing.
+ */
+export declare function readSelfEvolution(root: string): Promise<{
+    episodes: EpisodeRecord[];
+    policyLineages: PolicyLineage[];
+    rejectBuffer: RejectBufferEntry[];
+}>;
 export declare function readAgentInterfacePlan(root: string): Promise<AgentInterfacePlan>;
 export declare function readOverview(root: string): Promise<ProjectOverview>;
 /**

package/dist/dashboard/data.js CHANGED

@@ -2,6 +2,7 @@ import { promises as fs } from 'fs';
 import { join, resolve, dirname } from 'path';
 import { readAllJsonLines } from './tail.js';
 import { readAgentCognitiveEvents, summarizeAgentCognitiveTrace, } from '../history/cognitive.js';
+import { listEpisodes, readPolicyLedgerAll, readRejectBufferAll, } from '../core/self-evolution/index.js';
 async function tryReadJson(path) {
     try {
         const raw = await fs.readFile(path, 'utf8');
@@ -137,156 +138,42 @@ export async function readCliHistory(root, limit = 200) {
     const events = await readAllJsonLines(path);
     return events.slice(-limit).reverse();
 }
-const LOG_TAIL_BYTES = 16 * 1024;
-async function tryReadFileTail(path, maxBytes = LOG_TAIL_BYTES) {
-    try {
-        const stat = await fs.stat(path);
-        if (stat.size === 0)
-            return null;
-        if (stat.size <= maxBytes)
-            return await fs.readFile(path, 'utf8');
-        const handle = await fs.open(path, 'r');
-        try {
-            const buf = Buffer.alloc(maxBytes);
-            await handle.read(buf, 0, maxBytes, stat.size - maxBytes);
-            return '…\n' + buf.toString('utf8');
-        }
-        finally {
-            await handle.close();
-        }
-    }
-    catch {
-        return null;
-    }
-}
-async function classifyRunStatus(dir, summary) {
-    if (summary && summary.verdictCounts)
-        return 'completed';
-    let hasAnyFiles = false;
-    let hasWrapperStderr = false;
-    try {
-        const items = await fs.readdir(dir, { withFileTypes: true });
-        hasAnyFiles = items.length > 0;
-        for (const item of items) {
-            if (item.isFile() && item.name === 'wrapper.stderr.log') {
-                const stat = await fs.stat(join(dir, item.name));
-                if (stat.size > 0)
-                    hasWrapperStderr = true;
-            }
-        }
-    }
-    catch {
-        return 'empty';
-    }
-    if (hasWrapperStderr)
-        return 'errored';
-    if (hasAnyFiles)
-        return 'pending';
-    return 'empty';
-}
-function summarizeFailureReason(reason) {
-    if (!reason)
-        return undefined;
-    const lines = reason
-        .split(/\r?\n/)
-        .map((line) => line.trim())
-        .filter(Boolean);
-    if (!lines.length)
-        return undefined;
-    const mismatchCount = lines.filter((line) => /sha256 mismatch/i.test(line)).length;
-    if (mismatchCount > 1)
-        return `${mismatchCount} genome file hash mismatches`;
-    return lines[0].replace(/\s+/g, ' ').slice(0, 180);
-}
-async function readRunFailureReasonSummary(runDir) {
-    const tasksDir = join(runDir, 'tasks');
-    let taskNames = [];
-    try {
-        const items = await fs.readdir(tasksDir, { withFileTypes: true });
-        taskNames = items.filter((d) => d.isDirectory()).map((d) => d.name);
-    }
-    catch {
-        return undefined;
-    }
-    for (const taskName of taskNames) {
-        const result = await tryReadJson(join(tasksDir, taskName, 'result.json'));
-        const summary = summarizeFailureReason(result?.reason);
-        if (summary)
-            return summary;
-    }
-    return undefined;
-}
-export async function listEvolveRuns(root) {
-    const dir = join(root, 'evolve', 'runs');
-    let entries = [];
-    try {
-        const items = await fs.readdir(dir, { withFileTypes: true });
-        entries = items.filter((d) => d.isDirectory()).map((d) => d.name);
-    }
-    catch {
-        return [];
-    }
-    const summaries = await Promise.all(entries.map(async (id) => {
-        const runDir = join(dir, id);
-        const summary = await tryReadJson(join(runDir, 'summary.json'));
-        const status = await classifyRunStatus(runDir, summary);
-        const failureReasonSummary = await readRunFailureReasonSummary(runDir);
-        if (!summary)
-            return { runId: id, status, failureReasonSummary };
-        return { ...summary, runId: summary.runId ?? id, status, failureReasonSummary };
-    }));
-    return summaries.sort((a, b) => (b.startedAt ?? '').localeCompare(a.startedAt ?? ''));
-}
-export async function readEvolveRun(root, runId) {
-    const dir = join(root, 'evolve', 'runs', runId);
-    const dirStat = await tryStat(dir);
-    if (!dirStat)
-        return null;
-    const summary = await tryReadJson(join(dir, 'summary.json'));
-    const status = await classifyRunStatus(dir, summary);
-    const tasksDir = join(dir, 'tasks');
-    let taskNames = [];
-    try {
-        const items = await fs.readdir(tasksDir, { withFileTypes: true });
-        taskNames = items.filter((d) => d.isDirectory()).map((d) => d.name);
-    }
-    catch {
-        // No tasks dir.
-    }
-    const tasks = await Promise.all(taskNames.map(async (taskId) => {
-        const result = await tryReadJson(join(tasksDir, taskId, 'result.json'));
-        return {
-            taskId,
-            verdict: result?.verdict,
-            wallTimeMs: result?.telemetry?.wallTimeMs,
-            totalCostUsd: result?.telemetry?.totalCostUsd,
-            reason: result?.reason,
-        };
-    }));
-    const needsDiagnostics = !summary || status !== 'completed';
-    const wrapperStderrTail = needsDiagnostics
-        ? (await tryReadFileTail(join(dir, 'wrapper.stderr.log'))) ?? undefined
-        : undefined;
-    const wrapperStdoutTail = needsDiagnostics
-        ? (await tryReadFileTail(join(dir, 'wrapper.stdout.log'))) ?? undefined
-        : undefined;
-    let fileListing;
-    if (needsDiagnostics) {
-        try {
-            const items = await fs.readdir(dir, { withFileTypes: true });
-            fileListing = items.map((d) => d.name + (d.isDirectory() ? '/' : ''));
-        }
-        catch {
-            fileListing = undefined;
-        }
+/**
+ * Read the loop-v2 self-evolution surface: per-episode two-arm forward records,
+ * the policy version ledger grouped into per-target lineages, and the
+ * reject-buffer of rolled-back episodes. Each reader is independently guarded so
+ * a single missing/unreadable store yields an empty slice rather than throwing.
+ */
+export async function readSelfEvolution(root) {
+    const [episodes, ledger, rejectBuffer] = await Promise.all([
+        listEpisodes(root).catch(() => []),
+        readPolicyLedgerAll(root).catch(() => []),
+        readRejectBufferAll(root).catch(() => []),
+    ]);
+    // Group ledger entries by targetId, preserving append order within each group.
+    const byTarget = new Map();
+    for (const entry of ledger) {
+        const list = byTarget.get(entry.targetId);
+        if (list)
+            list.push(entry);
+        else
+            byTarget.set(entry.targetId, [entry]);
+    }
+    const policyLineages = [];
+    for (const [targetId, entries] of byTarget) {
+        const last = entries[entries.length - 1];
+        policyLineages.push({
+            targetId,
+            headVersion: last ? last.version : null,
+            entries,
+            evolveCount: entries.filter((e) => e.action === 'evolve').length,
+            rollbackCount: entries.filter((e) => e.action === 'rollback').length,
+            refusedCount: entries.filter((e) => e.action === 'refused').length,
+            lastAt: last ? last.at : null,
+        });
     }
-    const base = summary
-        ? { ...summary, runId: summary.runId ?? runId, status }
-        : { runId, status };
-    return { ...base, tasks, wrapperStderrTail, wrapperStdoutTail, fileListing };
-}
-export async function readEvolveArchive(root) {
-    return tryReadJson(join(root, 'evolve', 'archive', 'archive.json'));
+    policyLineages.sort((a, b) => (b.lastAt ?? '').localeCompare(a.lastAt ?? ''));
+    return { episodes, policyLineages, rejectBuffer };
 }
 export async function readAgentInterfacePlan(root) {
     const { events, skippedRecords } = await readAgentCognitiveEvents({
@@ -366,10 +253,10 @@ export async function readAgentInterfacePlan(root) {
                 boundary: 'Same workflow, tool-native syntax.',
             },
             {
-                id: 'lab',
+                id: 'self-evolution',
                 label: 'Self-evolution',
-                command: 'synergyspec-selfevolving self-evolution evolve',
-                purpose: 'Score and rank candidate template variants into a human-gated promotion report.',
+                command: 'synergyspec-selfevolving self-evolution episode',
+                purpose: 'Run one self-evolution episode (main ∥ baseline arms → reward → bounded policy edit).',
                 boundary: 'Maintainer/research surface, not the normal user workflow.',
             },
         ],
@@ -384,15 +271,14 @@ export async function readAgentInterfacePlan(root) {
     };
 }
 export async function readOverview(root) {
-    const [project, changes, runs, cli] = await Promise.all([
+    const [project, changes, se, cli] = await Promise.all([
         readProjectInfo(root),
         listChanges(root),
-        listEvolveRuns(root),
+        readSelfEvolution(root),
         readCliHistory(root, 500),
     ]);
     const completedChanges = changes.filter((c) => c.status === 'completed').length;
     const inProgressChanges = changes.filter((c) => c.status === 'in-progress').length;
-    const lastRun = runs[0];
     const recentFailures = cli.filter((e) => e.outcome === 'failure').length;
     return {
         project,
@@ -402,14 +288,10 @@ export async function readOverview(root) {
             inProgress: inProgressChanges,
         },
         evolve: {
-            runs: runs.length,
-            lastRunAt: lastRun?.startedAt ?? null,
-            lastVerdict: lastRun
-                ? Object.entries(lastRun.verdictCounts ?? {})
-                    .filter(([, n]) => n > 0)
-                    .map(([k]) => k)
-                    .join(', ') || null
-                : null,
+            episodes: se.episodes.length,
+            lastEpisodeAt: se.episodes[0]?.updatedAt ?? null,
+            lastStage: se.episodes[0]?.stage ?? null,
+            headVersion: se.policyLineages[0]?.headVersion ?? null,
         },
         cli: { totalEvents: cli.length, recentFailures },
     };
@@ -604,8 +486,8 @@ export async function readArchitecture(root) {
         const domain = [
             { id: 'change', label: 'Change', role: 'A unit of planned work with its artifacts' },
             { id: 'memory', label: 'Memory', role: 'Durable attributed lessons across runs' },
-            { id: 'evolveRun', label: 'Evolve run', role: 'A benchmark-backed self-evolution attempt' },
-            { id: 'archive', label: 'Archive', role: 'Finalized changes and evolution lineage' },
+            { id: 'episode', label: 'Episode', role: 'One two-arm forward self-evolution episode (main ∥ baseline arms -> graded advantage)' },
+            { id: 'archive', label: 'Archive', role: 'Finalized changes' },
         ];
         logical = { entities: [...artifacts, ...domain] };
     }
@@ -624,20 +506,16 @@ export async function readArchitecture(root) {
     };
     try {
         const plan = await readAgentInterfacePlan(root);
-        const runs = await listEvolveRuns(root);
+        const se = await readSelfEvolution(root);
         const cli = await readCliHistory(root, 500);
-        const lastRun = runs[0];
-        const lastVerdict = lastRun
-            ? Object.entries(lastRun.verdictCounts ?? {})
-                .filter(([, n]) => n > 0)
-                .map(([k]) => k)
-                .join(', ') || null
-            : null;
-        const passRates = runs
-            .map((r) => r.passRate)
+        const lastVerdict = se.episodes[0]?.stage ?? null;
+        // passRate = fraction of GRADED episodes (advantage measured) that were
+        // positive (advantage > 0), over all graded episodes; null when none graded.
+        const gradedAdvantages = se.episodes
+            .map((e) => e.advantage)
             .filter((v) => typeof v === 'number' && Number.isFinite(v));
-        const passRate = passRates.length > 0
-            ? passRates.reduce((sum, v) => sum + v, 0) / passRates.length
+        const passRate = gradedAdvantages.length > 0
+            ? gradedAdvantages.filter((v) => v > 0).length / gradedAdvantages.length
             : null;
         const toolCounts = new Map();
         for (const event of cli) {
@@ -653,7 +531,7 @@ export async function readArchitecture(root) {
             toolDistribution,
             traceEvents: plan.summary.traces,
             decisions: plan.summary.decisions,
-            runs: runs.length,
+            runs: se.episodes.length,
             lastVerdict,
             passRate,
         };
@@ -701,8 +579,18 @@ export async function readArchitecture(root) {
                 rel: '.synergyspec-selfevolving/history/events.ndjson',
                 detail: 'Append-only CLI history event log',
             },
-            { rel: 'evolve/runs', detail: 'Per-run self-evolution outputs' },
-            { rel: 'evolve/archive/archive.json', detail: 'Self-evolution lineage archive' },
+            {
+                rel: '.synergyspec-selfevolving/self-evolution/episodes',
+                detail: 'Per-episode two-arm forward records',
+            },
+            {
+                rel: '.synergyspec-selfevolving/self-evolution/policy/ledger.ndjson',
+                detail: 'Append-only policy version ledger',
+            },
+            {
+                rel: '.synergyspec-selfevolving/self-evolution/policy/reject-buffer.ndjson',
+                detail: 'Rolled-back episodes (reject-buffer)',
+            },
         ];
         const storeSurfaces = [];
         for (const store of storePaths) {