npm - planpong - Versions diffs - 0.3.0 → 0.5.0 - Mend

planpong 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (59) hide show

package/dist/src/config/defaults.js +1 -0
package/dist/src/config/defaults.js.map +1 -1
package/dist/src/config/loader.d.ts +1 -0
package/dist/src/config/loader.js +3 -0
package/dist/src/config/loader.js.map +1 -1
package/dist/src/core/apply-edits.d.ts +40 -0
package/dist/src/core/apply-edits.js +220 -0
package/dist/src/core/apply-edits.js.map +1 -0
package/dist/src/core/convergence.d.ts +57 -4
package/dist/src/core/convergence.js +134 -6
package/dist/src/core/convergence.js.map +1 -1
package/dist/src/core/loop.js +3 -3
package/dist/src/core/loop.js.map +1 -1
package/dist/src/core/operations.d.ts +14 -1
package/dist/src/core/operations.js +592 -56
package/dist/src/core/operations.js.map +1 -1
package/dist/src/core/plan-diff.d.ts +23 -0
package/dist/src/core/plan-diff.js +135 -0
package/dist/src/core/plan-diff.js.map +1 -0
package/dist/src/core/session.d.ts +11 -0
package/dist/src/core/session.js +51 -1
package/dist/src/core/session.js.map +1 -1
package/dist/src/mcp/tools/get-feedback.d.ts +16 -0
package/dist/src/mcp/tools/get-feedback.js +118 -114
package/dist/src/mcp/tools/get-feedback.js.map +1 -1
package/dist/src/mcp/tools/revise.d.ts +16 -0
package/dist/src/mcp/tools/revise.js +76 -61
package/dist/src/mcp/tools/revise.js.map +1 -1
package/dist/src/mcp/tools/status.js +15 -1
package/dist/src/mcp/tools/status.js.map +1 -1
package/dist/src/prompts/planner.d.ts +34 -1
package/dist/src/prompts/planner.js +272 -17
package/dist/src/prompts/planner.js.map +1 -1
package/dist/src/prompts/reviewer.d.ts +14 -1
package/dist/src/prompts/reviewer.js +84 -1
package/dist/src/prompts/reviewer.js.map +1 -1
package/dist/src/providers/claude.d.ts +3 -0
package/dist/src/providers/claude.js +151 -13
package/dist/src/providers/claude.js.map +1 -1
package/dist/src/providers/codex.d.ts +3 -0
package/dist/src/providers/codex.js +150 -14
package/dist/src/providers/codex.js.map +1 -1
package/dist/src/providers/types.d.ts +69 -3
package/dist/src/schemas/config.d.ts +3 -0
package/dist/src/schemas/config.js +6 -0
package/dist/src/schemas/config.js.map +1 -1
package/dist/src/schemas/json-schema.d.ts +21 -0
package/dist/src/schemas/json-schema.js +172 -0
package/dist/src/schemas/json-schema.js.map +1 -0
package/dist/src/schemas/metrics.d.ts +171 -0
package/dist/src/schemas/metrics.js +49 -0
package/dist/src/schemas/metrics.js.map +1 -0
package/dist/src/schemas/revision.d.ts +166 -2
package/dist/src/schemas/revision.js +35 -2
package/dist/src/schemas/revision.js.map +1 -1
package/dist/src/schemas/session.d.ts +6 -0
package/dist/src/schemas/session.js +10 -0
package/dist/src/schemas/session.js.map +1 -1
package/package.json +4 -2

package/dist/src/core/operations.js CHANGED Viewed

@@ -1,10 +1,15 @@
 import { createHash } from "node:crypto";
 import { readFileSync, writeFileSync, existsSync } from "node:fs";
 import { relative, resolve } from "node:path";
-import { buildRevisionPrompt } from "../prompts/planner.js";
-import { buildReviewPrompt, formatPriorDecisions, getReviewPhase, } from "../prompts/reviewer.js";
-import { parseFeedbackForPhase, parseRevision, isConverged, } from "./convergence.js";
-import { createSession, writeSessionState, writeRoundFeedback, writeRoundResponse, readRoundFeedback, readRoundResponse, writeInitialPlan, } from "./session.js";
+import { isEditsRevision, isDirectionRevision, } from "../schemas/revision.js";
+import { buildRevisionPrompt, buildEditsRetryPrompt, } from "../prompts/planner.js";
+import { buildReviewPrompt, buildIncrementalReviewPrompt, formatPriorDecisions, getReviewPhase, } from "../prompts/reviewer.js";
+import { buildPlanDiff } from "./plan-diff.js";
+import { parseFeedbackForPhase, parseRevision, parseStructuredFeedbackForPhase, parseStructuredRevision, isConverged, StructuredOutputParseError, ZodValidationError, } from "./convergence.js";
+import { getFeedbackJsonSchemaForPhase, getRevisionJsonSchema, } from "../schemas/json-schema.js";
+import { applyEdits, logFailures, summarizeApply, } from "./apply-edits.js";
+import { createSession, writeSessionState, writeRoundFeedback, writeRoundResponse, readRoundFeedback, readRoundResponse, writeInitialPlan, writeRoundMetrics, writeRoundPlanSnapshot, readRoundPlanSnapshot, } from "./session.js";
+import { summarizeTiming, } from "../schemas/metrics.js";
 // --- Utility functions ---
 export function hashFile(path) {
     const content = readFileSync(path, "utf-8");
@@ -226,6 +231,216 @@ function buildPriorDecisions(cwd, sessionId, currentRound) {
         return null;
     return formatPriorDecisions(priorRounds);
 }
+/**
+ * Invocation state machine — single owner of all retry/downgrade logic for
+ * provider invocations. Providers are single-shot; this function decides
+ * when to downgrade from structured output to legacy mode.
+ *
+ * Strict 2-attempt cap: structured (1) -> legacy fallback (1) -> terminal.
+ * Structured mode can only occur on the first attempt, because every
+ * downgrade path switches `mode` to "legacy" before looping.
+ *
+ * Failure handling:
+ * - Provider `capability` error in structured mode → downgrade
+ * - Provider `fatal` error → terminal (no downgrade)
+ * - JSON.parse failure on structured output → downgrade
+ * - Zod validation failure on structured output → terminal (NOT retried)
+ * - Any failure in legacy mode → terminal
+ *
+ * Observability: when `metricsContext` is provided, each attempt emits a
+ * start/end line to stderr, collects `InvocationAttempt` records, and
+ * persists a `RoundMetrics` file in the session directory. All telemetry
+ * I/O is fail-open — failures log a warning and are swallowed, never
+ * altering the invocation outcome. The in-memory metrics object is
+ * returned alongside the result so callers get timing data without a
+ * filesystem round-trip.
+ *
+ * @param args - Single options object with the fields below.
+ * @param args.provider - Provider object; this function calls its
+ *   `checkStructuredOutputSupport()`, `invoke(prompt, options)` and
+ *   `markNonCapable()` methods and reads its `name`.
+ * @param args.invokeOptions - Base options forwarded to `provider.invoke`;
+ *   `model`, `effort` and `cwd` are also read here for labels and metrics.
+ * @param args.jsonSchema - Schema added to the invoke options in structured
+ *   mode only.
+ * @param args.buildPrompt - Called once per attempt with a boolean that is
+ *   `true` in structured mode; must return the prompt string.
+ * @param args.parseStructured - Parser applied to output in structured mode.
+ * @param args.parseLegacy - Parser applied to output in legacy mode.
+ * @param args.roundLabel - Human-readable label used in thrown error messages.
+ * @param args.metricsContext - Optional `{ sessionId, round, phase, role }`;
+ *   when absent, stderr logging, metrics persistence and the returned
+ *   `metrics` object are all disabled (`metrics` is `null`).
+ * @returns `{ result, metrics, sessionId }` — the parsed output, the
+ *   in-memory metrics (or `null`), and the provider response's `sessionId`.
+ * @throws {ZodValidationError} Re-thrown unchanged when the parser raises it.
+ * @throws {Error} On a terminal provider error or a terminal parse failure.
+ */
+async function invokeWithStateMachine(args) {
+    const { provider, invokeOptions, jsonSchema, buildPrompt, parseStructured, parseLegacy, roundLabel, metricsContext, } = args;
+    const supported = await provider.checkStructuredOutputSupport();
+    let mode = supported ? "structured" : "legacy";
+    let attempt = 0;
+    const maxAttempts = 2;
+    let lastError = null;
+    // Metrics collection. Attempt records are always accumulated in memory;
+    // they are only persisted (writeMetricsNow) or returned (buildMetrics)
+    // when metricsContext is provided.
+    const attempts = [];
+    const startedAt = new Date().toISOString();
+    const startedAtMs = Date.now();
+    const providerLabel = buildProviderLabel(provider.name, invokeOptions.model, invokeOptions.effort);
+    // Persists the current metrics snapshot to the session directory.
+    // NOTE(review): builds the same object literal as buildMetrics below —
+    // the two could share one builder.
+    const writeMetricsNow = () => {
+        if (!metricsContext)
+            return;
+        try {
+            const metrics = {
+                schema_version: 1,
+                session_id: metricsContext.sessionId,
+                round: metricsContext.round,
+                phase: metricsContext.phase,
+                role: metricsContext.role,
+                started_at: startedAt,
+                completed_at: new Date().toISOString(),
+                total_duration_ms: Date.now() - startedAtMs,
+                attempts,
+            };
+            writeRoundMetrics(invokeOptions.cwd, metricsContext.sessionId, metricsContext.round, metricsContext.role, metrics);
+        }
+        catch {
+            // writeRoundMetrics is already fail-open; catch here belts-and-braces
+            // against unexpected synchronous errors building the metrics object.
+        }
+    };
+    // Builds the in-memory metrics object handed back to the caller on
+    // success; returns null when metrics are disabled or building throws.
+    const buildMetrics = () => {
+        if (!metricsContext)
+            return null;
+        try {
+            return {
+                schema_version: 1,
+                session_id: metricsContext.sessionId,
+                round: metricsContext.round,
+                phase: metricsContext.phase,
+                role: metricsContext.role,
+                started_at: startedAt,
+                completed_at: new Date().toISOString(),
+                total_duration_ms: Date.now() - startedAtMs,
+                attempts,
+            };
+        }
+        catch {
+            return null;
+        }
+    };
+    try {
+        while (attempt < maxAttempts) {
+            attempt++;
+            // The prompt is rebuilt per attempt because its shape depends on mode.
+            const prompt = buildPrompt(mode === "structured");
+            const promptChars = prompt.length;
+            const promptLines = prompt.split("\n").length;
+            // Only structured mode passes the JSON schema to the provider.
+            const options = mode === "structured"
+                ? { ...invokeOptions, jsonSchema }
+                : { ...invokeOptions };
+            logStart(roundLabel, providerLabel, mode, promptChars, metricsContext);
+            const response = await provider.invoke(prompt, options);
+            // Base attempt record — filled in below.
+            const attemptRecord = {
+                mode,
+                provider: provider.name,
+                model: invokeOptions.model ?? null,
+                effort: invokeOptions.effort ?? null,
+                prompt_chars: promptChars,
+                prompt_lines: promptLines,
+                output_chars: null,
+                output_lines: null,
+                duration_ms: response.duration ?? 0,
+                ok: false,
+                error_kind: null,
+                error_exit_code: null,
+            };
+            if (!response.ok) {
+                attemptRecord.ok = false;
+                attemptRecord.error_kind = response.error.kind;
+                attemptRecord.error_exit_code = response.error.exitCode;
+                attempts.push(attemptRecord);
+                logEnd(roundLabel, providerLabel, mode, promptChars, null, response.duration ?? 0, false, `${response.error.kind}: ${truncate(response.error.message, 200)}`, metricsContext);
+                // Capability error in structured mode: mark the provider so
+                // later calls skip structured mode, then retry in legacy mode.
+                if (mode === "structured" &&
+                    response.error.kind === "capability" &&
+                    attempt < maxAttempts) {
+                    provider.markNonCapable();
+                    mode = "legacy";
+                    continue;
+                }
+                // Fatal, or already in legacy mode — terminal
+                throw new Error(`${roundLabel} failed (exit ${response.error.exitCode}, ${response.error.kind}):\n${response.error.message}`);
+            }
+            // Provider returned output — record output size, try to parse.
+            const outputChars = response.output.length;
+            const outputLines = response.output.split("\n").length;
+            attemptRecord.output_chars = outputChars;
+            attemptRecord.output_lines = outputLines;
+            try {
+                const parsed = mode === "structured"
+                    ? parseStructured(response.output)
+                    : parseLegacy(response.output);
+                attemptRecord.ok = true;
+                attempts.push(attemptRecord);
+                logEnd(roundLabel, providerLabel, mode, promptChars, outputChars, response.duration ?? 0, true, null, metricsContext);
+                // NOTE(review): response.ok is always true here (the !response.ok
+                // branch above always continues or throws), so the ternary on
+                // sessionId is redundant.
+                return {
+                    result: parsed,
+                    metrics: buildMetrics(),
+                    sessionId: response.ok ? response.sessionId : undefined,
+                };
+            }
+            catch (parseError) {
+                lastError = parseError instanceof Error ? parseError : new Error(String(parseError));
+                // Zod validation failure on structured output is terminal — the model
+                // produced semantically invalid content, retrying won't help.
+                // NOTE(review): this check is not gated on mode, so a
+                // ZodValidationError thrown by parseLegacy is re-thrown unchanged too.
+                if (parseError instanceof ZodValidationError) {
+                    attemptRecord.ok = false;
+                    attemptRecord.error_kind = "zod";
+                    attempts.push(attemptRecord);
+                    logEnd(roundLabel, providerLabel, mode, promptChars, outputChars, response.duration ?? 0, false, `zod: ${truncate(lastError.message, 200)}`, metricsContext);
+                    throw parseError;
+                }
+                // JSON.parse failure on structured output triggers downgrade
+                if (mode === "structured" &&
+                    parseError instanceof StructuredOutputParseError &&
+                    attempt < maxAttempts) {
+                    attemptRecord.ok = false;
+                    attemptRecord.error_kind = "parse";
+                    attempts.push(attemptRecord);
+                    logEnd(roundLabel, providerLabel, mode, promptChars, outputChars, response.duration ?? 0, false, `parse: ${truncate(lastError.message, 200)}`, metricsContext);
+                    provider.markNonCapable();
+                    mode = "legacy";
+                    continue;
+                }
+                // Legacy parse failure — terminal
+                // NOTE(review): this fallthrough is also reached in structured mode
+                // when the parser throws something that is neither a
+                // ZodValidationError nor a StructuredOutputParseError.
+                attemptRecord.ok = false;
+                attemptRecord.error_kind = "parse";
+                attempts.push(attemptRecord);
+                logEnd(roundLabel, providerLabel, mode, promptChars, outputChars, response.duration ?? 0, false, `parse: ${truncate(lastError.message, 200)}`, metricsContext);
+                throw new Error(`${roundLabel} parse failed in ${mode} mode: ${lastError.message}`);
+            }
+        }
+        // Unreachable in normal flow — defensive
+        throw lastError ?? new Error(`${roundLabel} exhausted all attempts`);
+    }
+    finally {
+        // Persist metrics on every exit (success or throw). Fail-open — this
+        // never throws; writeRoundMetrics catches its own errors.
+        writeMetricsNow();
+    }
+}
+/**
+ * Build the provider label used in the stderr log lines by delegating to
+ * `formatProviderLabel` (defined outside this hunk).
+ *
+ * @param providerName - Provider name, passed through as `provider`.
+ * @param model - Model identifier; `null`/`undefined` becomes `undefined`.
+ * @param effort - Effort setting; `null`/`undefined` becomes `undefined`.
+ * @returns Whatever `formatProviderLabel` returns (presumably a string —
+ *   it is interpolated into log lines by the callers in this file).
+ */
+function buildProviderLabel(providerName, model, effort) {
+    return formatProviderLabel({
+        provider: providerName,
+        model: model ?? undefined,
+        effort: effort ?? undefined,
+    });
+}
+/**
+ * Clip `text` to at most `max` characters (as counted by `String#length`).
+ * No ellipsis or other marker is appended when the text is cut.
+ *
+ * @param text - String to clip.
+ * @param max - Maximum length of the returned string.
+ * @returns `text` unchanged when it is short enough, otherwise its first
+ *   `max` characters.
+ */
+function truncate(text, max) {
+    return text.length > max ? text.slice(0, max) : text;
+}
+/**
+ * Write `line` to stderr, swallowing any synchronous error thrown by the
+ * write so that logging can never change the caller's outcome.
+ *
+ * @param line - Text to write; callers include the trailing newline.
+ */
+function safeStderr(line) {
+    try {
+        process.stderr.write(line);
+    }
+    catch {
+        // stderr unavailable — nothing else we can do
+    }
+}
+/**
+ * Emit the "attempt started" log line to stderr. Does nothing when `ctx`
+ * is falsy (metrics/logging disabled).
+ *
+ * @param roundLabel - Accepted for signature symmetry with the call site;
+ *   not used in the emitted line.
+ * @param providerLabel - Label produced by `buildProviderLabel`.
+ * @param mode - Invocation mode ("structured" or "legacy" at the call site).
+ * @param promptChars - Prompt length in characters.
+ * @param ctx - Metrics context; `round` and `role` are read from it.
+ */
+function logStart(roundLabel, providerLabel, mode, promptChars, ctx) {
+    if (!ctx)
+        return;
+    safeStderr(`[planpong] R${ctx.round} ${ctx.role} | ${providerLabel} | ${mode} | prompt=${promptChars}c\n`);
+}
+/**
+ * Emit the "attempt finished" log line to stderr. Does nothing when `ctx`
+ * is falsy (metrics/logging disabled).
+ *
+ * @param roundLabel - Accepted but not used in the emitted line.
+ * @param providerLabel - Label produced by `buildProviderLabel`.
+ * @param mode - Invocation mode ("structured" or "legacy" at the call site).
+ * @param promptChars - Prompt length in characters.
+ * @param outputChars - Output length in characters, or `null` when the
+ *   provider returned no output.
+ * @param durationMs - Attempt duration, formatted via `formatDuration`
+ *   (defined outside this hunk).
+ * @param ok - Whether the attempt succeeded.
+ * @param failDetail - Failure description; `"unknown"` is printed when it
+ *   is `null`/`undefined`.
+ * @param ctx - Metrics context; `round` and `role` are read from it.
+ */
+function logEnd(roundLabel, providerLabel, mode, promptChars, outputChars, durationMs, ok, failDetail, ctx) {
+    if (!ctx)
+        return;
+    const durationStr = formatDuration(durationMs);
+    // The success line is only printed when an output size is available;
+    // ok === true with outputChars === null falls through to the fail line.
+    if (ok && outputChars !== null) {
+        safeStderr(`[planpong] R${ctx.round} ${ctx.role} | ${providerLabel} | ${mode} | prompt=${promptChars}c output=${outputChars}c duration=${durationStr} | ok\n`);
+    }
+    else {
+        safeStderr(`[planpong] R${ctx.round} ${ctx.role} | ${providerLabel} | ${mode} | prompt=${promptChars}c duration=${durationStr} | fail (${failDetail ?? "unknown"})\n`);
+    }
+}
 /**
  * Run a single review round: send current plan to the reviewer for critique.
  */
@@ -235,34 +450,68 @@ export async function runReviewRound(session, cwd, config, reviewerProvider) {
     const planContent = readFileSync(planPath, "utf-8");
     const phase = getReviewPhase(round);
     const priorDecisions = buildPriorDecisions(cwd, session.id, round);
-    const reviewPrompt = buildReviewPrompt(planContent, priorDecisions, phase);
-    const reviewResponse = await reviewerProvider.invoke(reviewPrompt, {
-        cwd,
-        model: config.reviewer.model,
-        effort: config.reviewer.effort,
-    });
-    // Try to parse even on non-zero exit — CLIs can exit 1 with valid output
-    let feedback;
-    try {
-        feedback = parseFeedbackForPhase(reviewResponse.content, phase);
-    }
-    catch (parseError) {
-        // If exit code was also non-zero, the provider genuinely failed
-        if (reviewResponse.exitCode !== 0) {
-            throw new Error(`Reviewer failed (exit ${reviewResponse.exitCode}):\n${reviewResponse.content.slice(0, 500)}`);
-        }
-        // Exit was 0 but parse failed — retry
-        const retryPrompt = `Your previous response could not be parsed. Please output ONLY a valid JSON object wrapped in <planpong-feedback> tags. The error was: ${parseError instanceof Error ? parseError.message : "parse error"}\n\nOriginal prompt:\n${reviewPrompt}`;
-        const retryResponse = await reviewerProvider.invoke(retryPrompt, {
+    // Persist a snapshot of the plan as the reviewer is about to see it. On
+    // round N+1 we'll diff against this snapshot to produce the incremental
+    // "what changed" content for the resumed reviewer session.
+    writeRoundPlanSnapshot(cwd, session.id, round, planContent);
+    // Reviewer-side persistent sessions. Both claude and codex support this:
+    //   - claude: we generate the UUID and pass it via --session-id (first)
+    //     or --resume (subsequent).
+    //   - codex: codex generates its own thread_id; we capture it from the
+    //     `--json` event stream and pass it via `codex exec resume <id>`
+    //     on subsequent calls.
+    // The canonical reviewer session ID is `session.reviewerSessionId` — for
+    // claude this is the pre-generated UUID; for codex it's overwritten
+    // after the first call with the captured thread_id.
+    const reviewerSessionInited = session.reviewerSessionInitialized === true;
+    const isResumedReviewerSession = reviewerSessionInited;
+    const priorPlanContent = isResumedReviewerSession
+        ? readRoundPlanSnapshot(cwd, session.id, round - 1)
+        : null;
+    const planDiff = priorPlanContent
+        ? buildPlanDiff(priorPlanContent, planContent)
+        : null;
+    const newSessionId = !reviewerSessionInited && reviewerProvider.name === "claude"
+        ? session.reviewerSessionId
+        : undefined;
+    const resumeSessionId = reviewerSessionInited
+        ? session.reviewerSessionId
+        : undefined;
+    const { result: feedback, metrics, sessionId: capturedSessionId, } = await invokeWithStateMachine({
+        provider: reviewerProvider,
+        invokeOptions: {
             cwd,
             model: config.reviewer.model,
             effort: config.reviewer.effort,
-        });
-        feedback = parseFeedbackForPhase(retryResponse.content, phase);
-    }
+            newSessionId,
+            resumeSessionId,
+        },
+        jsonSchema: getFeedbackJsonSchemaForPhase(phase),
+        buildPrompt: (structuredOutput) => isResumedReviewerSession
+            ? buildIncrementalReviewPrompt(planDiff ?? planContent, priorDecisions, phase, structuredOutput)
+            : buildReviewPrompt(planContent, priorDecisions, phase, structuredOutput),
+        parseStructured: (output) => parseStructuredFeedbackForPhase(output, phase),
+        parseLegacy: (output) => parseFeedbackForPhase(output, phase),
+        roundLabel: `Round ${round} review`,
+        metricsContext: {
+            sessionId: session.id,
+            round,
+            phase,
+            role: "review",
+        },
+    });
     writeRoundFeedback(cwd, session.id, round, feedback);
     const severity = severityFromFeedback(feedback);
     const converged = isConverged(feedback);
+    const timing = metrics ? summarizeTiming(metrics) : undefined;
+    // Persist the canonical reviewer session ID. For claude this is the
+    // UUID we generated; for codex it's the thread_id captured from --json
+    // output. Either way, future rounds resume this conversation.
+    if (!reviewerSessionInited && capturedSessionId) {
+        session.reviewerSessionId = capturedSessionId;
+        session.reviewerSessionInitialized = true;
+        writeSessionState(cwd, session);
+    }
     // Extract phase-specific extras for status line
     const phaseExtras = {};
     if (feedback.verdict === "blocked") {
@@ -281,7 +530,7 @@ export async function runReviewRound(session, cwd, config, reviewerProvider) {
             phaseExtras.risks_promoted = feedback.issues.length;
         }
     }
-    return { round, feedback, severity, converged, phaseExtras };
+    return { round, feedback, severity, converged, phaseExtras, timing };
 }
 /**
  * Run a single revision round: send plan + feedback to the planner for revision.
@@ -296,37 +545,86 @@ export async function runRevisionRound(session, cwd, config, plannerProvider) {
     }
     const phase = getReviewPhase(round);
     const keyDecisions = extractKeyDecisions(planContent);
-    const revisionPrompt = buildRevisionPrompt(planContent, feedback, keyDecisions, null, phase);
-    const revisionResponse = await plannerProvider.invoke(revisionPrompt, {
-        cwd,
-        model: config.planner.model,
-        effort: config.planner.effort,
-    });
-    // Try to parse even on non-zero exit — CLIs can exit 1 with valid output
-    let revision;
-    try {
-        revision = parseRevision(revisionResponse.content);
-    }
-    catch (parseError) {
-        // If exit code was also non-zero, the provider genuinely failed
-        if (revisionResponse.exitCode !== 0) {
-            throw new Error(`Planner revision failed (exit ${revisionResponse.exitCode}):\n${revisionResponse.content.slice(0, 500)}`);
-        }
-        // Exit was 0 but parse failed — retry
-        const retryPrompt = `Your previous response could not be parsed. Please output ONLY a valid JSON object wrapped in <planpong-revision> tags. The error was: ${parseError instanceof Error ? parseError.message : "parse error"}\n\nOriginal prompt:\n${revisionPrompt}`;
-        const retryResponse = await plannerProvider.invoke(retryPrompt, {
+    // Direction phase always uses full-plan output. Risk + detail honor
+    // config.revision_mode. The shape decision is made once here and threaded
+    // through prompt + JSON schema + parser.
+    const useEdits = phase !== "direction" && config.revision_mode === "edits";
+    const revisionShape = useEdits ? "edits" : "full";
+    const jsonSchema = getRevisionJsonSchema(phase, config.revision_mode);
+    // Planner-side persistent sessions were tested and found to INCREASE wall
+    // time — the model used the spared context budget to do more work per
+    // round (more edits, deeper revisions), not to do the same work faster.
+    // Reviewer-side persistent sessions are kept (see runReviewRound).
+    const { result: revision, metrics } = await invokeWithStateMachine({
+        provider: plannerProvider,
+        invokeOptions: {
             cwd,
             model: config.planner.model,
             effort: config.planner.effort,
+        },
+        jsonSchema,
+        buildPrompt: (structuredOutput) => buildRevisionPrompt(planContent, feedback, keyDecisions, null, phase, structuredOutput, config.revision_mode),
+        parseStructured: (output) => parseStructuredRevision(output, revisionShape),
+        parseLegacy: (output) => parseRevision(output, revisionShape),
+        roundLabel: `Round ${round} revision`,
+        metricsContext: {
+            sessionId: session.id,
+            round,
+            phase,
+            role: "revision",
+        },
+    });
+    writeRoundResponse(cwd, session.id, round, revision);
+    const timing = metrics ? summarizeTiming(metrics) : undefined;
+    // Apply revision to disk. Two paths: full (today's behavior) or edits
+    // (apply edit list, retry failures, atomic write).
+    let editTelemetry;
+    let finalRevision = revision;
+    if (useEdits && isEditsRevision(revision)) {
+        const result = await applyRevisionEdits({
+            session,
+            cwd,
+            planPath,
+            planContent,
+            revision,
+            plannerProvider,
+            config,
+            phase,
+            metrics,
         });
-        revision = parseRevision(retryResponse.content);
+        finalRevision = result.revision;
+        editTelemetry = result.telemetry;
     }
-    writeRoundResponse(cwd, session.id, round, revision);
-    // Tally responses
+    else if (isDirectionRevision(revision)) {
+        writeFileSync(planPath, revision.updated_plan);
+        editTelemetry = {
+            revision_mode: "full",
+            edits_attempted: null,
+            edits_applied: null,
+            edits_failed: null,
+            edits_retried: null,
+            edits_recovered: null,
+            retry_invoked: false,
+        };
+        persistRevisionMetrics({
+            cwd,
+            session,
+            round,
+            phase,
+            metrics,
+            telemetry: editTelemetry,
+        });
+    }
+    else {
+        throw new Error(`runRevisionRound: revision shape mismatch — expected ${useEdits ? "edits" : "full"} but got ${"updated_plan" in revision ? "full" : "edits"}`);
+    }
+    session.planHash = hashFile(planPath);
+    writeSessionState(cwd, session);
+    // Tally responses (use the possibly-downgraded responses from finalRevision).
     let accepted = 0;
     let rejected = 0;
     let deferred = 0;
-    for (const resp of revision.responses) {
+    for (const resp of finalRevision.responses) {
         if (resp.action === "accepted")
             accepted++;
         else if (resp.action === "rejected")
@@ -334,19 +632,257 @@ export async function runRevisionRound(session, cwd, config, plannerProvider) {
         else if (resp.action === "deferred")
             deferred++;
     }
-    // Write updated plan to disk
-    const updatedPlan = revision.updated_plan;
-    writeFileSync(planPath, updatedPlan);
-    session.planHash = hashFile(planPath);
-    writeSessionState(cwd, session);
     return {
         round,
-        revision,
+        revision: finalRevision,
         accepted,
         rejected,
         deferred,
         planUpdated: true,
+        timing,
+        edits: editTelemetry,
+    };
+}
+/**
+ * Apply an edits-mode revision: first-pass apply, targeted retry on failures,
+ * a single write of the result, then a response-edit consistency check. All
+ * mutations to the plan happen in memory; one writeFileSync persists the
+ * final state.
+ *
+ * NOTE(review): earlier wording called the write "atomic". The visible code
+ * is a plain `writeFileSync(planPath, working)` with no temp-file + rename,
+ * so atomicity is not established here — confirm before relying on it.
+ *
+ * @param args - Single options object with the fields below.
+ * @param args.session - Session object; `currentRound` and `id` are read.
+ * @param args.cwd - Working directory passed to session/metrics writers.
+ * @param args.planPath - Path of the plan file that is overwritten.
+ * @param args.planContent - Plan text the first-pass edits are applied to.
+ * @param args.revision - Edits-shaped revision; `revision.edits` is applied.
+ * @param args.plannerProvider - Provider used for the one-shot retry.
+ * @param args.config - Config forwarded to `runEditsRetry`.
+ * @param args.phase - Review phase forwarded to retry and metrics.
+ * @param args.metrics - Metrics object from the main invocation, or a falsy
+ *   value; when present its `attempts` array is mutated by appending the
+ *   retry's attempt record.
+ * @returns `{ revision, telemetry }` — the revision returned by
+ *   `downgradeOrphanedResponses` and the edit-application counters.
+ */
+async function applyRevisionEdits(args) {
+    const { session, cwd, planPath, planContent, revision, plannerProvider, config, phase, metrics, } = args;
+    const round = session.currentRound;
+    const editsAttempted = revision.edits.length;
+    // First-pass apply.
+    const firstPass = applyEdits(planContent, revision.edits);
+    if (firstPass.failures.length > 0) {
+        logFailures(`R${round} edits first-pass`, firstPass.failures);
+    }
+    safeStderr(`[planpong] R${round} edits | first-pass | ${summarizeApply(firstPass)}\n`);
+    // `working` is the in-memory plan after whichever passes succeeded.
+    let working = firstPass.plan;
+    const successfulEdits = firstPass.applied.map((a) => a.edit);
+    const recoveredEdits = [];
+    const unrecoverableFailures = [];
+    let retryInvoked = false;
+    let retriedCount = 0;
+    if (firstPass.failures.length > 0) {
+        retryInvoked = true;
+        retriedCount = firstPass.failures.length;
+        try {
+            const retryResult = await runEditsRetry({
+                cwd,
+                session,
+                round,
+                phase,
+                plannerProvider,
+                config,
+                currentPlan: working,
+                failures: firstPass.failures,
+            });
+            // The retry edits are applied on top of the partially-edited plan.
+            const secondPass = applyEdits(working, retryResult.edits);
+            if (secondPass.failures.length > 0) {
+                logFailures(`R${round} edits retry`, secondPass.failures);
+            }
+            safeStderr(`[planpong] R${round} edits | retry | ${summarizeApply(secondPass)}\n`);
+            working = secondPass.plan;
+            for (const a of secondPass.applied)
+                recoveredEdits.push(a.edit);
+            unrecoverableFailures.push(...secondPass.failures);
+            // Track the retry as an additional invocation attempt in metrics.
+            // NOTE(review): this only runs when the retry succeeds; when
+            // runEditsRetry throws, no attempt record reaches metrics.attempts.
+            if (metrics) {
+                metrics.attempts.push(retryResult.attemptRecord);
+            }
+        }
+        catch (err) {
+            // Retry failed entirely (provider error, parse error). Surface but
+            // keep first-pass partial result — strictly better than nothing.
+            safeStderr(`[planpong] R${round} edits | retry failed: ${err instanceof Error ? err.message : String(err)}\n`);
+            unrecoverableFailures.push(...firstPass.failures);
+        }
+    }
+    // Single write of the final plan state (performed even when no edit
+    // applied). NOTE(review): plain writeFileSync — not atomic by itself.
+    writeFileSync(planPath, working);
+    // Response-edit consistency check: if an `accepted` response has no
+    // surviving edit anywhere in its rationale or suggestion's section, the
+    // planner claimed to have addressed an issue without a corresponding plan
+    // change. Downgrade to `deferred`. The match is heuristic — keyed on the
+    // response's `issue_id` appearing in the edit's after text or in any
+    // edit's section that maps to the issue's section field. This is the same
+    // tradeoff the plan documents (R3 F2 issue, accepted as heuristic).
+    // NOTE(review): the matching itself lives in downgradeOrphanedResponses,
+    // which is outside this view — verify the description above against it.
+    const survivingEdits = [...successfulEdits, ...recoveredEdits];
+    const downgraded = downgradeOrphanedResponses(revision, survivingEdits, unrecoverableFailures);
+    // Persist failure metadata in the round response JSON alongside responses.
+    // We rewrite the response file to include the (possibly-downgraded)
+    // responses + edit application result.
+    writeRoundResponse(cwd, session.id, round, downgraded);
+    // Counter semantics: edits_applied counts first-pass successes only,
+    // edits_failed and edits_retried both count first-pass failures, and
+    // edits_recovered counts edits applied by the retry pass.
+    const telemetry = {
+        revision_mode: "edits",
+        edits_attempted: editsAttempted,
+        edits_applied: successfulEdits.length,
+        edits_failed: firstPass.failures.length,
+        edits_retried: retriedCount,
+        edits_recovered: recoveredEdits.length,
+        retry_invoked: retryInvoked,
+    };
+    persistRevisionMetrics({
+        cwd,
+        session,
+        round,
+        phase,
+        metrics,
+        telemetry,
+    });
+    return { revision: downgraded, telemetry };
+}
+/**
+ * One-shot retry for failed edits. Builds a targeted prompt with only the
+ * failures + current (partially-edited) plan and asks the planner to
+ * re-express each failed edit. The retry is best-effort — provider/parse
+ * errors are caught by the caller and treated as "no recovery."
+ *
+ * This calls `plannerProvider.invoke` directly, once; it does not go through
+ * `invokeWithStateMachine`, so there is no structured → legacy downgrade.
+ *
+ * @param args - Single options object. Fields read here: `plannerProvider`,
+ *   `config` (`config.planner.model` / `config.planner.effort`),
+ *   `currentPlan`, `failures` and `cwd`. The caller also passes `session`,
+ *   `round` and `phase`, which this function does not use.
+ * @returns `{ edits, attemptRecord }` — the edits array extracted from the
+ *   response and a metrics record describing this invocation.
+ * @throws {Error} When the provider reports failure, or when the response
+ *   cannot be parsed into an edits array.
+ */
+async function runEditsRetry(args) {
+    const { plannerProvider, config, currentPlan, failures } = args;
+    const supported = await plannerProvider.checkStructuredOutputSupport();
+    const useStructured = supported;
+    // Only the fields the retry prompt needs are forwarded for each failure.
+    const prompt = buildEditsRetryPrompt(currentPlan, failures.map((f) => ({
+        edit: f.edit,
+        reason: f.reason,
+        section_searched: f.section_searched,
+        diagnostic: f.diagnostic,
+    })), useStructured);
+    // Reuse the full edits-revision JSON schema rather than defining an
+    // edits-only schema. The `responses` field it describes is ignored when
+    // parsing below; per the original note, the retry prompt tells the
+    // planner not to include `responses`.
+    // NOTE(review): the phase is hard-coded to "detail" here regardless of
+    // the phase the caller passed in — confirm that is intended.
+    const jsonSchema = getRevisionJsonSchema("detail", "edits");
+    const promptChars = prompt.length;
+    const promptLines = prompt.split("\n").length;
+    // The schema is only passed to the provider in structured mode.
+    const options = useStructured
+        ? {
+            cwd: args.cwd,
+            model: config.planner.model,
+            effort: config.planner.effort,
+            jsonSchema,
+        }
+        : {
+            cwd: args.cwd,
+            model: config.planner.model,
+            effort: config.planner.effort,
+        };
+    const response = await plannerProvider.invoke(prompt, options);
+    // NOTE(review): error_kind is initialised to "edit-retry" and is not
+    // cleared when `ok` is set to true below, so a successful retry record
+    // still carries error_kind "edit-retry" — possibly used as a tag; confirm.
+    const attemptRecord = {
+        mode: useStructured ? "structured" : "legacy",
+        provider: plannerProvider.name,
+        model: config.planner.model ?? null,
+        effort: config.planner.effort ?? null,
+        prompt_chars: promptChars,
+        prompt_lines: promptLines,
+        output_chars: response.ok ? response.output.length : null,
+        output_lines: response.ok ? response.output.split("\n").length : null,
+        duration_ms: response.duration ?? 0,
+        ok: false,
+        error_kind: "edit-retry",
+        error_exit_code: null,
+    };
+    if (!response.ok) {
+        throw new Error(`edits retry: provider error (${response.error.kind}: ${response.error.exitCode})`);
+    }
+    // Parse the retry response — accept either a full edits revision (with
+    // empty responses) or just an `edits` array wrapped in the standard tags.
+    // No schema validation of the individual edits happens in this function.
+    let edits;
+    try {
+        if (useStructured) {
+            const parsed = JSON.parse(response.output);
+            edits = extractEditsFromRetryPayload(parsed);
+        }
+        else {
+            // Legacy mode: take the text between <planpong-revision> tags
+            // (case-insensitive, first match), or the whole output if untagged.
+            const json = response.output.match(/<planpong-revision>([\s\S]*?)<\/planpong-revision>/i)?.[1] ??
+                response.output;
+            const parsed = JSON.parse(json);
+            edits = extractEditsFromRetryPayload(parsed);
+        }
+    }
+    catch (err) {
+        throw new Error(`edits retry: parse failed: ${err instanceof Error ? err.message : String(err)}`);
+    }
+    attemptRecord.ok = true;
+    return { edits, attemptRecord };
+}
+function extractEditsFromRetryPayload(payload) { // returns the edits array found in an already-parsed retry payload, or throws Error; only the container shape is checked, not the elements
+    if (payload && // null / undefined / other falsy payloads skip this branch
+        typeof payload === "object" &&
+        "edits" in payload &&
+        Array.isArray(payload.edits)) {
+        return payload.edits; // object form: { edits: [...] }; any other keys are ignored
+    }
+    if (Array.isArray(payload)) // bare-array form: the payload itself is taken as the edits
+        return payload;
+    throw new Error("retry payload missing `edits` array");
+}
+/**
+ * Heuristic response-edit consistency check.
+ *
+ * Intended design: for each `accepted` response, look for at least one
+ * surviving edit in the response's `section` and, if none exists, downgrade
+ * the response action to `deferred` with rationale prefixed
+ * `edit_not_applied: ...`. The plan acknowledges this is heuristic (no
+ * explicit issue↔edit ID mapping in the schema).
+ *
+ * What the code below actually does is coarser, because responses carry no
+ * section to match against (see the inline comment):
+ *
+ * - If `unrecoverableFailures` is empty, `revision` is returned as-is: when
+ *   every edit succeeded, the planner's accepts are taken at face value.
+ * - Otherwise, if `survivingEdits` is empty, EVERY `accepted` response is
+ *   replaced by a copy with action `deferred` and a rationale made of the
+ *   `edit_not_applied: ...` prefix followed by the original rationale.
+ * - Otherwise (at least one edit survived) no response is changed.
+ *
+ * Known limitation: an accepted response that didn't require a plan change
+ * (e.g., "this was already addressed") is incorrectly downgraded whenever
+ * the all-edits-failed case is hit.
+ *
+ * @param revision - object with a `responses` array; it is not mutated.
+ * @param survivingEdits - array of edits; each element's `section` is read.
+ * @param unrecoverableFailures - array; only its `length` is read.
+ * @returns `revision` itself when there are no unrecoverable failures,
+ *   otherwise a shallow copy of `revision` with a new `responses` array
+ *   (responses that are not downgraded are reused by reference).
+ */
+function downgradeOrphanedResponses(revision, survivingEdits, unrecoverableFailures) {
+    if (unrecoverableFailures.length === 0)
+        return revision;
+    // Build a set of sections that have at least one surviving edit. Only the set's size is consulted below.
+    const editedSections = new Set(survivingEdits.map((e) => e.section.trim())); // NOTE(review): throws if an edit has no string `section`, although the section values are never compared
+    const downgradedResponses = revision.responses.map((resp) => {
+        if (resp.action !== "accepted")
+            return resp;
+        // Section is not on IssueResponse; we have no per-issue section mapping
+        // (R3 F2 limitation). Without that, ANY surviving edit counts as "the
+        // planner did some work", and accepts are only downgraded when no edit
+        // survived at all — i.e., the plan didn't change. This is conservative:
+        // it minimizes false-positive downgrades while still preventing the
+        // worst case ("everything accepted, no edits applied").
+        if (editedSections.size === 0) {
+            return {
+                ...resp,
+                action: "deferred",
+                rationale: `edit_not_applied: corresponding plan edit failed and could not be recovered. Original rationale: ${resp.rationale}`,
+            };
+        }
+        return resp;
+    });
+    return { ...revision, responses: downgradedResponses };
+}
+/**
+ * Re-persist the revision metrics file with augmented edit telemetry. The
+ * state machine has already written the basic metrics file in its finally
+ * block; this overwrites with the same data plus revision_mode + edit
+ * counts. Fail-open — telemetry write errors never propagate.
+ *
+ * Does nothing when `args.metrics` is falsy. Otherwise copies `metrics`,
+ * adds seven fields taken from `telemetry` (revision_mode, edits_attempted,
+ * edits_applied, edits_failed, edits_retried, edits_recovered,
+ * retry_invoked) and passes the result to
+ * `writeRoundMetrics(cwd, session.id, round, "revision", ...)`.
+ *
+ * The `try` wraps both the construction of the augmented object and the
+ * write, so an error while reading `telemetry` or `session.id` is swallowed
+ * too; only the destructuring of `args` itself sits outside it.
+ *
+ * @param args - object with `cwd`, `session`, `round`, `metrics` and
+ *   `telemetry`.
+ * @returns nothing.
+ */
+function persistRevisionMetrics(args) {
+    const { cwd, session, round, metrics, telemetry } = args;
+    if (!metrics) // nothing to augment
+        return;
+    try {
+        const augmented = { // telemetry fields are written after the spread, so they win over same-named keys in `metrics`
+            ...metrics,
+            revision_mode: telemetry.revision_mode,
+            edits_attempted: telemetry.edits_attempted,
+            edits_applied: telemetry.edits_applied,
+            edits_failed: telemetry.edits_failed,
+            edits_retried: telemetry.edits_retried,
+            edits_recovered: telemetry.edits_recovered,
+            retry_invoked: telemetry.retry_invoked,
+        };
+        writeRoundMetrics(cwd, session.id, round, "revision", augmented);
+    }
+    catch {
+        // fail-open — telemetry never breaks the run
+    }
 }
 /**
  * Mark the session as approved and update the plan's status line.