npm - claude-overnight - Versions diffs - 1.25.42 → 1.25.43 - Mend

claude-overnight 1.25.42 → 1.25.43

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

package/dist/planner-query.js +15 -0
package/dist/providers.js +5 -0
package/dist/run.js +150 -29
package/dist/state.d.ts +1 -1
package/dist/state.js +6 -2
package/dist/steering.d.ts +49 -0
package/dist/steering.js +114 -44
package/dist/transcripts.d.ts +1 -1
package/dist/transcripts.js +10 -2
package/dist/types.d.ts +2 -1
package/package.json +1 -1
package/plugins/claude-overnight/.claude-plugin/plugin.json +1 -1

package/dist/planner-query.js CHANGED

@@ -619,6 +619,21 @@ function extractOutermostBraces(text) {
     return null;
 }
 export function attemptJsonParse(text) {
+    // Strip conversational prefaces/suffixes that weak-schema models sometimes
+    // wrap around the JSON body (e.g. "Here is the JSON: { ... } Let me know…").
+    const preface = /^\s*(?:Here (?:is|are)[^{]*|Let me[^{]*|I'?ll[^{]*|Sure[^{]*|Okay[^{]*)/i;
+    const suffix = /\n\n(?:Let me know|Hope this|Please let me)[\s\S]*$/i;
+    if (preface.test(text) || suffix.test(text)) {
+        const cleaned = text.replace(preface, "").replace(suffix, "").trim();
+        if (cleaned && cleaned !== text) {
+            try {
+                const obj = JSON.parse(cleaned);
+                if (typeof obj === "object" && obj !== null)
+                    return obj;
+            }
+            catch { }
+        }
+    }
     try {
         const obj = JSON.parse(text);
         if (typeof obj === "object" && obj !== null)

package/dist/providers.js CHANGED

@@ -178,6 +178,11 @@ export function envFor(p) {
         base.ANTHROPIC_AUTH_TOKEN = key;
     }
     delete base.ANTHROPIC_API_KEY;
+    // Prevent CURSOR_API_KEY from leaking into non-proxy envs — would cause
+    // isCursorProxyEnv false-positive, silently rerouting through direct fetch
+    // which ignores outputFormat (no JSON schema enforcement).
+    delete base.CURSOR_API_KEY;
+    delete base.CURSOR_AUTH_TOKEN;
     return base;
 }
 /**

package/dist/run.js CHANGED

@@ -3,8 +3,8 @@ import { join } from "path";
 import { execSync } from "child_process";
 import chalk from "chalk";
 import { Swarm } from "./swarm.js";
-import { steerWave } from "./steering.js";
-import { getTotalPlannerCost, getPlannerRateLimitInfo, getPeakPlannerContext, runPlannerQuery, setPlannerEnvResolver } from "./planner-query.js";
+import { steerWave, STEER_SCHEMA } from "./steering.js";
+import { getTotalPlannerCost, getPlannerRateLimitInfo, getPeakPlannerContext, runPlannerQuery, setPlannerEnvResolver, attemptJsonParse } from "./planner-query.js";
 import { contextFillInfo } from "./render.js";
 import { getModelCapability } from "./models.js";
 import { buildEnvResolver, isCursorProxyProvider } from "./providers.js";
@@ -55,6 +55,8 @@ export async function executeRun(cfg) {
     let lastCapped = false, lastAborted = false, objectiveComplete = false;
     let lastEstimate;
     const branches = [];
+    let healFailStreak = 0; // consecutive waves where heal-0 agent changed 0 files
+    let zeroFileWaves = 0; // consecutive waves with 0 files across non-heal tasks
     if (cfg.resuming && cfg.resumeState) {
         const rs = cfg.resumeState;
         remaining = Math.max(1, rs.remaining);
@@ -295,8 +297,21 @@ export async function executeRun(cfg) {
     // Shared steering logic used by both resume-steering and in-loop steering
     const runSteering = async () => {
         let steered = false;
+        // ── B1: Skip steering when ≥2 unresolved merge-failed branches exist ──
+        const mergeFailedBranches = branches.filter(b => b.status === "merge-failed");
+        if (mergeFailedBranches.length >= 2) {
+            currentTasks = mergeFailedBranches.map((b, i) => ({
+                id: `branch-retry-${i}`,
+                prompt: `Your previous attempt at this task merge-failed against main. Redo it against the current state of main with minimal, focused edits. Original task:\n\n${b.taskPrompt}`,
+                model: workerModel,
+                postcondition: "pnpm run build",
+            }));
+            display.appendSteeringEvent(`Skipping steering — ${mergeFailedBranches.length} merge-failed branches form the wave`);
+            return true;
+        }
         let steerAttempts = 0;
-        while (!steered && remaining > 0 && !stopping && steerAttempts < 3) {
+        const MAX_STEER_ATTEMPTS = 2; // B2: retry threshold 3 → 2
+        while (!steered && remaining > 0 && !stopping && steerAttempts < MAX_STEER_ATTEMPTS) {
             steerAttempts++;
             const plannerCostBefore = getTotalPlannerCost();
             try {
@@ -350,23 +365,52 @@ export async function executeRun(cfg) {
             }
             catch (err) {
                 accCost += getTotalPlannerCost() - plannerCostBefore;
-                if (steerAttempts < 3) {
-                    display.appendSteeringEvent(`Steering failed (attempt ${steerAttempts}/3)  -- retrying...`);
+                const rawPreview = err?.message?.slice(0, 200) || "(no details)";
+                if (steerAttempts < MAX_STEER_ATTEMPTS) {
+                    display.appendSteeringEvent(`Steering failed (attempt ${steerAttempts}/${MAX_STEER_ATTEMPTS})  -- retrying... ${rawPreview}`);
                     continue;
                 }
-                display.appendSteeringEvent(`Steering failed ${steerAttempts}×  -- falling back`);
-                let fallbackStatus = "";
+                // ── B3: Decomposer fallback (replaces single-giant-fallback) ──
+                display.appendSteeringEvent(`Steering failed ${MAX_STEER_ATTEMPTS}×  — decomposer fallback`);
+                // First: try merge-failed recycling even if only 1 unresolved branch exists
+                const stillFailed = branches.filter(b => b.status === "merge-failed");
+                if (stillFailed.length >= 1) {
+                    currentTasks = stillFailed.map((b, i) => ({
+                        id: `branch-retry-${i}`,
+                        prompt: `Your previous attempt at this task merge-failed against main. Redo it against the current state of main with minimal, focused edits. Original task:\n\n${b.taskPrompt}`,
+                        model: workerModel,
+                        postcondition: "pnpm run build",
+                    }));
+                    display.appendSteeringEvent(`Decomposer: ${stillFailed.length} merge-failed branch(es) retried as swarm tasks`);
+                    steered = true;
+                    break;
+                }
+                // Second: minimal-prompt planner query
+                display.appendSteeringEvent("Decomposer: minimal planner query…");
                 try {
-                    fallbackStatus = readFileSync(join(runDir, "status.md"), "utf-8");
+                    let statusText = "";
+                    try {
+                        statusText = readFileSync(join(runDir, "status.md"), "utf-8");
+                    }
+                    catch { }
+                    const minimalPrompt = `${objective ? `Objective: ${objective}` : ""}\n\nStatus:\n${statusText || "(none)"}\n\nReturn tasks: string[] — 3-6 specific follow-ups. JSON only. {"tasks":[{"prompt":"..."}]}`;
+                    const minimalText = await runPlannerQuery(minimalPrompt, { cwd, model: plannerModel, permissionMode, outputFormat: STEER_SCHEMA, transcriptName: "decomposer-minimal", maxTurns: 40 }, () => { });
+                    const parsed = attemptJsonParse(minimalText);
+                    if (parsed?.tasks?.length > 0) {
+                        currentTasks = parsed.tasks.map((t, i) => ({
+                            id: `decompose-${i}`,
+                            prompt: typeof t === "string" ? t : t.prompt,
+                            model: workerModel,
+                        }));
+                        display.appendSteeringEvent(`Decomposer: ${currentTasks.length} tasks from minimal planner`);
+                        steered = true;
+                        break;
+                    }
                 }
                 catch { }
-                currentTasks = [{
-                        id: "fallback-0",
-                        prompt: `Steering couldn't decide the next step. Read the project, assess what's done vs. remaining, and do the most impactful work.\n\nObjective: ${objective}${fallbackStatus ? `\n\nStatus:\n${fallbackStatus}` : ""}`,
-                        type: "execute",
-                    }];
-                steered = true;
-                break;
+                // Finally: halt
+                display.appendSteeringEvent(`Decomposer: no tasks produced — halting`);
+                return false;
             }
         }
         return steered;
@@ -389,12 +433,26 @@ export async function executeRun(cfg) {
             // Health check before each wave: a broken build poisons every subsequent
             // agent context, so prepend a heal task when detected. Steering-planned
             // tasks still run, just after the build is green again.
+            // Skip if prior heal changed 0 files (heal unable to fix).
             {
-                const healTask = checkProjectHealth(cwd);
-                if (healTask && remaining > 0) {
-                    const withoutDup = currentTasks.filter(t => t.id !== "heal-0");
-                    currentTasks = [healTask, ...withoutDup];
-                    display.appendSteeringEvent(`Health check: build broken — queued heal task`);
+                const healTasks = healFailStreak > 0 ? [] : checkProjectHealth(cwd);
+                if (healTasks.length > 0 && remaining > 0) {
+                    const healIds = healTasks.map(t => t.id);
+                    const withoutDup = currentTasks.filter(t => !healIds.includes(t.id));
+                    currentTasks = [...healTasks, ...withoutDup];
+                    display.appendSteeringEvent(`Health check: build broken — queued ${healTasks.length} heal task(s)`);
+                }
+                else if (healTasks.length === 0 && healFailStreak > 0 && checkProjectHealth(cwd).length > 0) {
+                    display.appendSteeringEvent(`Health check: build broken — heal skipped after ${healFailStreak} failed attempts, needs manual intervention`);
+                    try {
+                        const statusPath2 = join(runDir, "status.md");
+                        const existing2 = existsSync(statusPath2) ? readFileSync(statusPath2, "utf-8") : "";
+                        const marker = "## Heal blocked";
+                        if (!existing2.includes(marker)) {
+                            writeFileSync(statusPath2, `${existing2}${existing2 ? "\n\n" : ""}${marker}\nBuild has been broken for ${healFailStreak} waves, heal agents unable to fix — intervene manually.\n`, "utf-8");
+                        }
+                    }
+                    catch { }
                 }
             }
             if (currentTasks.length > remaining)
@@ -598,7 +656,7 @@ export async function executeRun(cfg) {
             liveConfig.remaining = remaining;
             lastCapped = swarm.cappedOut;
             lastAborted = swarm.aborted;
-            recordBranches(swarm.agents, swarm.mergeResults, branches);
+            recordBranches(swarm.agents, swarm.mergeResults, branches, waveNum);
             saveWaveSession(runDir, waveNum, swarm.agents, swarm.totalCostUsd);
             // Tasks that never made it into the swarm (queue cleared on abort/cap)
             // are preserved as currentTasks so resume picks them up. Budget for these
@@ -623,6 +681,34 @@ export async function executeRun(cfg) {
                     };
                 }),
             });
+            // Track heal fail streak: if a heal-0 task existed this wave and changed 0 files, increment.
+            // If any non-heal execute task changed files, reset.
+            const lastWave = waveHistory[waveHistory.length - 1];
+            const healTask = lastWave?.tasks.find(t => t.type === "heal");
+            if (healTask && !healTask.filesChanged) {
+                healFailStreak++;
+            }
+            else if (lastWave?.tasks.some(t => (t.type !== "heal") && (t.filesChanged ?? 0) > 0)) {
+                healFailStreak = 0;
+            }
+            // C1: Circuit breaker — halt after 2 consecutive waves with 0 files across non-heal tasks
+            const nonHealFiles = lastWave?.tasks.filter(t => t.type !== "heal").reduce((sum, t) => sum + (t.filesChanged ?? 0), 0) ?? 0;
+            if (nonHealFiles === 0 && waveNum > 0) {
+                zeroFileWaves++;
+                if (zeroFileWaves >= 2) {
+                    display.appendSteeringEvent(`Circuit breaker: 2 consecutive waves produced no merged changes — halting to prevent budget drain`);
+                    display.stop();
+                    saveRunState(runDir, buildRunState({ remaining, phase: "stopped", currentTasks: [] }));
+                    display.stop();
+                    restore();
+                    console.log(chalk.red(`\n  Circuit breaker: 2 consecutive waves produced no merged changes.`));
+                    console.log(chalk.red(`  Halting to prevent budget drain. Run preserved at ${runDir}.`));
+                    process.exit(3);
+                }
+            }
+            else {
+                zeroFileWaves = 0;
+            }
             // Hook-blocked work: agents that touched files but nothing landed on the
             // branch (pre-commit hooks, gitignore, writes outside worktree). Surface
             // as a wave-level warning so steering sees it, not just a per-agent log.
@@ -670,6 +756,20 @@ export async function executeRun(cfg) {
                 }
                 if (next !== existing)
                     writeFileSync(statusPath, next, "utf-8");
+                // GC ghost branches: delete merge-failed branches ≥2 waves old and mark discarded.
+                // Safe: their work never landed. The decomposer (Phase B) will re-attempt from saved taskPrompt.
+                const gcCandidates = branches.filter(b => b.status === "merge-failed" && b.firstFailedWave !== undefined && (waveNum - b.firstFailedWave) >= 2);
+                let gcCount = 0;
+                for (const b of gcCandidates) {
+                    try {
+                        execSync(`git branch -D "${b.branch}"`, { cwd, stdio: "ignore" });
+                    }
+                    catch { }
+                    b.status = "discarded";
+                    gcCount++;
+                }
+                if (gcCount > 0)
+                    display.appendSteeringEvent(`GC: discarded ${gcCount} ghost branch(es) ≥2 waves old`);
             }
             catch { }
             // Fire-and-forget debrief after each wave.
@@ -1039,24 +1139,45 @@ async function promptBudgetExtension(ctx) {
         return suggested;
     return n;
 }
+/** Detect build errors and return one or more heal tasks. If errors span ≥2 files,
+ *  emit one task per file so they heal in parallel without merge conflicts. */
 function checkProjectHealth(cwd) {
     const cmd = detectHealthCommand(cwd);
     if (!cmd)
-        return undefined;
+        return [];
     try {
         execSync(cmd, { cwd, encoding: "utf-8", stdio: "pipe", timeout: 60_000 });
-        return undefined;
+        return [];
     }
     catch (err) {
         if (err.killed)
-            return undefined;
+            return [];
         const output = ((err.stdout || "") + "\n" + (err.stderr || "")).trim();
         const trimmed = output.length > 4000 ? output.slice(0, 2000) + "\n…\n" + output.slice(-2000) : output;
-        return {
-            id: "heal-0",
-            prompt: `Fix the broken build. \`${cmd}\` fails after merging parallel work:\n\`\`\`\n${trimmed}\n\`\`\`\nFix every error. Run \`${cmd}\` when done to verify.`,
-            type: "heal",
-        };
+        // B4: Split heal by file — extract distinct source file paths from errors
+        const fileRe = /\/src\/[\w./-]+\.(ts|tsx|js|jsx)/g;
+        const files = new Set();
+        for (const m of trimmed.matchAll(fileRe))
+            files.add(m[0]);
+        if (files.size >= 2) {
+            // One task per file — each agent gets only that file's error context
+            const fileErrors = new Map();
+            for (const f of files) {
+                // Extract lines mentioning this file
+                const lines = trimmed.split("\n").filter(l => l.includes(f));
+                fileErrors.set(f, lines.slice(0, 30).join("\n"));
+            }
+            return Array.from(fileErrors.entries()).map(([file, errs], i) => ({
+                id: `heal-${i}`,
+                prompt: `Fix the broken build errors in \`${file}\`. \`${cmd}\` fails:\n\`\`\`\n${errs}\n\`\`\`\nFix every error in this file. Run \`${cmd}\` when done to verify.`,
+                type: "heal",
+            }));
+        }
+        return [{
+                id: "heal-0",
+                prompt: `Fix the broken build. \`${cmd}\` fails after merging parallel work:\n\`\`\`\n${trimmed}\n\`\`\`\nFix every error. Run \`${cmd}\` when done to verify.`,
+                type: "heal",
+            }];
     }
 }
 function detectHealthCommand(cwd) {

package/dist/state.d.ts CHANGED

@@ -72,6 +72,6 @@ export declare function recordBranches(agents: {
 }[], mergeResults: {
     branch: string;
     ok: boolean;
-}[], branches: BranchRecord[]): void;
+}[], branches: BranchRecord[], currentWave?: number): void;
 export declare function autoMergeBranches(cwd: string, branches: BranchRecord[], onLog: (msg: string) => void): void;
 export declare function archiveMilestone(baseDir: string, waveNum: number): void;

package/dist/state.js CHANGED

@@ -461,7 +461,7 @@ export function loadWaveHistory(runDir) {
     }
 }
 // ── Branch management ──
-export function recordBranches(agents, mergeResults, branches) {
+export function recordBranches(agents, mergeResults, branches, currentWave) {
     for (const a of agents) {
         if (a.branch) {
             branches.push({
@@ -475,8 +475,12 @@ export function recordBranches(agents, mergeResults, branches) {
     }
     for (const mr of mergeResults) {
         const br = branches.find(b => b.branch === mr.branch);
-        if (br)
+        if (br) {
             br.status = mr.ok ? "merged" : "merge-failed";
+            if (!mr.ok && !br.firstFailedWave && currentWave !== undefined) {
+                br.firstFailedWave = currentWave;
+            }
+        }
     }
 }
 export function autoMergeBranches(cwd, branches, onLog) {

package/dist/steering.d.ts CHANGED

@@ -1,3 +1,52 @@
 import type { PermMode, SteerResult, RunMemory, WaveSummary } from "./types.js";
 import { type PlannerLog } from "./planner-query.js";
+export declare const STEER_SCHEMA: {
+    type: "json_schema";
+    schema: {
+        type: string;
+        properties: {
+            done: {
+                type: string;
+            };
+            reasoning: {
+                type: string;
+            };
+            statusUpdate: {
+                type: string;
+            };
+            goalUpdate: {
+                type: string;
+            };
+            estimatedSessionsRemaining: {
+                type: string;
+            };
+            tasks: {
+                type: string;
+                items: {
+                    type: string;
+                    properties: {
+                        prompt: {
+                            type: string;
+                        };
+                        model: {
+                            type: string;
+                        };
+                        noWorktree: {
+                            type: string;
+                        };
+                        type: {
+                            type: string;
+                            enum: string[];
+                        };
+                        postcondition: {
+                            type: string;
+                        };
+                    };
+                    required: string[];
+                };
+            };
+        };
+        required: string[];
+    };
+};
 export declare function steerWave(objective: string, history: WaveSummary[], remainingBudget: number, cwd: string, plannerModel: string, workerModel: string, fastModel: string | undefined, permissionMode: PermMode, concurrency: number, onLog: PlannerLog, runMemory?: RunMemory, transcriptName?: string): Promise<SteerResult>;

package/dist/steering.js CHANGED

@@ -2,7 +2,10 @@ import { runPlannerQuery, attemptJsonParse, postProcess } from "./planner-query.
 import { contextConstraintNote } from "./models.js";
 import { DESIGN_THINKING } from "./planner.js";
 import { createTurn, beginTurn, endTurn } from "./turns.js";
-const STEER_SCHEMA = {
+import { writeFileSync, mkdirSync } from "fs";
+import { join } from "path";
+import { getTranscriptRunDir } from "./transcripts.js";
+export const STEER_SCHEMA = {
     type: "json_schema",
     schema: {
         type: "object",
@@ -24,10 +27,11 @@ const STEER_SCHEMA = {
         required: ["done", "tasks", "reasoning", "statusUpdate", "estimatedSessionsRemaining"],
     },
 };
-export async function steerWave(objective, history, remainingBudget, cwd, plannerModel, workerModel, fastModel, permissionMode, concurrency, onLog, runMemory, transcriptName = "steer") {
-    const constraint = contextConstraintNote(workerModel);
-    const recentWaves = history.slice(-3);
-    const recentText = recentWaves.length > 0 ? recentWaves.map(w => {
+const PROMPT_BUDGET = 6000;
+/** Build a compact wave summary; keepLast controls how many recent waves to include. */
+function buildRecentText(history, keepLast) {
+    const recentWaves = history.slice(-keepLast);
+    return recentWaves.length > 0 ? recentWaves.map(w => {
         const lines = w.tasks.map(t => {
             const isExecute = !t.type || t.type === "execute";
             const files = t.filesChanged ? ` (${t.filesChanged} files)` : isExecute ? " (0 files)" : " (read-only)";
@@ -39,16 +43,25 @@ export async function steerWave(objective, history, remainingBudget, cwd, planne
         const warn = totalExecute > 0 && zeroExecute > totalExecute / 2 ? `\n  ⚠ ${zeroExecute}/${totalExecute} execute tasks changed 0 files  -- tasks may be mis-scoped or blocked` : "";
         return `Wave ${w.wave + 1}:\n${lines}${warn}`;
     }).join("\n\n") : "(first wave)";
+}
+export async function steerWave(objective, history, remainingBudget, cwd, plannerModel, workerModel, fastModel, permissionMode, concurrency, onLog, runMemory, transcriptName = "steer") {
+    const constraint = contextConstraintNote(workerModel);
     const cap = (s, max) => s.length > max ? s.slice(0, max) + "\n...(truncated)" : s;
     const statusBlock = runMemory?.status ? `\nCurrent project status:\n${runMemory.status}\n` : "";
-    const milestoneBlock = runMemory?.milestones ? `\nMilestone snapshots:\n${cap(runMemory.milestones, 4000)}\n` : "";
-    const designBlock = runMemory?.designs ? `\nArchitectural research:\n${cap(runMemory.designs, 4000)}\n` : "";
-    const reflectionBlock = runMemory?.reflections ? `\nLatest quality reports:\n${cap(runMemory.reflections, 3000)}\n` : "";
-    const verificationBlock = runMemory?.verifications ? `\nVerification results (from actually running the app):\n${cap(runMemory.verifications, 3000)}\n` : "";
+    const milestoneBlock = runMemory?.milestones ? `\nMilestone snapshots:\n${cap(runMemory.milestones, 2000)}\n` : "";
+    const designBlock = runMemory?.designs ? `\nArchitectural research:\n${cap(runMemory.designs, 1500)}\n` : "";
+    const reflectionBlock = runMemory?.reflections ? `\nLatest quality reports:\n${cap(runMemory.reflections, 1000)}\n` : "";
+    const verificationBlock = runMemory?.verifications ? `\nVerification results (from actually running the app):\n${cap(runMemory.verifications, 1000)}\n` : "";
     const goalBlock = runMemory?.goal ? `\nNorth star  -- what "amazing" means:\n${runMemory.goal}\n` : "";
-    const prevRunBlock = runMemory?.previousRuns ? `\nKnowledge from previous runs:\n${cap(runMemory.previousRuns, 3000)}\n` : "";
+    const prevRunBlock = runMemory?.previousRuns ? `\nKnowledge from previous runs:\n${cap(runMemory.previousRuns, 800)}\n` : "";
     const guidanceBlock = runMemory?.userGuidance ? `\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\nUSER DIRECTIVES  -- highest priority\nThese come directly from the user running this session. They override prior assumptions about status, goal, and next steps. Incorporate them into the wave you compose below. If they conflict with earlier decisions, the user wins. Reflect the new direction in statusUpdate so future waves remember.\n\n${cap(runMemory.userGuidance, 4000)}\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n` : "";
-    const prompt = `You are the quality director for an autonomous multi-wave agent system. Your job is to push the work toward "amazing," not just "done."
+    // Collapse archetype menu after wave 3 to save ~2 KB
+    const archetypesShort = `Archetypes: execute | explore | critique | synthesize | verify | user-test | polish | simplify`;
+    const archetypeBlock = history.length >= 3
+        ? archetypesShort
+        : null;
+    let recentText = buildRecentText(history, 3);
+    let prompt = `You are the quality director for an autonomous multi-wave agent system. Your job is to push the work toward "amazing," not just "done."
 ${guidanceBlock}
 Objective: ${objective}
 ${goalBlock}${statusBlock}${milestoneBlock}${prevRunBlock}
@@ -66,7 +79,7 @@ If verification found issues, those are the priority. Fix what's broken before b
 ## Compose the next wave
-You have full creative freedom. Design the wave that will have the highest impact right now. Here are archetypes to draw from  -- mix, adapt, or invent your own:
+You have full creative freedom. Design the wave that will have the highest impact right now.${archetypeBlock ? `\n\nUse these archetypes as shorthand — mix, adapt, or invent your own:\n\n${archetypeBlock}` : ` Here are archetypes to draw from  -- mix, adapt, or invent your own:
 **Execute**  -- Agents implement concrete changes in parallel. Each touches different files. The bread and butter.
   Example: 5 agents each owning a different feature or fix
@@ -90,52 +103,86 @@ You have full creative freedom. Design the wave that will have the highest impac
   Example: 2 agents, one on happy paths, one on error/edge states
 **Simplify**  -- Invoke the 'simplify' skill. It reviews changed code and spawns parallel sub-agents for thorough review.
-  Example: 1 agent per wave with task type "review", let the skill handle the rest
-You can combine these. A wave can have 3 execute agents + 1 verification agent. Or 2 divergent explorers. Whatever the situation calls for.
+  Example: 1 agent per wave with task type "review", let the skill handle the rest`}
-For non-execute tasks (critique, verify, user-test, synthesize), tell agents to write their output to files in the run directory so findings persist for future waves. Use paths like: .claude-overnight/latest/reflections/wave-N-{topic}.md or .claude-overnight/latest/verifications/wave-N-{topic}.md.
+For non-execute tasks (critique, verify, user-test, synthesize), tell agents to write their output to files in the run directory so findings persist for future waves. Use paths like: .claude-overnight/latest/reflections/wave-n-{topic}.md or .claude-overnight/latest/verifications/wave-n-{topic}.md.
 IMPORTANT: You cannot declare "done" unless at least one verification has confirmed the app works. If you're considering done but haven't verified, compose a verification task first.
 Respond with ONLY a JSON object (no markdown fences):
-{
-  "done": false,
-  "reasoning": "your assessment and why you chose this wave composition",
-  "goalUpdate": "optional  -- refine what 'amazing' means as you learn more",
-  "statusUpdate": "REQUIRED  -- concise project status: what's built, what works, what's rough, quality level, key gaps. This replaces the previous status.",
-  "estimatedSessionsRemaining": 15,
-  "tasks": [
-    {"prompt": "task instruction...", "model": "worker", "postcondition": "test -f src/new-file.ts"},
-    {"prompt": "quick icon fix, verified by next wave's workers...", "model": "fast"},
-    {"prompt": "verify the app end-to-end...", "model": "worker", "noWorktree": true}
-  ]
-}
+{"done":boolean,"reasoning":"...","statusUpdate":"REQUIRED","estimatedSessionsRemaining":N,"tasks":[{"prompt":"...","model":"worker|fast","noWorktree":true/false,"postcondition":"..."}]}
 "estimatedSessionsRemaining" is REQUIRED. Your best honest estimate of how many MORE agent sessions (beyond the wave you just composed above) are needed to reach 'amazing'  -- include follow-up fixes, polish, verification, and anything else you'd want before shipping. Be realistic, not optimistic. Use 0 only if truly done.
-The "model" field on each task — you have **two kinds of workers**, both first-class. Pick the right one per task:
+The "model" field on each task — two kinds of workers. Pick the right one:
-**Fast worker — "fast" (${fastModel ?? "not set"})** is the default workhorse for well-scoped, mechanical tasks. It's a real worker, same tools, same environment — just a cheaper, faster model. The next wave's workers (fast or main) will catch and fix any issues. Route here by default when any of these apply:
-- Single-file edits, refactors, renames
-- Surgical multi-line changes with a clear spec (add a param, wrap a call, tweak a prompt line)
-- Read/research: scan files, summarize findings
-- Build checks, postcondition verification
-- E2E test runs with concrete steps
-- Simple critiques, polish tweaks
-- Running existing scripts/tests and capturing output
-- Docs / markdown updates
-- Stdlib-only utility scripts with a crisp spec
+**Fast worker — "fast" (${fastModel ?? "not set"})** for well-scoped, mechanical tasks: single-file edits, refactors, renames, read/research, build checks, simple critiques, docs updates.
-**Main worker — "worker" (${workerModel})** is for tasks that genuinely need deeper reasoning: multi-file features, complex logic, architectural changes, ambiguous specs, anything where a mis-step costs more than a wave to recover from.
+**Main worker — "worker" (${workerModel})** for tasks that need deeper reasoning: multi-file features, complex logic, architectural changes, ambiguous specs.
-When in doubt, pick "fast". Both are workers; the wave loop iterates. Over-using "worker" is a real cost — aim to route the clear majority of well-scoped tasks to the fast worker whenever a fast worker is configured.
+When in doubt, pick "fast".
-Set "noWorktree": true for verify/user-test tasks -- they need the real project directory with env files, dependencies, and local config.
+Set "noWorktree": true for verify/user-test tasks.
-OPTIONAL "postcondition": a single shell one-liner that exits 0 when the task is truly done. The framework runs it after merge; if it fails, the agent's "no-op" claim is rejected and the task is retried with the failure output as context. Use it whenever the task has a concrete, machine-checkable outcome. Examples: \`test -f src/tracking/watchlist-poller.ts && grep -q "runWatchlistPoll" src/tracking/watchlist-poller.ts\`, \`grep -q "watchlistPollerTask" src/scraper/scheduler.ts\`, \`pnpm run build\`, \`diff -q src/public/index.html frontend/dist/index.html\`. Keep it cheap (sub-second, no network). Omit for exploratory/research tasks where there is no crisp check.
+OPTIONAL "postcondition": a single shell one-liner that exits 0 when the task is truly done. Keep it cheap. Omit for exploratory tasks.
-If done: {"done": true, "reasoning": "...", "statusUpdate": "...", "estimatedSessionsRemaining": 0, "tasks": []}`;
+If done: {"done":true,"reasoning":"...","statusUpdate":"...","estimatedSessionsRemaining":0,"tasks":[]}`;
+    // ── Hard 6 KB budget: trim non-critical blocks if over limit ──
+    let trimmed = 0;
+    if (prompt.length > PROMPT_BUDGET) {
+        // 1. Keep last 2 waves instead of 3
+        recentText = buildRecentText(history, 2);
+        prompt = prompt.replace(`Recent waves:\n${buildRecentText(history, 3)}`, `Recent waves:\n${recentText}`);
+        trimmed++;
+    }
+    if (prompt.length > PROMPT_BUDGET && runMemory?.milestones) {
+        const old = `\nMilestone snapshots:\n${cap(runMemory.milestones, 2000)}\n`;
+        const neu = `\nMilestone snapshots:\n${cap(runMemory.milestones, 1000)}\n`;
+        if (old !== neu) {
+            prompt = prompt.replace(old, neu);
+            trimmed++;
+        }
+    }
+    if (prompt.length > PROMPT_BUDGET && runMemory?.designs) {
+        const old = `\nArchitectural research:\n${cap(runMemory.designs, 1500)}\n`;
+        const neu = `\nArchitectural research:\n${cap(runMemory.designs, 1000)}\n`;
+        if (old !== neu) {
+            prompt = prompt.replace(old, neu);
+            trimmed++;
+        }
+    }
+    if (prompt.length > PROMPT_BUDGET && runMemory?.reflections) {
+        const old = `\nLatest quality reports:\n${cap(runMemory.reflections, 1000)}\n`;
+        const neu = `\nLatest quality reports:\n${cap(runMemory.reflections, 500)}\n`;
+        if (old !== neu) {
+            prompt = prompt.replace(old, neu);
+            trimmed++;
+        }
+    }
+    if (prompt.length > PROMPT_BUDGET && runMemory?.verifications) {
+        const old = `\nVerification results (from actually running the app):\n${cap(runMemory.verifications, 1000)}\n`;
+        const neu = `\nVerification results (from actually running the app):\n${cap(runMemory.verifications, 500)}\n`;
+        if (old !== neu) {
+            prompt = prompt.replace(old, neu);
+            trimmed++;
+        }
+    }
+    if (prompt.length > PROMPT_BUDGET && runMemory?.previousRuns) {
+        const old = `\nKnowledge from previous runs:\n${cap(runMemory.previousRuns, 800)}\n`;
+        const neu = `\nKnowledge from previous runs:\n${cap(runMemory.previousRuns, 400)}\n`;
+        if (old !== neu) {
+            prompt = prompt.replace(old, neu);
+            trimmed++;
+        }
+    }
+    if (trimmed > 0) {
+        onLog(`Steering prompt trimmed ${trimmed} blocks (${prompt.length}/${PROMPT_BUDGET} chars)`, "event");
+    }
+    // ── Non-Claude planner JSON hardening ──
+    if (!/^claude/i.test(plannerModel)) {
+        const directive = `OUTPUT: single JSON object. No prose. No markdown fences.`;
+        prompt = `${directive}\n\n${prompt}\n\n${directive}`;
+    }
     onLog("Assessing...", "status");
     onLog(`Reading codebase  -- wave ${history.length + 1}`, "event");
     const turn = createTurn("steer", `Steer wave ${history.length + 1}`, `steer-${history.length}`, plannerModel);
@@ -146,11 +193,34 @@ If done: {"done": true, "reasoning": "...", "statusUpdate": "...", "estimatedSes
         if (first)
             return first;
         onLog(`Steering parse failed (${resultText.length} chars). Asking model to fix...`, "event");
+        // C2: persist raw output on parse failure
+        const steerDir = getTranscriptRunDir() ? join(getTranscriptRunDir(), "steering") : undefined;
+        if (steerDir) {
+            try {
+                mkdirSync(steerDir, { recursive: true });
+            }
+            catch { }
+            // Extract wave info from transcriptName (e.g. "steer-wave-32-attempt-1")
+            const waveMatch = transcriptName.match(/wave-(\d+)-attempt-(\d+)/);
+            if (waveMatch) {
+                writeFileSync(join(steerDir, `wave-${waveMatch[1]}-attempt-${waveMatch[2]}-raw.txt`), resultText, "utf-8");
+            }
+        }
         const snippet = resultText.length > 2000 ? resultText.slice(0, 1000) + "\n...\n" + resultText.slice(-800) : resultText;
         const retryText = await runPlannerQuery(`Your previous steering response could not be parsed as JSON. Here is what you returned:\n\n---\n${snippet}\n---\n\nExtract or rewrite the above as ONLY a valid JSON object with this schema: {"done":boolean,"reasoning":"...","statusUpdate":"...","tasks":[{"prompt":"..."}]}\n\nRespond with ONLY the JSON, no markdown fences, no explanation.`, { cwd, model: plannerModel, permissionMode, outputFormat: STEER_SCHEMA, transcriptName: `${transcriptName}-retry`, turnId: turn.id }, onLog);
         const retryParsed = attemptJsonParse(retryText);
         if (retryParsed)
             return retryParsed;
+        // C2: persist retry raw output
+        if (steerDir) {
+            try {
+                const waveMatch2 = transcriptName.match(/wave-(\d+)-attempt-(\d+)/);
+                if (waveMatch2) {
+                    writeFileSync(join(steerDir, `wave-${waveMatch2[1]}-attempt-${waveMatch2[2]}-retry-raw.txt`), retryText, "utf-8");
+                }
+            }
+            catch { }
+        }
         throw new Error(`Could not parse steering response after retry (${resultText.length} chars: ${resultText.slice(0, 120)}...)`);
     })();
     const isDone = parsed.done === true;

package/dist/transcripts.d.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 export declare function setTranscriptRunDir(dir: string | undefined): void;
 export declare function getTranscriptRunDir(): string | undefined;
 export declare function transcriptPath(name: string): string | undefined;
-/** Append a single event; silent on error (disk full, permission, etc.). */
+/** Append a single event; log to stderr once per name on failure (C5). */
 export declare function writeTranscriptEvent(name: string, event: Record<string, unknown>): void;

package/dist/transcripts.js CHANGED Viewed

@@ -25,7 +25,9 @@ export function getTranscriptRunDir() {
 export function transcriptPath(name) {
     return _runDir ? join(_runDir, "transcripts", `${name}.ndjson`) : undefined;
 }
-/** Append a single event; silent on error (disk full, permission, etc.). */
+/** Names that already errored — guard against repeated stderr spam. */
+const _seenErrors = new Set();
+/** Append a single event; log to stderr once per name on failure (C5). */
 export function writeTranscriptEvent(name, event) {
     const path = transcriptPath(name);
     if (!path)
@@ -34,5 +36,11 @@ export function writeTranscriptEvent(name, event) {
         mkdirSync(dirname(path), { recursive: true });
         appendFileSync(path, JSON.stringify({ t: Date.now(), ...event }) + "\n", "utf-8");
     }
-    catch { }
+    catch (err) {
+        if (!_seenErrors.has(name)) {
+            _seenErrors.add(name);
+            const msg = err instanceof Error ? err.message : String(err);
+            process.stderr.write(`[transcript] writeTranscriptEvent("${name}") failed: ${msg}\n`);
+        }
+    }
 }

package/dist/types.d.ts CHANGED Viewed

@@ -156,9 +156,10 @@ export type MergeStrategy = "yolo" | "branch";
 export interface BranchRecord {
     branch: string;
     taskPrompt: string;
-    status: "merged" | "unmerged" | "failed" | "merge-failed";
+    status: "merged" | "unmerged" | "failed" | "merge-failed" | "discarded";
     filesChanged: number;
     costUsd: number;
+    firstFailedWave?: number;
 }
 /** Per-window rate limit snapshot (matches SDK rateLimitType). */
 export interface RateLimitWindow {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "claude-overnight",
-  "version": "1.25.42",
+  "version": "1.25.43",
   "description": "Parallel Claude agents in git worktrees with a usage cap that reserves headroom for your interactive Claude Code. Crash-safe resume. Provider-agnostic model catalog (Anthropic, Cursor, OpenAI, Gemini, DeepSeek, Llama, Qwen) with capability-based task scoping.",
   "type": "module",
   "bin": {

package/plugins/claude-overnight/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "claude-overnight",
-  "version": "1.25.42",
+  "version": "1.25.43",
   "description": "Claude Code skill for understanding, installing, and inspecting claude-overnight runs  -- parallel Claude agents in git worktrees with thinking waves, multi-wave steering, and crash-safe resume. Supports Cursor API Proxy, Qwen, OpenRouter.",
   "author": {
     "name": "Francesco Fornace"