npm - claude-overnight - Versions diffs - 1.4.0 → 1.6.0 - Mend

claude-overnight 1.4.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

package/README.md CHANGED Viewed

@@ -49,6 +49,7 @@ claude-overnight
 ◆ Thinking: 5 agents exploring...         ← architects analyze your codebase
 ◆ Orchestrating plan...                   ← synthesizes 50 concrete tasks
 ◆ Wave 1 · 50 tasks · $4.20 spent        ← fully autonomous from here
+  ↑ 1.2M in  ↓ 340K out  $4.20 / $4.24 total
 ◆ Assessing... how close to amazing?
 ◆ Wave 2 · 30 tasks · $18.50 spent       ← improvements from assessment
 ◆ Reflection: 2 agents reviewing          ← deep quality audit
@@ -223,6 +224,8 @@ Changes take effect between waves — active agents finish their current task.
 The usage bar cycles through all rate limit windows (5h, 7d, etc.) every 3 seconds, showing utilization per window. Usage info is shown during all phases — thinking, orchestration, steering, and execution.
+When using extra usage with a budget, a dedicated progress bar shows spend vs limit with color-coded fill (magenta → yellow → red).
 ## Rate limits
 Built for unattended runs lasting hours or days.

package/dist/index.js CHANGED Viewed

@@ -7,7 +7,7 @@ import { createInterface } from "readline";
 import chalk from "chalk";
 import { query } from "@anthropic-ai/claude-agent-sdk";
 import { Swarm } from "./swarm.js";
-import { planTasks, refinePlan, detectModelTier, steerWave, identifyThemes, buildThinkingTasks, buildReflectionTasks, orchestrate } from "./planner.js";
+import { planTasks, refinePlan, detectModelTier, steerWave, identifyThemes, buildThinkingTasks, orchestrate } from "./planner.js";
 import { startRenderLoop, renderSummary } from "./ui.js";
 // ── CLI flag parsing ──
 function parseCliFlags(argv) {
@@ -295,6 +295,7 @@ function readRunMemory(runDir, previousRuns) {
     return {
         designs: readMdDir(join(runDir, "designs")),
         reflections: readMdDir(join(runDir, "reflections")),
+        verifications: readMdDir(join(runDir, "verifications")),
         milestones: readMdDir(join(runDir, "milestones")),
         status,
         goal,
@@ -385,6 +386,7 @@ function createRunDir(rootDir) {
     const runDir = join(rootDir, "runs", ts);
     mkdirSync(join(runDir, "designs"), { recursive: true });
     mkdirSync(join(runDir, "reflections"), { recursive: true });
+    mkdirSync(join(runDir, "verifications"), { recursive: true });
     mkdirSync(join(runDir, "milestones"), { recursive: true });
     mkdirSync(join(runDir, "sessions"), { recursive: true });
     return runDir;
@@ -408,6 +410,35 @@ function saveWaveSession(baseDir, waveNum, kind, swarm) {
         totalCost: swarm.totalCostUsd,
     }, null, 2), "utf-8");
 }
+/** Rebuild waveHistory from saved session files on resume. */
+function loadWaveHistory(runDir) {
+    const dir = join(runDir, "sessions");
+    try {
+        return readdirSync(dir)
+            .filter(f => f.startsWith("wave-") && f.endsWith(".json"))
+            .sort((a, b) => {
+            const numA = parseInt(a.replace("wave-", "").replace(".json", ""));
+            const numB = parseInt(b.replace("wave-", "").replace(".json", ""));
+            return numA - numB;
+        })
+            .map(f => {
+            const data = JSON.parse(readFileSync(join(dir, f), "utf-8"));
+            return {
+                wave: data.wave,
+                kind: data.kind,
+                tasks: (data.agents || []).map((a) => ({
+                    prompt: a.prompt,
+                    status: a.status,
+                    filesChanged: a.filesChanged,
+                    error: a.error,
+                })),
+            };
+        });
+    }
+    catch {
+        return [];
+    }
+}
 function recordBranches(swarm, branches) {
     for (const a of swarm.agents) {
         if (a.branch) {
@@ -669,6 +700,11 @@ async function main() {
             if (unmerged > 0) {
                 console.log("");
                 autoMergeBranches(cwd, prev.branches, (msg) => console.log(chalk.dim(`  ${msg}`)));
+                // Persist merged branch statuses immediately so they survive a crash before next saveRunState
+                try {
+                    saveRunState(incomplete.dir, prev);
+                }
+                catch { }
             }
         }
     }
@@ -681,7 +717,18 @@ async function main() {
     let usageCap;
     let allowExtraUsage = false;
     let extraUsageBudget;
-    if (!nonInteractive) {
+    if (resuming) {
+        // Skip interactive flow entirely — all config is restored from saved state later
+        workerModel = resumeState.workerModel;
+        plannerModel = resumeState.plannerModel;
+        budget = resumeState.budget;
+        concurrency = resumeState.concurrency;
+        objective = resumeState.objective;
+        usageCap = resumeState.usageCap;
+        allowExtraUsage = resumeState.allowExtraUsage ?? false;
+        extraUsageBudget = resumeState.extraUsageBudget;
+    }
+    else if (!nonInteractive) {
         // ① Objective
         while (true) {
             objective = await ask(`\n  ${chalk.cyan("①")} ${chalk.bold("What should the agents do?")}\n  ${chalk.cyan(">")} `);
@@ -817,11 +864,11 @@ async function main() {
         }
     }
     validateConcurrency(concurrency);
-    const permissionMode = fileCfg?.permissionMode ?? "auto";
-    const useWorktrees = fileCfg?.useWorktrees ?? (isGitRepo(cwd));
+    let permissionMode = resuming ? resumeState.permissionMode : (fileCfg?.permissionMode ?? "auto");
+    let useWorktrees = resuming ? resumeState.useWorktrees : (fileCfg?.useWorktrees ?? isGitRepo(cwd));
     if (useWorktrees)
         validateGitRepo(cwd);
-    const mergeStrategy = fileCfg?.mergeStrategy ?? "yolo";
+    let mergeStrategy = resuming ? resumeState.mergeStrategy : (fileCfg?.mergeStrategy ?? "yolo");
     if (nonInteractive) {
         const capStr = usageCap != null ? `  cap=${Math.round(usageCap * 100)}%` : "";
         const extraStr = allowExtraUsage ? (extraUsageBudget ? `  extra=$${extraUsageBudget}` : "  extra=∞") : "  extra=off";
@@ -838,7 +885,7 @@ async function main() {
     const runDir = resuming && resumeRunDir ? resumeRunDir : (orphanedDir ?? createRunDir(rootDir));
     const previousKnowledge = readPreviousRunKnowledge(rootDir);
     // ── Plan phase (interactive: review loop, non-interactive: auto-plan or skip) ──
-    const needsPlan = tasks.length === 0;
+    const needsPlan = tasks.length === 0 && !resuming;
     const designDir = join(runDir, "designs");
     if (needsPlan) {
         if (noTTY) {
@@ -1052,7 +1099,7 @@ async function main() {
             process.exit(1);
         }
     }
-    if (tasks.length === 0) {
+    if (tasks.length === 0 && !resuming) {
         console.error("No tasks provided.");
         process.exit(1);
     }
@@ -1064,7 +1111,7 @@ async function main() {
     // ── Run (wave loop) ──
     process.stdout.write("\x1B[?25l");
     const restore = () => process.stdout.write("\x1B[?25h\n");
-    const runStartedAt = Date.now();
+    const runStartedAt = resuming && resumeState?.startedAt ? new Date(resumeState.startedAt).getTime() : Date.now();
     // Wave-loop state — either fresh or resumed
     mkdirSync(join(runDir, "reflections"), { recursive: true });
     mkdirSync(join(runDir, "milestones"), { recursive: true });
@@ -1079,7 +1126,7 @@ async function main() {
     let accIn = 0, accOut = 0;
     let lastCapped = false, lastAborted = false, objectiveComplete = false;
     let lastWaveKind;
-    let reflectionBudgetUsed;
+    let overheadBudgetUsed;
     const branches = [];
     if (resuming && resumeState) {
         // Restore ALL config from saved state
@@ -1091,7 +1138,7 @@ async function main() {
         accFailed = resumeState.accFailed;
         accTools = 0;
         lastWaveKind = resumeState.lastWaveKind;
-        reflectionBudgetUsed = resumeState.reflectionBudgetUsed;
+        overheadBudgetUsed = resumeState.overheadBudgetUsed ?? (resumeState.reflectionBudgetUsed ?? 0) + (resumeState.verificationBudgetUsed ?? 0);
         branches.push(...resumeState.branches);
         objective = resumeState.objective;
         workerModel = resumeState.workerModel;
@@ -1102,7 +1149,12 @@ async function main() {
         usageCap = resumeState.usageCap;
         allowExtraUsage = resumeState.allowExtraUsage ?? false;
         extraUsageBudget = resumeState.extraUsageBudget;
-        console.log(chalk.green(`\n  ✓ Resumed`) + chalk.dim(` · wave ${waveNum + 1} · ${remaining} remaining · $${accCost.toFixed(2)} spent\n`));
+        permissionMode = resumeState.permissionMode;
+        useWorktrees = resumeState.useWorktrees;
+        mergeStrategy = resumeState.mergeStrategy;
+        // Restore wave history from saved session files so steerer has full context
+        waveHistory.push(...loadWaveHistory(runDir));
+        console.log(chalk.green(`\n  ✓ Resumed`) + chalk.dim(` · wave ${waveNum + 1} · ${remaining} remaining · $${accCost.toFixed(2)} spent · ${waveHistory.length} prior waves\n`));
     }
     else {
         // Fresh run
@@ -1121,15 +1173,15 @@ async function main() {
         accIn = thinkingIn;
         accOut = thinkingOut;
         lastWaveKind = "execute";
-        reflectionBudgetUsed = 0;
+        overheadBudgetUsed = 0;
     }
     liveConfig.remaining = remaining;
     liveConfig.usageCap = usageCap;
-    const maxReflectionBudget = Math.max(2, Math.ceil((budget ?? 10) * 0.05));
+    const maxOverheadBudget = Math.max(4, Math.ceil((budget ?? 10) * 0.15));
     // For flex + branch strategy: create one target branch, waves merge via yolo into it
     let runBranch;
     let originalRef;
-    if (flex && mergeStrategy === "branch" && useWorktrees) {
+    if (flex && mergeStrategy === "branch" && useWorktrees && !resuming) {
         try {
             originalRef = execSync("git rev-parse --abbrev-ref HEAD", { cwd, encoding: "utf-8", stdio: "pipe" }).trim();
             if (originalRef === "HEAD")
@@ -1158,6 +1210,38 @@ async function main() {
     process.on("SIGTERM", () => gracefulStop("SIGTERM"));
     process.on("uncaughtException", (err) => { currentSwarm?.abort(); currentSwarm?.cleanup(); restore(); console.error(chalk.red(`\n  Uncaught: ${err.message}`)); process.exit(1); });
     process.on("unhandledRejection", (reason) => { currentSwarm?.abort(); currentSwarm?.cleanup(); restore(); console.error(chalk.red(`\n  Unhandled: ${reason instanceof Error ? reason.message : reason}`)); process.exit(1); });
+    // When resuming a flex run with no queued tasks, steer immediately to get the next wave
+    if (resuming && flex && currentTasks.length === 0 && remaining > 0) {
+        console.log(chalk.cyan(`\n  ◆ Assessing...\n`));
+        process.stdout.write("\x1B[?25l");
+        try {
+            const memory = readRunMemory(runDir, previousKnowledge || undefined);
+            const steer = await steerWave(objective, waveHistory, remaining, cwd, plannerModel, workerModel, permissionMode, concurrency, makeProgressLog(), memory);
+            process.stdout.write(`\x1B[2K\r`);
+            process.stdout.write("\x1B[?25h");
+            if (steer.statusUpdate)
+                writeStatus(runDir, steer.statusUpdate);
+            if (steer.goalUpdate)
+                writeGoalUpdate(runDir, steer.goalUpdate);
+            if (!steer.done && steer.tasks.length > 0) {
+                console.log(chalk.dim(`  ${steer.reasoning}\n`));
+                currentTasks = steer.tasks.map(t => ({
+                    ...t,
+                    model: t.model === "planner" ? plannerModel : t.model === "worker" ? workerModel : t.model,
+                }));
+                lastWaveKind = steer.waveKind;
+            }
+            else if (steer.done) {
+                console.log(chalk.green(`  \u2713 ${steer.reasoning}\n`));
+                objectiveComplete = true;
+                remaining = 0;
+            }
+        }
+        catch (err) {
+            process.stdout.write("\x1B[?25h");
+            console.log(chalk.yellow(`  Steering failed: ${err.message?.slice(0, 80)} \u2014 stopping\n`));
+        }
+    }
     while (remaining > 0 && currentTasks.length > 0 && !stopping) {
         if (currentTasks.length > remaining)
             currentTasks = currentTasks.slice(0, remaining);
@@ -1168,6 +1252,7 @@ async function main() {
         const swarm = new Swarm({
             tasks: currentTasks, concurrency, cwd, model: workerModel, permissionMode, allowedTools,
             useWorktrees, mergeStrategy: waveMerge, agentTimeoutMs, usageCap, allowExtraUsage, extraUsageBudget,
+            baseCostUsd: accCost,
         });
         currentSwarm = swarm;
         const stopRender = startRenderLoop(swarm, liveConfig);
@@ -1209,8 +1294,8 @@ async function main() {
         saveRunState(runDir, {
             id: `run-${new Date().toISOString().slice(0, 19)}`, objective: objective, budget: budget ?? tasks.length,
             remaining, workerModel, plannerModel, concurrency, permissionMode,
-            usageCap, allowExtraUsage, extraUsageBudget, flex, useWorktrees, mergeStrategy, waveNum, currentTasks,
-            lastWaveKind, reflectionBudgetUsed, accCost, accCompleted, accFailed,
+            usageCap, allowExtraUsage, extraUsageBudget, flex, useWorktrees, mergeStrategy, waveNum, currentTasks: [],
+            lastWaveKind, overheadBudgetUsed, accCost, accCompleted, accFailed,
             branches, phase: "steering", startedAt: new Date(runStartedAt).toISOString(), cwd,
         });
         waveHistory.push({
@@ -1225,11 +1310,10 @@ async function main() {
         });
         if (!flex || remaining <= 0 || swarm.aborted || swarm.cappedOut)
             break;
-        // ── Steer: assess quality and decide next action ──
-        // May loop through reflect→re-steer cycles before producing execution tasks
-        let steerDone = false;
+        // ── Steer: assess and compose the next wave ──
+        let steered = false;
         let steerAttempts = 0;
-        while (!steerDone && remaining > 0 && !stopping && steerAttempts < 4) {
+        while (!steered && remaining > 0 && !stopping && steerAttempts < 3) {
             steerAttempts++;
             console.log(chalk.cyan(`\n  ◆ Assessing...\n`));
             process.stdout.write("\x1B[?25l");
@@ -1238,86 +1322,51 @@ async function main() {
                 const steer = await steerWave(objective, waveHistory, remaining, cwd, plannerModel, workerModel, permissionMode, concurrency, makeProgressLog(), memory);
                 process.stdout.write(`\x1B[2K\r`);
                 process.stdout.write("\x1B[?25h");
-                // Persist context layers
                 if (steer.statusUpdate)
                     writeStatus(runDir, steer.statusUpdate);
                 if (steer.goalUpdate) {
                     writeGoalUpdate(runDir, steer.goalUpdate);
                     console.log(chalk.dim(`  Goal refined: ${steer.goalUpdate.slice(0, 100)}\n`));
                 }
-                // Archive milestone every ~5 execution waves
                 const execWaves = waveHistory.filter(w => w.kind === "execute").length;
                 if (execWaves > 0 && execWaves % 5 === 0)
                     archiveMilestone(runDir, waveNum);
-                if (steer.done || steer.action === "done") {
-                    console.log(chalk.green(`  \u2713 ${steer.reasoning}\n`));
-                    steerDone = true;
-                    objectiveComplete = true;
-                    remaining = 0; // exit outer loop too
-                    break;
-                }
-                if (steer.action === "reflect") {
-                    // Safety: no consecutive reflections, budget cap
-                    const canReflect = lastWaveKind !== "reflect" && reflectionBudgetUsed + 2 <= maxReflectionBudget;
-                    if (!canReflect) {
+                if (steer.done || steer.tasks.length === 0) {
+                    const hasVerification = waveHistory.some(w => w.kind.includes("verif"));
+                    if (!hasVerification && remaining >= 1) {
                         console.log(chalk.dim(`  ${steer.reasoning}`));
-                        console.log(chalk.yellow(`  Reflection skipped (${lastWaveKind === "reflect" ? "consecutive" : "budget cap"}) — re-assessing\n`));
-                        lastWaveKind = "execute"; // allow next steer to see non-reflect
-                        continue; // re-steer in this inner loop
+                        console.log(chalk.yellow(`  Done blocked — verification required before completion\n`));
+                        lastWaveKind = "done-blocked";
+                        continue; // re-steer — steerer will see the hint
                     }
-                    // Run reflection wave
-                    console.log(chalk.dim(`  ${steer.reasoning}`));
-                    console.log(chalk.cyan(`\n  ◆ Reflection: 2 agents reviewing...\n`));
-                    const reflectionDir = join(runDir, "reflections");
-                    waveNum++;
-                    const reflTasks = buildReflectionTasks(objective, memory.goal, reflectionDir, waveNum, plannerModel);
-                    const reflSwarm = new Swarm({
-                        tasks: reflTasks, concurrency: 2, cwd,
-                        model: plannerModel, permissionMode,
-                        useWorktrees: false, mergeStrategy: "yolo",
-                        agentTimeoutMs, usageCap, allowExtraUsage, extraUsageBudget,
-                    });
-                    currentSwarm = reflSwarm;
-                    const stopReflRender = startRenderLoop(reflSwarm, liveConfig);
-                    try {
-                        await reflSwarm.run();
-                    }
-                    finally {
-                        stopReflRender();
-                    }
-                    console.log(renderSummary(reflSwarm));
-                    accCost += reflSwarm.totalCostUsd;
-                    accIn += reflSwarm.totalInputTokens;
-                    accOut += reflSwarm.totalOutputTokens;
-                    accCompleted += reflSwarm.completed;
-                    accFailed += reflSwarm.failed;
-                    accTools += reflSwarm.agents.reduce((sum, a) => sum + a.toolCalls, 0);
-                    remaining -= reflSwarm.completed + reflSwarm.failed;
-                    reflectionBudgetUsed += reflSwarm.completed + reflSwarm.failed;
-                    waveHistory.push({
-                        wave: waveNum,
-                        kind: "reflect",
-                        tasks: reflSwarm.agents.map(a => ({ prompt: a.task.prompt, status: a.status, filesChanged: a.filesChanged, error: a.error })),
-                    });
-                    lastWaveKind = "reflect";
-                    continue; // re-steer with reflection artifacts
-                }
-                // action === "execute"
-                if (steer.tasks.length === 0) {
                     console.log(chalk.green(`  \u2713 ${steer.reasoning}\n`));
                     objectiveComplete = true;
                     remaining = 0;
                     break;
                 }
+                const isOverhead = steer.waveKind !== "execute";
+                if (isOverhead && overheadBudgetUsed + steer.tasks.length > maxOverheadBudget) {
+                    console.log(chalk.dim(`  ${steer.reasoning}`));
+                    console.log(chalk.yellow(`  Overhead budget exhausted (${overheadBudgetUsed}/${maxOverheadBudget}) — re-assessing\n`));
+                    lastWaveKind = "overhead-capped";
+                    continue; // re-steer
+                }
                 console.log(chalk.dim(`  ${steer.reasoning}\n`));
-                currentTasks = steer.tasks;
-                lastWaveKind = "execute";
-                steerDone = true; // exit inner loop, outer loop runs the tasks
+                // Resolve model aliases: "planner" → plannerModel, "worker" → workerModel
+                currentTasks = steer.tasks.map(t => ({
+                    ...t,
+                    model: t.model === "planner" ? plannerModel : t.model === "worker" ? workerModel
+                        : isOverhead && !t.model ? plannerModel : t.model,
+                }));
+                lastWaveKind = steer.waveKind;
+                if (isOverhead)
+                    overheadBudgetUsed += currentTasks.length;
+                steered = true;
             }
             catch (err) {
                 process.stdout.write("\x1B[?25h");
                 console.log(chalk.yellow(`  Steering failed: ${err.message?.slice(0, 80)} \u2014 stopping\n`));
-                remaining = 0;
+                // Don't zero out remaining — preserve unspent budget so resume works
                 break;
             }
         }
@@ -1330,7 +1379,7 @@ async function main() {
         id: `run-${new Date().toISOString().slice(0, 19)}`, objective: objective ?? "", budget: budget ?? tasks.length,
         remaining, workerModel, plannerModel, concurrency, permissionMode,
         usageCap, allowExtraUsage, extraUsageBudget, flex, useWorktrees, mergeStrategy, waveNum, currentTasks: [],
-        lastWaveKind, reflectionBudgetUsed, accCost, accCompleted, accFailed,
+        lastWaveKind, overheadBudgetUsed, accCost, accCompleted, accFailed,
         branches, phase: finalPhase, startedAt: new Date(runStartedAt).toISOString(), cwd,
     });
     if (trulyDone) {
@@ -1364,8 +1413,8 @@ async function main() {
     boxLines.push(`${elapsedStr} · ${fmtTokens(accIn)} in / ${fmtTokens(accOut)} out · ${accTools} tools`);
     if (totalMerged > 0 || totalConflicts > 0)
         boxLines.push(`${totalMerged} merged${totalConflicts > 0 ? ` · ${totalConflicts} conflicts` : ""}`);
-    if (reflectionBudgetUsed > 0)
-        boxLines.push(`${reflectionBudgetUsed} reflection agents`);
+    if (overheadBudgetUsed > 0)
+        boxLines.push(`${overheadBudgetUsed} overhead agents (review/verify/explore)`);
     if (lastCapped)
         boxLines.push(chalk.yellow(`Capped at ${usageCap != null ? Math.round(usageCap * 100) : 100}%`));
     const boxW = Math.max(...boxLines.map(l => l.replace(/\x1B\[[0-9;]*m/g, "").length)) + 4;

package/dist/planner.d.ts CHANGED Viewed

@@ -10,7 +10,7 @@ export interface PlannerRateLimitInfo {
 }
 export interface WaveSummary {
     wave: number;
-    kind: "execute" | "reflect" | "think";
+    kind: string;
     tasks: {
         prompt: string;
         status: string;
@@ -20,15 +20,16 @@ export interface WaveSummary {
 }
 export interface SteerResult {
     done: boolean;
-    action: "execute" | "reflect" | "done";
     tasks: Task[];
     reasoning: string;
+    waveKind: string;
     goalUpdate?: string;
     statusUpdate?: string;
 }
 export interface RunMemory {
     designs: string;
     reflections: string;
+    verifications: string;
     milestones: string;
     status: string;
     goal: string;
@@ -40,7 +41,6 @@ export declare function getPlannerRateLimitInfo(): PlannerRateLimitInfo;
 export declare function planTasks(objective: string, cwd: string, plannerModel: string, workerModel: string, permissionMode: PermMode, budget: number | undefined, concurrency: number, onLog: (text: string) => void, flexNote?: string, outFile?: string): Promise<Task[]>;
 export declare function identifyThemes(objective: string, count: number, model: string, permissionMode: PermMode, onLog?: (text: string) => void): Promise<string[]>;
 export declare function buildThinkingTasks(objective: string, themes: string[], designDir: string, plannerModel: string, previousKnowledge?: string): Task[];
-export declare function buildReflectionTasks(objective: string, goal: string, reflectionDir: string, waveNum: number, plannerModel: string): Task[];
 export declare function orchestrate(objective: string, designDocs: string, cwd: string, plannerModel: string, workerModel: string, permissionMode: PermMode, budget: number, concurrency: number, onLog: (text: string) => void, flexNote?: string, outFile?: string): Promise<Task[]>;
 export declare function refinePlan(objective: string, previousTasks: Task[], feedback: string, cwd: string, plannerModel: string, workerModel: string, permissionMode: PermMode, budget: number | undefined, concurrency: number, onLog: (text: string) => void): Promise<Task[]>;
 export declare function steerWave(objective: string, history: WaveSummary[], remainingBudget: number, cwd: string, plannerModel: string, workerModel: string, permissionMode: PermMode, concurrency: number, onLog: (text: string) => void, runMemory?: RunMemory): Promise<SteerResult>;

package/dist/planner.js CHANGED Viewed

@@ -1,6 +1,18 @@
 import { query } from "@anthropic-ai/claude-agent-sdk";
 import { readFileSync } from "fs";
 import { NudgeError } from "./types.js";
+// The core framing for all planning. Not a checklist — a way of thinking.
+const DESIGN_THINKING = `
+HOW TO THINK ABOUT EVERY TASK:
+Start from the user's job. What is someone hiring this product to do? "I need to send money abroad cheaply" — not "I need a currency conversion API." Every decision — what to build, how fast it responds, what happens on error — flows from the job.
+The experience IS the product. A 200ms server response is not a "performance metric" — it's the difference between an app that feels alive and one that feels broken. A loading state is not "polish" — it's the user knowing the app heard them. An error message is not "error handling" — it's the app being honest. There is no line between backend and UX. The server, the API, the database query, the render — they're all one experience the user either trusts or doesn't.
+Build the core, verify it works, learn, iterate. Don't plan 20 features and build them all. Build the ONE thing that matters most, run it, see if it actually works from a user's chair. What you learn from seeing it run will change what you build next. Each wave should make what exists better before adding what doesn't exist yet.
+Consistency is what makes complex things feel simple. One design system, rigid rules, no exceptions. This is how Revolut ships a super-app with 30+ features that doesn't feel like chaos.
+`;
 const NUDGE_MS = 15 * 60 * 1000; // 15 min — close & restart with "continue"
 const HARD_TIMEOUT_MS = 30 * 60 * 1000; // 30 min — give up
 export function detectModelTier(model) {
@@ -412,17 +424,20 @@ export function buildThinkingTasks(objective, themes, designDir, plannerModel, p
 OVERALL OBJECTIVE: ${objective}
 ${prevBlock}
 YOUR FOCUS: ${theme}
+${DESIGN_THINKING}
 Explore the codebase thoroughly using Read, Glob, and Grep. Then write a design document to ${designDir}/focus-${i}.md with these sections:
 ## Findings
 Key files, patterns, and architecture you discovered. Cite specific file paths and function names.
+## The Job
+What is someone hiring this product to do? Not the feature — the outcome. Frame everything below through this lens.
 ## Proposed Work Items
 For each item:
 - **What**: What to build or change
 - **Where**: Specific file paths
-- **Why**: Why this matters
+- **Why**: How this serves the job — including how fast it needs to respond and what happens when it fails
 - **Risk**: Conflicts or complications
 ## Key Files
@@ -432,44 +447,6 @@ Be thorough — your findings drive the execution plan.`,
         model: plannerModel,
     }));
 }
-export function buildReflectionTasks(objective, goal, reflectionDir, waveNum, plannerModel) {
-    const goalBlock = goal ? `\nEVOLVED GOAL:\n${goal}\n` : "";
-    return [
-        {
-            id: "review-0",
-            prompt: `You are a senior code reviewer performing a deep quality audit.
-OBJECTIVE: ${objective}
-${goalBlock}
-Read the codebase thoroughly. Assess:
-- **Correctness**: Bugs, missing error handling, broken flows?
-- **Architecture**: Clean design? Unnecessary or missing abstractions?
-- **Code quality**: Readability, naming, duplication, dead code?
-- **Completeness**: What's missing vs. the objective? Half-done work?
-- **Polish**: Edge cases, error messages, loading states?
-Write findings to ${reflectionDir}/wave-${waveNum}-quality.md.
-End with a ## Verdict: is this closer to "good enough" or "amazing"? What would make the biggest difference?`,
-            model: plannerModel,
-        },
-        {
-            id: "review-1",
-            prompt: `You are a UX and integration reviewer.
-OBJECTIVE: ${objective}
-${goalBlock}
-Read the codebase. Assess:
-- **UX coherence**: Do user-facing flows make sense end-to-end? Consistent experience?
-- **Integration**: Do pieces fit together? Seams, inconsistencies, broken contracts?
-- **Testing**: Meaningful coverage? Testing the right things?
-- **Gaps**: Unhandled use cases? What would surprise a user?
-Write findings to ${reflectionDir}/wave-${waveNum}-ux.md.
-End with ## Priorities: rank the top 3 things that would most improve the result.`,
-            model: plannerModel,
-        },
-    ];
-}
 export async function orchestrate(objective, designDocs, cwd, plannerModel, workerModel, permissionMode, budget, concurrency, onLog, flexNote, outFile) {
     const capability = modelCapabilityBlock(workerModel);
     const flexLine = flexNote ? `\n\n${flexNote}` : "";
@@ -483,7 +460,7 @@ Your architects explored the codebase and found:
 ${designDocs}
 AGENT CAPABILITY: ${capability}
+${DESIGN_THINKING}
 Create exactly ~${budget} concrete execution tasks based on these findings.
 Requirements:
@@ -492,7 +469,8 @@ Requirements:
 - ${concurrency} agents run in parallel — tasks must touch DIFFERENT files
 - Trust the research — don't tell agents to re-explore what's documented
 - Reference specific files and patterns from the findings
-- Priority order: foundational first, polish last${flexLine}
+- Build the core user job first, then expand. Each task should produce something complete and usable — not scaffolding for later
+- There is no separate "polish" phase. Loading states, error handling, sub-200ms responses, and edge cases are part of every task${flexLine}
 Respond with ONLY a JSON object (no markdown fences):
 {"tasks": [{"prompt": "..."}]}${fileInstruction}`;
@@ -655,24 +633,25 @@ async function extractTaskJson(raw, retry, onLog, outFile) {
 // ── Wave steering ──
 export async function steerWave(objective, history, remainingBudget, cwd, plannerModel, workerModel, permissionMode, concurrency, onLog, runMemory) {
     const capability = modelCapabilityBlock(workerModel);
-    // Three-layer context: status (current), milestones (strategic), recent waves (tactical)
     const recentWaves = history.slice(-3);
     const recentText = recentWaves.length > 0 ? recentWaves.map(w => {
-        const tag = w.kind === "reflect" ? " (reflection)" : w.kind === "think" ? " (thinking)" : "";
         const lines = w.tasks.map(t => {
             const files = t.filesChanged ? ` (${t.filesChanged} files)` : "";
             const err = t.error ? ` — ${t.error}` : "";
             return `  - [${t.status}] ${t.prompt.slice(0, 120)}${files}${err}`;
         }).join("\n");
-        return `Wave ${w.wave + 1}${tag}:\n${lines}`;
+        return `Wave ${w.wave + 1} (${w.kind}):\n${lines}`;
     }).join("\n\n") : "(first wave)";
-    const lastWasReflection = history.length > 0 && history[history.length - 1].kind === "reflect";
-    const noReflectHint = lastWasReflection ? `\nIMPORTANT: The previous wave was a reflection. You MUST choose "execute" or "done" — not "reflect" again.\n` : "";
+    const lastKind = history.length > 0 ? history[history.length - 1].kind : "";
+    const repeatHint = lastKind && lastKind !== "execute"
+        ? `\nThe previous wave was "${lastKind}". Don't repeat the same wave kind unless you have a strong reason.\n`
+        : "";
     const cap = (s, max) => s.length > max ? s.slice(0, max) + "\n...(truncated)" : s;
     const statusBlock = runMemory?.status ? `\nCurrent project status:\n${runMemory.status}\n` : "";
     const milestoneBlock = runMemory?.milestones ? `\nMilestone snapshots:\n${cap(runMemory.milestones, 4000)}\n` : "";
     const designBlock = runMemory?.designs ? `\nArchitectural research:\n${cap(runMemory.designs, 4000)}\n` : "";
     const reflectionBlock = runMemory?.reflections ? `\nLatest quality reports:\n${cap(runMemory.reflections, 3000)}\n` : "";
+    const verificationBlock = runMemory?.verifications ? `\nVerification results (from actually running the app):\n${cap(runMemory.verifications, 3000)}\n` : "";
     const goalBlock = runMemory?.goal ? `\nNorth star — what "amazing" means:\n${runMemory.goal}\n` : "";
     const prevRunBlock = runMemory?.previousRuns ? `\nKnowledge from previous runs:\n${cap(runMemory.previousRuns, 3000)}\n` : "";
     const prompt = `You are the quality director for an autonomous multi-wave agent system. Your job is to push the work toward "amazing," not just "done."
@@ -681,38 +660,63 @@ Objective: ${objective}
 ${goalBlock}${statusBlock}${milestoneBlock}${prevRunBlock}
 Recent waves:
 ${recentText}
-${designBlock}${reflectionBlock}
+${designBlock}${reflectionBlock}${verificationBlock}
 Remaining budget: ${remainingBudget} agent sessions. ${concurrency} agents run in parallel — tasks must touch DIFFERENT files.
 ${capability}
+${DESIGN_THINKING}
 Total waves completed: ${history.length}
-Read the codebase. Assess: how close is this to the VISION? Not "what's missing" — "how good is what we built?"
+Read the codebase. Assess from the user's chair: does this product do the job someone would hire it for? Does it feel fast, honest, and trustworthy? Not "is the code clean" — "would I use this?"
+If verification found issues, those are the priority. Fix what's broken before building what's missing. Iterate on what exists before expanding scope.
+## Compose the next wave
+You have full creative freedom. Design the wave that will have the highest impact right now. Here are archetypes to draw from — mix, adapt, or invent your own:
-Then choose ONE action:
+**Execute** — Agents implement concrete changes in parallel. Each touches different files. The bread and butter.
+  Example: 5 agents each owning a different feature or fix
-**"reflect"** — Spin up 1-2 review agents for a deep quality audit. Choose when:
-  - Substantial new code shipped and hasn't been reviewed
-  - You're unsure about quality and need expert eyes
-  - A subsystem just "completed" and deserves verification
+**Explore** — Multiple agents independently tackle the same problem from different angles. Each writes a design/approach to a separate file. Use when you need creative alternatives before committing.
+  Example: 3 agents each design a different navigation approach, writing to designs/nav-{approach}.md
-**"execute"** — Plan the next batch of tasks. Choose when:
-  - You know what needs doing (from reviews or your own assessment)
-  - There are clear gaps, bugs, or improvements to make
+**Critique** — Agents review what exists as skeptical experts. They read the codebase and write findings to files. Use after substantial new code ships.
+  Example: 1 code quality reviewer, 1 UX reviewer examining flows end-to-end
-**"done"** — The objective is met at high quality. Choose when:
-  - The code works correctly and handles edge cases
-  - The architecture is clean and pieces fit together
-  - Further work would be diminishing returns
-${noReflectHint}
+**Synthesize** — An agent reads multiple alternatives or review findings and makes a decision. Writes the chosen approach or prioritized fix list.
+  Example: 1 agent reads 3 design docs and writes the implementation plan
+**Verify** — Agents actually RUN the application: build it, start it, navigate it, click things, try edge cases. They report what works and what's broken. Not code reading — real testing.
+  Example: 1 agent does end-to-end QA, writing a report with reproduction steps
+**User-test** — Agents emulate specific user personas interacting with the product. "First-time user who just downloaded this." "Power user trying to do X fast." They test from that perspective and report friction.
+  Example: 2 agents, one new user, one power user, each writing a report
+**Polish** — Agents focus purely on feel: loading states, error messages, micro-interactions, empty states, responsiveness. Not features — the texture that makes users trust the product.
+  Example: 2 agents, one on happy paths, one on error/edge states
+You can combine these. A wave can have 3 execute agents + 1 verification agent. Or 2 divergent explorers. Whatever the situation calls for.
+For non-execute tasks (critique, verify, user-test, synthesize), tell agents to write their output to files in the run directory so findings persist for future waves. Use paths like: .claude-overnight/latest/reflections/wave-N-{topic}.md or .claude-overnight/latest/verifications/wave-N-{topic}.md.
+IMPORTANT: You cannot declare "done" unless at least one verification wave has confirmed the app works. If you're considering done but haven't verified, compose a verification wave first.
+${repeatHint}
 Respond with ONLY a JSON object (no markdown fences):
 {
-  "action": "execute" | "reflect" | "done",
-  "done": true/false,
-  "reasoning": "your assessment and why you chose this action",
+  "done": false,
+  "waveKind": "execute",
+  "reasoning": "your assessment and why you chose this wave composition",
   "goalUpdate": "optional — refine what 'amazing' means as you learn more",
-  "statusUpdate": "REQUIRED — write a concise project status: what's built, what works, what's rough, quality level, key gaps. This replaces the previous status and is your memory for future waves.",
-  "tasks": [{"prompt": "..."}]
-}`;
+  "statusUpdate": "REQUIRED — concise project status: what's built, what works, what's rough, quality level, key gaps. This replaces the previous status.",
+  "tasks": [
+    {"prompt": "task instruction...", "model": "worker"},
+    {"prompt": "review task...", "model": "planner"}
+  ]
+}
+The "model" field on each task: use "worker" (${workerModel}) for implementation tasks, "planner" (${plannerModel}) for review/analysis/verification tasks. Default is "worker".
+If done: {"done": true, "waveKind": "done", "reasoning": "...", "statusUpdate": "...", "tasks": []}`;
     onLog("Assessing...");
     const resultText = await runPlannerQuery(prompt, { cwd, model: plannerModel, permissionMode }, onLog);
     const parsed = await (async () => {
@@ -720,21 +724,25 @@ Respond with ONLY a JSON object (no markdown fences):
         if (first)
             return first;
         onLog("Retrying...");
-        const retryText = await runPlannerQuery(`Your previous response was not valid JSON. Respond with ONLY a JSON object {"action":"execute"|"reflect"|"done","done":true/false,"reasoning":"...","tasks":[{"prompt":"..."}]}.\n\n${prompt}`, { cwd, model: plannerModel, permissionMode }, onLog);
-        return attemptJsonParse(retryText) ?? { action: "done", done: true, reasoning: "Could not parse steering response" };
+        const retryText = await runPlannerQuery(`Your previous response was not valid JSON. Respond with ONLY a JSON object {"done":false,"waveKind":"execute","reasoning":"...","statusUpdate":"...","tasks":[{"prompt":"..."}]}.\n\n${prompt}`, { cwd, model: plannerModel, permissionMode }, onLog);
+        const retryParsed = attemptJsonParse(retryText);
+        if (retryParsed)
+            return retryParsed;
+        // Don't return done:true on parse failure — that permanently marks the run complete.
+        // Throw so the caller's catch block handles it as a transient steering failure.
+        throw new Error("Could not parse steering response after retry");
     })();
-    const action = parsed.action || (parsed.done ? "done" : "execute");
+    const isDone = parsed.done === true;
+    const waveKind = parsed.waveKind || parsed.action || (isDone ? "done" : "execute");
     const statusUpdate = parsed.statusUpdate || undefined;
-    if (action === "done") {
-        return { done: true, action: "done", tasks: [], reasoning: parsed.reasoning || "Objective complete", goalUpdate: parsed.goalUpdate, statusUpdate };
-    }
-    if (action === "reflect") {
-        return { done: false, action: "reflect", tasks: [], reasoning: parsed.reasoning || "Quality audit needed", goalUpdate: parsed.goalUpdate, statusUpdate };
+    if (isDone) {
+        return { done: true, tasks: [], reasoning: parsed.reasoning || "Objective complete", waveKind: "done", goalUpdate: parsed.goalUpdate, statusUpdate };
     }
     let tasks = (parsed.tasks || []).map((t, i) => ({
         id: String(i),
         prompt: typeof t === "string" ? t : t.prompt,
+        ...(t.model && { model: t.model }),
     }));
     tasks = postProcess(tasks, remainingBudget, onLog);
-    return { done: tasks.length === 0, action: tasks.length === 0 ? "done" : "execute", tasks, reasoning: parsed.reasoning || "", goalUpdate: parsed.goalUpdate, statusUpdate };
+    return { done: tasks.length === 0, tasks, reasoning: parsed.reasoning || "", waveKind: tasks.length === 0 ? "done" : waveKind, goalUpdate: parsed.goalUpdate, statusUpdate };
 }

package/dist/swarm.d.ts CHANGED Viewed

@@ -16,6 +16,8 @@ export interface SwarmConfig {
     allowExtraUsage?: boolean;
     /** Max $ to spend on extra usage before stopping. Only applies when allowExtraUsage is true. */
     extraUsageBudget?: number;
+    /** Cost from previous waves — lets the UI show an accurate running total. */
+    baseCostUsd?: number;
 }
 export interface MergeResult {
     branch: string;
@@ -64,6 +66,7 @@ export declare class Swarm {
     usageCap: number | undefined;
     readonly allowExtraUsage: boolean;
     readonly extraUsageBudget: number | undefined;
+    readonly baseCostUsd: number;
     constructor(config: SwarmConfig);
     get active(): number;
     get pending(): number;

package/dist/swarm.js CHANGED Viewed

@@ -50,6 +50,7 @@ export class Swarm {
     usageCap; // mutable — can be changed live
     allowExtraUsage;
     extraUsageBudget;
+    baseCostUsd;
     constructor(config) {
         if (!config.tasks.length) {
             throw new Error("SwarmConfig: tasks array must not be empty");
@@ -73,6 +74,7 @@ export class Swarm {
         this.usageCap = config.usageCap;
         this.allowExtraUsage = config.allowExtraUsage ?? false;
         this.extraUsageBudget = config.extraUsageBudget;
+        this.baseCostUsd = config.baseCostUsd ?? 0;
         this.queue = [...config.tasks];
         this.total = config.tasks.length;
     }

package/dist/types.d.ts CHANGED Viewed

@@ -133,13 +133,13 @@ export interface RunState {
     mergeStrategy: MergeStrategy;
     waveNum: number;
     currentTasks: Task[];
-    lastWaveKind: "execute" | "reflect" | "think";
-    reflectionBudgetUsed: number;
+    lastWaveKind: string;
+    overheadBudgetUsed: number;
     accCost: number;
     accCompleted: number;
     accFailed: number;
     branches: BranchRecord[];
-    phase: "executing" | "steering" | "reflecting" | "capped" | "done";
+    phase: "executing" | "steering" | "reflecting" | "verifying" | "capped" | "done";
     startedAt: string;
     cwd: string;
 }

package/dist/ui.js CHANGED Viewed

@@ -39,14 +39,19 @@ export function renderFrame(swarm, showHotkeys = false) {
         chalk.gray(`${swarm.pending} queued`) +
         "  " +
         chalk.gray(`\u23F1 ${fmtDur(Date.now() - swarm.startedAt)}`));
-    // Stats line
+    // Stats line — show wave cost + overall if there's a base
     const tokIn = fmtTokens(swarm.totalInputTokens);
     const tokOut = fmtTokens(swarm.totalOutputTokens);
-    const cost = swarm.totalCostUsd > 0
-        ? chalk.yellow(`$${swarm.totalCostUsd.toFixed(3)}`)
-        : "";
+    const waveCost = swarm.totalCostUsd;
+    const totalCost = swarm.baseCostUsd + waveCost;
+    let costStr = "";
+    if (totalCost > 0) {
+        costStr = swarm.baseCostUsd > 0
+            ? chalk.yellow(`$${waveCost.toFixed(3)}`) + chalk.dim(` / $${totalCost.toFixed(2)} total`)
+            : chalk.yellow(`$${waveCost.toFixed(3)}`);
+    }
     out.push(chalk.gray(`  \u2191 ${tokIn} in  \u2193 ${tokOut} out`) +
-        (cost ? `  ${cost}` : ""));
+        (costStr ? `  ${costStr}` : ""));
     // ── Usage bar(s) — cycle through windows every 3s ──
     const windows = Array.from(swarm.rateLimitWindows.values());
     const rlPct = swarm.rateLimitUtilization;
@@ -82,10 +87,7 @@ export function renderFrame(swarm, showHotkeys = false) {
                 label = chalk.red(`Waiting for reset ${mm > 0 ? `${mm}m ${ss}s` : `${ss}s`}`);
             }
             if (swarm.isUsingOverage && !swarm.cappedOut) {
-                const budgetInfo = swarm.extraUsageBudget != null
-                    ? ` $${swarm.overageCostUsd.toFixed(2)}/$${swarm.extraUsageBudget}`
-                    : "";
-                label += chalk.red(` [EXTRA USAGE${budgetInfo}]`);
+                label += chalk.red(" [EXTRA USAGE]");
             }
             const prefix = windowLabel ? chalk.dim(windowLabel.padEnd(6)) : chalk.dim("Usage ");
             out.push(`  ${prefix}${barStr}  ${label}`);
@@ -104,6 +106,23 @@ export function renderFrame(swarm, showHotkeys = false) {
             renderBar(rlPct);
         }
     }
+    // ── Extra usage budget bar ──
+    if (swarm.isUsingOverage && swarm.extraUsageBudget != null && swarm.extraUsageBudget > 0) {
+        const barW = Math.min(30, w - 40);
+        const pct = Math.min(1, swarm.overageCostUsd / swarm.extraUsageBudget);
+        const filled = Math.round(pct * barW);
+        let barStr = "";
+        for (let i = 0; i < barW; i++) {
+            if (i < filled)
+                barStr += pct > 0.9 ? chalk.red("\u2588") : pct > 0.75 ? chalk.yellow("\u2588") : chalk.magenta("\u2588");
+            else
+                barStr += chalk.gray("\u2591");
+        }
+        const label = swarm.cappedOut
+            ? chalk.red(`$${swarm.overageCostUsd.toFixed(2)}/$${swarm.extraUsageBudget} — budget hit`)
+            : `$${swarm.overageCostUsd.toFixed(2)}/$${swarm.extraUsageBudget}`;
+        out.push(`  ${chalk.dim("Extra ")}${barStr}  ${label}`);
+    }
     out.push("");
     // ── Agent table ──
     const running = swarm.agents.filter((a) => a.status === "running");

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "claude-overnight",
-  "version": "1.4.0",
+  "version": "1.6.0",
   "description": "Run 10, 100, or 1000 Claude agents overnight. Parallel autonomous AI coding with thinking waves, iterative quality steering, crash recovery, and rate limit handling. Built on the Claude Agent SDK.",
   "type": "module",
   "bin": {