npm - claude-overnight - Versions diffs - 1.25.33 → 1.25.35 - Mend

claude-overnight 1.25.33 → 1.25.35

This diff shows the differences between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (12)

package/dist/_version.d.ts +1 -1
package/dist/_version.js +1 -1
package/dist/interactive-panel.d.ts +9 -0
package/dist/interactive-panel.js +18 -3
package/dist/run.js +113 -15
package/dist/steering.js +5 -2
package/dist/swarm.js +5 -2
package/dist/types.d.ts +2 -0
package/dist/ui.d.ts +4 -2
package/dist/ui.js +8 -2
package/package.json +2 -2
package/plugins/claude-overnight/.claude-plugin/plugin.json +1 -1

package/dist/_version.d.ts CHANGED Viewed

@@ -1 +1 @@
-export declare const VERSION = "1.25.33";
+export declare const VERSION = "1.25.35";

package/dist/_version.js CHANGED Viewed

@@ -1,2 +1,2 @@
 // Auto-generated by build — do not edit manually.
-export const VERSION = "1.25.33";
+export const VERSION = "1.25.35";

package/dist/interactive-panel.d.ts CHANGED Viewed

@@ -1,4 +1,9 @@
 export type PanelMode = "debrief" | "ask" | "custom" | "none";
+export interface DebriefEntry {
+    label: string;
+    text: string;
+    time: number;
+}
 /** Mutable state of the interactive panel. */
 export interface PanelState {
     mode: PanelMode;
@@ -11,12 +16,16 @@ export interface PanelState {
 export declare class InteractivePanel {
     state: PanelState;
     private _bodyLines;
+    /** Accumulated debrief entries — each wave/phase appends one. */
+    private _debriefHistory;
     set(params: {
         mode: PanelMode;
         header: string;
         preview: string;
         body: string;
     }): void;
+    /** Append a debrief entry to the running history. Only meaningful in debrief mode. */
+    appendHistory(label: string, text: string): void;
     /** Close the panel entirely (set mode to "none"). */
     close(): void;
     collapse(): void;

package/dist/interactive-panel.js CHANGED Viewed

@@ -1,5 +1,5 @@
-const DARK_GREEN_BG = "\x1B[48;5;22m";
-const LIGHT_GREEN_FG = "\x1B[38;5;156m";
+const BLACK_BG = "\x1B[48;5;232m";
+const SUBTLE_FG = "\x1B[38;5;108m";
 const BRIGHT_WHITE_FG = "\x1B[38;5;231m";
 const SOFT_GREEN_FG = "\x1B[38;5;114m";
 const RESET = "\x1B[0m";
@@ -15,7 +15,7 @@ function truncate(s, max) {
 }
 /** Wrap a plain (ANSI-free) line in the dark-green bg, padded to width. */
 function bgLine(text, width) {
-    return `${DARK_GREEN_BG}${LIGHT_GREEN_FG}${padTo(text, width)}${RESET}`;
+    return `${BLACK_BG}${SUBTLE_FG}${padTo(text, width)}${RESET}`;
 }
 export class InteractivePanel {
     state = {
@@ -27,6 +27,8 @@ export class InteractivePanel {
         body: "",
     };
     _bodyLines = [];
+    /** Accumulated debrief entries — each wave/phase appends one. */
+    _debriefHistory = [];
     set(params) {
         this.state.mode = params.mode;
         this.state.header = params.header;
@@ -34,6 +36,19 @@ export class InteractivePanel {
         this.state.body = params.body;
         this._bodyLines = params.body.split("\n").filter(l => l.length > 0);
         this.state.scrollOffset = 0;
+        // Clear history when mode changes away from debrief
+        if (params.mode !== "debrief")
+            this._debriefHistory = [];
+    }
+    /** Append a debrief entry to the running history. Only meaningful in debrief mode. */
+    appendHistory(label, text) {
+        if (this.state.mode !== "debrief")
+            return;
+        this._debriefHistory.push({ label, text, time: Date.now() });
+        // Rebuild body from full history so expanded view shows everything
+        const historyBody = this._debriefHistory.map(e => `  ${e.label}\n  ${e.text}`).join("\n\n");
+        this.state.body = historyBody;
+        this._bodyLines = historyBody.split("\n");
     }
     /** Close the panel entirely (set mode to "none"). */
     close() {

package/dist/run.js CHANGED Viewed

@@ -185,11 +185,11 @@ export async function executeRun(cfg) {
             waveHistory.length ? `Waves done: ${waveHistory.length}` : "",
             memory.reflections ? `Reflections:\n${cap(memory.reflections, 600)}` : "",
         ].filter(Boolean).join("\n\n");
-        const prompt = `${label}\n\n${ctx}\n\nWrite one short sentence (max 120 chars) summarising progress and what's next. No preamble.`;
+        const prompt = `${label}\n\n${ctx}\n\nWrite one short sentence (max 180 chars) summarising progress and what's next. No preamble.`;
         // Show in-flight feedback so the panel isn't empty while the planner thinks.
         display.setDebrief(`Summarizing ${label.toLowerCase().replace(/\.$/, "")}\u2026`);
         void runPlannerQuery(prompt, { cwd, model: debriefModel, permissionMode }, () => { })
-            .then(text => { display.setDebrief(text.trim().slice(0, 140)); })
+            .then(text => { display.setDebrief(text.trim().slice(0, 210), label); })
             .catch(() => { display.setDebrief(undefined); });
     };
     /** Generate a longer narrative summary at run end. Awaited (not fire-and-forget)
@@ -455,17 +455,50 @@ export async function executeRun(cfg) {
             }
             display.pause();
             console.log(renderSummary(swarm));
-            // Retry execute tasks that returned filesChanged=0. One retry with a nudge;
-            // if still 0, fail loudly so steering re-plans instead of silently dropping.
+            // Retry execute tasks that returned filesChanged=0 OR whose postcondition
+            // shell-check failed after merge. One retry with a nudge that includes the
+            // failure output; if still failing, fail loudly so steering re-plans.
             if (!swarm.aborted && !swarm.cappedOut && remaining > 0) {
-                const zeroWork = swarm.agents.filter(a => a.status === "done" && (!a.task.type || a.task.type === "execute") && (a.filesChanged ?? 0) === 0);
+                const failedBranches = new Set(swarm.mergeResults.filter(r => !r.ok).map(r => r.branch));
+                const postResults = new Map();
+                for (const a of swarm.agents) {
+                    if (a.status !== "done" || !a.task.postcondition)
+                        continue;
+                    if (a.branch && failedBranches.has(a.branch))
+                        continue; // merge-failed: postcondition can't pass on main anyway
+                    try {
+                        const out = execSync(a.task.postcondition, { cwd, encoding: "utf-8", stdio: ["ignore", "pipe", "pipe"], timeout: 30_000 });
+                        postResults.set(a.id, { ok: true, output: out.trim().slice(0, 400) });
+                    }
+                    catch (err) {
+                        const output = ((err.stderr || "") + "\n" + (err.stdout || err.message || "")).trim().slice(0, 400);
+                        postResults.set(a.id, { ok: false, output });
+                    }
+                }
+                const zeroWork = swarm.agents.filter(a => {
+                    if (a.status !== "done" || (a.task.type && a.task.type !== "execute"))
+                        return false;
+                    if ((a.filesChanged ?? 0) === 0)
+                        return true;
+                    const pr = postResults.get(a.id);
+                    return pr && !pr.ok;
+                });
                 if (zeroWork.length > 0) {
-                    display.appendSteeringEvent(`Retry: ${zeroWork.length} execute task(s) with 0 file changes`);
-                    const retryTasks = zeroWork.map(a => ({
-                        id: `${a.task.id}-retry`,
-                        prompt: `${a.task.prompt}\n\nIMPORTANT: your last attempt made no file edits. If the fix truly needs no changes, say 'no-op:' at the start and explain why. Otherwise, make the actual edits.`,
-                        type: "execute",
-                    }));
+                    const noFiles = zeroWork.filter(a => (a.filesChanged ?? 0) === 0).length;
+                    const badPost = zeroWork.length - noFiles;
+                    display.appendSteeringEvent(`Retry: ${zeroWork.length} task(s) (${noFiles} with 0 files, ${badPost} failed postcondition)`);
+                    const retryTasks = zeroWork.map(a => {
+                        const pr = postResults.get(a.id);
+                        const postFailBlock = pr && !pr.ok
+                            ? `\n\nThe postcondition \`${a.task.postcondition}\` failed after your last attempt:\n${pr.output || "(no output)"}\n\nFix what makes the check fail and try again.`
+                            : `\n\nIMPORTANT: your last attempt made no file edits. If the fix truly needs no changes, say 'no-op:' at the start and explain why. Otherwise, make the actual edits.`;
+                        return {
+                            id: `${a.task.id}-retry`,
+                            prompt: `${a.task.prompt}${postFailBlock}`,
+                            type: "execute",
+                            postcondition: a.task.postcondition,
+                        };
+                    });
                     const retrySwarm = new Swarm({
                         tasks: retryTasks, concurrency: Math.min(concurrency, retryTasks.length), cwd, model: workerModel,
                         permissionMode, allowedTools, useWorktrees, mergeStrategy: waveMerge,
@@ -485,10 +518,29 @@ export async function executeRun(cfg) {
                     accIn += retrySwarm.totalInputTokens;
                     accOut += retrySwarm.totalOutputTokens;
                     accTools += retrySwarm.agents.reduce((sum, a) => sum + a.toolCalls, 0);
-                    // Any retry that still has 0 files → hard fail
-                    const stillZero = retrySwarm.agents.filter(a => a.status === "done" && (a.filesChanged ?? 0) === 0);
+                    // Any retry that still has 0 files OR a still-failing postcondition → hard fail
+                    const retryFailedBranches = new Set(retrySwarm.mergeResults.filter(r => !r.ok).map(r => r.branch));
+                    const stillZero = retrySwarm.agents.filter(a => {
+                        if (a.status !== "done")
+                            return false;
+                        if ((a.filesChanged ?? 0) === 0)
+                            return true;
+                        if (!a.task.postcondition)
+                            return false;
+                        if (a.branch && retryFailedBranches.has(a.branch))
+                            return true;
+                        try {
+                            execSync(a.task.postcondition, { cwd, stdio: "ignore", timeout: 30_000 });
+                            return false;
+                        }
+                        catch {
+                            return true;
+                        }
+                    });
                     for (const a of stillZero) {
-                        display.appendSteeringEvent(`RETRY FAILED: agent ${a.id} still changed 0 files after nudge — task dropped as error`);
+                        const why = (a.filesChanged ?? 0) === 0 ? "still changed 0 files" : "postcondition still failing";
+                        display.appendSteeringEvent(`RETRY FAILED: agent ${a.id} ${why} — task dropped as error`);
+                        a.error = a.error ?? `retry failed: ${why}`;
                         accFailed++;
                         remaining = Math.max(0, remaining - 1);
                     }
@@ -554,9 +606,22 @@ export async function executeRun(cfg) {
             const attemptedPrompts = new Set(swarm.agents.map(a => a.task.prompt));
             const neverStarted = currentTasks.filter(t => !attemptedPrompts.has(t.prompt));
             saveRunState(runDir, buildRunState({ remaining, phase: "steering", currentTasks: neverStarted }));
+            // Overlay merge outcomes: if an agent's branch failed to merge, its changes
+            // did NOT land — tell steering the truth (filesChanged=0, error attached)
+            // so it can't declare victory on work that didn't reach the codebase.
+            const failedMergeBranches = new Set(swarm.mergeResults.filter(r => !r.ok).map(r => r.branch));
             waveHistory.push({
                 wave: waveNum,
-                tasks: swarm.agents.map(a => ({ prompt: a.task.prompt, status: a.status, type: a.task.type, filesChanged: a.filesChanged, error: a.error })),
+                tasks: swarm.agents.map(a => {
+                    const mergeFailed = a.branch && failedMergeBranches.has(a.branch);
+                    return {
+                        prompt: a.task.prompt,
+                        status: a.status,
+                        type: a.task.type,
+                        filesChanged: mergeFailed ? 0 : a.filesChanged,
+                        error: mergeFailed ? `merge-failed: branch ${a.branch} did not land` : a.error,
+                    };
+                }),
             });
             // Hook-blocked work: agents that touched files but nothing landed on the
             // branch (pre-commit hooks, gitignore, writes outside worktree). Surface
@@ -574,6 +639,39 @@ export async function executeRun(cfg) {
                 }
                 catch { }
             }
+            // Merge-failed branches: changes never reached the codebase. Regenerate a
+            // pinned section in status.md every wave from live git state — resolved
+            // branches (deleted from git) drop off automatically; still-broken ones
+            // keep shouting at steering until a follow-up wave lands them or discards
+            // them. This is what turns merge-failed from a silent state into a
+            // first-class blocker.
+            try {
+                const unresolved = branches.filter(b => {
+                    if (b.status !== "merge-failed")
+                        return false;
+                    try {
+                        execSync(`git rev-parse --verify "${b.branch}"`, { cwd, stdio: "ignore" });
+                        return true;
+                    }
+                    catch {
+                        return false;
+                    } // branch gone → treat as resolved
+                });
+                const statusPath = join(runDir, "status.md");
+                const existing = existsSync(statusPath) ? readFileSync(statusPath, "utf-8") : "";
+                const marker = "## Unresolved merge failures";
+                const idx = existing.indexOf(marker);
+                const base = idx >= 0 ? existing.slice(0, idx).replace(/\n+$/, "") : existing;
+                let next = base;
+                if (unresolved.length > 0) {
+                    const list = unresolved.map(b => `  - ${b.branch} — ${b.taskPrompt.slice(0, 120)}`).join("\n");
+                    next = `${base}${base ? "\n\n" : ""}${marker}\n${unresolved.length} branch(es) contain unmerged agent work. Resolve or discard before relying on those changes:\n${list}\n`;
+                    display.appendSteeringEvent(`⚠ ${unresolved.length} unresolved merge failure(s) — see status.md`);
+                }
+                if (next !== existing)
+                    writeFileSync(statusPath, next, "utf-8");
+            }
+            catch { }
             // Fire-and-forget debrief after each wave.
             runDebrief(`Wave ${waveNum + 1} just finished.`);
             // After-wave commands: run shell commands in cwd after each wave (e.g. "supabase db push").

package/dist/steering.js CHANGED Viewed

@@ -16,7 +16,7 @@ const STEER_SCHEMA = {
                 type: "array",
                 items: {
                     type: "object",
-                    properties: { prompt: { type: "string" }, model: { type: "string" }, noWorktree: { type: "boolean" }, type: { type: "string", enum: ["execute", "explore", "critique", "synthesize", "verify", "user-test", "polish"] } },
+                    properties: { prompt: { type: "string" }, model: { type: "string" }, noWorktree: { type: "boolean" }, type: { type: "string", enum: ["execute", "explore", "critique", "synthesize", "verify", "user-test", "polish"] }, postcondition: { type: "string" } },
                     required: ["prompt"],
                 },
             },
@@ -103,7 +103,7 @@ Respond with ONLY a JSON object (no markdown fences):
   "statusUpdate": "REQUIRED  -- concise project status: what's built, what works, what's rough, quality level, key gaps. This replaces the previous status.",
   "estimatedSessionsRemaining": 15,
   "tasks": [
-    {"prompt": "task instruction...", "model": "worker"},
+    {"prompt": "task instruction...", "model": "worker", "postcondition": "test -f src/new-file.ts"},
     {"prompt": "quick icon fix, verified by worker next wave...", "model": "fast"},
     {"prompt": "verify the app end-to-end...", "model": "worker", "noWorktree": true}
   ]
@@ -114,6 +114,8 @@ Respond with ONLY a JSON object (no markdown fences):
 The "model" field on each task: use "worker" (${workerModel}) for all tasks. Use "fast" (${fastModel ?? "not set"}) for small, single-file changes that will be checked by the worker in the next wave.
 Set "noWorktree": true for verify/user-test tasks  -- they need the real project directory with env files, dependencies, and local config.
+OPTIONAL "postcondition": a single shell one-liner that exits 0 when the task is truly done. The framework runs it after merge; if it fails, the agent's "no-op" claim is rejected and the task is retried with the failure output as context. Use it whenever the task has a concrete, machine-checkable outcome. Examples: \`test -f src/tracking/watchlist-poller.ts && grep -q "runWatchlistPoll" src/tracking/watchlist-poller.ts\`, \`grep -q "watchlistPollerTask" src/scraper/scheduler.ts\`, \`pnpm run build\`, \`diff -q src/public/index.html frontend/dist/index.html\`. Keep it cheap (sub-second, no network). Omit for exploratory/research tasks where there is no crisp check.
 If done: {"done": true, "reasoning": "...", "statusUpdate": "...", "estimatedSessionsRemaining": 0, "tasks": []}`;
     onLog("Assessing...", "status");
     onLog(`Reading codebase  -- wave ${history.length + 1}`, "event");
@@ -151,6 +153,7 @@ If done: {"done": true, "reasoning": "...", "statusUpdate": "...", "estimatedSes
         ...(t.model && { model: resolveModel(t.model) }),
         ...(t.noWorktree && { noWorktree: true }),
         ...(t.type && { type: t.type }),
+        ...(typeof t.postcondition === "string" && t.postcondition.trim() && { postcondition: t.postcondition.trim() }),
     }));
     tasks = postProcess(tasks, remainingBudget, onLog);
     endTurn(turn, tasks.length === 0 && !isDone ? "error" : "done");

package/dist/swarm.js CHANGED Viewed

@@ -553,10 +553,13 @@ export class Swarm {
                 let resumePrompt = "Continue. Complete the task.";
                 const runOnce = async (isResume) => {
                     const preamble = "Keep files under ~500 lines. If a file would exceed that, split it.\n\n";
+                    const postBlock = task.postcondition
+                        ? `\n\nEXIT CRITERION — after you finish, the framework will run this shell check in cwd and reject a no-op if it fails:\n  $ ${task.postcondition}\nYour work is not done until that command exits 0. Don't claim no-op unless you can prove the check already passes.`
+                        : "";
                     const agentPrompt = isResume ? resumePrompt
                         : this.config.useWorktrees && !task.noWorktree
-                            ? `You are working in an isolated git worktree. Focus only on this task. Do NOT commit your changes  -- the framework handles that.\n\n${preamble}${task.prompt}`
-                            : `${preamble}${task.prompt}`;
+                            ? `You are working in an isolated git worktree. Focus only on this task. Do NOT commit your changes  -- the framework handles that.\n\n${preamble}${task.prompt}${postBlock}`
+                            : `${preamble}${task.prompt}${postBlock}`;
                     const effectiveModel = task.model || this.config.model;
                     const envOverride = this.config.envForModel?.(effectiveModel);
                     const agentQuery = query({

package/dist/types.d.ts CHANGED Viewed

@@ -16,6 +16,8 @@ export interface Task {
     agentCwd?: string;
     /** The kind of work: "execute" modifies files, others are read-only/analysis. Defaults to "execute". */
     type?: string;
+    /** Shell command that must exit 0 for the task to be considered done. Runs in cwd after merge. Failed postconditions trigger the same retry path as filesChanged=0. */
+    postcondition?: string;
 }
 /** Schema for a JSON task file that defines a batch of work for the swarm. */
 export interface TaskFile {

package/dist/ui.d.ts CHANGED Viewed

@@ -84,8 +84,10 @@ export declare class RunDisplay {
     private lastFrame;
     private onSteer?;
     private onAsk?;
-    /** Set or clear the debrief text shown in the interactive panel. */
-    setDebrief(text: string | undefined): void;
+    /** Set or clear the debrief text shown in the interactive panel.
+     *  When a label is provided alongside resolved text, it's appended to
+     *  the running history so expanded view shows all wave debriefs. */
+    setDebrief(text: string | undefined, label?: string): void;
     constructor(runInfo: RunInfo, liveConfig?: LiveConfig, callbacks?: {
         onSteer?: (text: string) => void;
         onAsk?: (text: string) => void;

package/dist/ui.js CHANGED Viewed

@@ -49,10 +49,16 @@ export class RunDisplay {
     lastFrame = "";
     onSteer;
     onAsk;
-    /** Set or clear the debrief text shown in the interactive panel. */
-    setDebrief(text) {
+    /** Set or clear the debrief text shown in the interactive panel.
+     *  When a label is provided alongside resolved text, it's appended to
+     *  the running history so expanded view shows all wave debriefs. */
+    setDebrief(text, label) {
         if (text) {
             this.panel.set({ mode: "debrief", header: "Debrief", preview: text, body: text });
+            // Append to accumulated history when we have the final text (not loading message)
+            if (label && !text.startsWith("Summarizing")) {
+                this.panel.appendHistory(label, text);
+            }
         }
         else if (this.panel.state.mode === "debrief") {
             this.panel.set({ mode: "none", header: "", preview: "", body: "" });

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "claude-overnight",
-  "version": "1.25.33",
+  "version": "1.25.35",
   "description": "Parallel Claude agents in git worktrees with a usage cap that reserves headroom for your interactive Claude Code. Crash-safe resume. Provider-agnostic model catalog (Anthropic, Cursor, OpenAI, Gemini, DeepSeek, Llama, Qwen) with capability-based task scoping.",
   "type": "module",
   "bin": {
@@ -17,7 +17,7 @@
   "dependencies": {
     "@anthropic-ai/claude-agent-sdk": "^0.2.92",
     "chalk": "^5.4.1",
-    "cursor-composer-in-claude": "0.9.1",
+    "cursor-composer-in-claude": "0.9.2",
     "jsonwebtoken": "^9.0.2"
   },
   "devDependencies": {

package/plugins/claude-overnight/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "claude-overnight",
-  "version": "1.25.33",
+  "version": "1.25.35",
   "description": "Claude Code skill for understanding, installing, and inspecting claude-overnight runs  -- parallel Claude agents in git worktrees with thinking waves, multi-wave steering, and crash-safe resume. Supports Cursor API Proxy, Qwen, OpenRouter.",
   "author": {
     "name": "Francesco Fornace"