npm - claude-overnight - Versions diffs - 1.8.1 → 1.8.4 - Mend

claude-overnight 1.8.1 → 1.8.4

This diff shows the changes between two publicly available versions of this package, as they appear in the public registry to which they were released. It is provided for informational purposes only.

Files changed (4) hide show

package/dist/index.js CHANGED Viewed

@@ -1333,6 +1333,7 @@ async function main() {
     const waveMerge = (flex && runBranch) ? "yolo" : mergeStrategy;
     // Graceful drain
     let stopping = false;
+    let steeringFailed = false;
     const gracefulStop = (signal) => {
         if (stopping) {
             currentSwarm?.cleanup();
@@ -1367,12 +1368,36 @@ async function main() {
                     writeStatus(runDir, steer.statusUpdate);
                 if (steer.goalUpdate)
                     writeGoalUpdate(runDir, steer.goalUpdate);
+                // Persist steering reasoning for debugging
+                const steerDir0 = join(runDir, "steering");
+                mkdirSync(steerDir0, { recursive: true });
+                writeFileSync(join(steerDir0, `pre-wave-attempt-${steerAttempts}.json`), JSON.stringify({
+                    done: steer.done, waveKind: steer.waveKind, reasoning: steer.reasoning,
+                    taskCount: steer.tasks.length, statusUpdate: steer.statusUpdate, goalUpdate: steer.goalUpdate,
+                }, null, 2), "utf-8");
                 if (steer.done || steer.tasks.length === 0) {
                     const hasVerification = waveHistory.some(w => w.kind.includes("verif"));
                     if (!hasVerification && remaining >= 1) {
-                        display.updateText(`Done blocked \u2014 verification required`);
-                        lastWaveKind = "done-blocked";
-                        continue;
+                        // Auto-compose verification instead of retrying steering
+                        display.updateText(`Done blocked — auto-composing verification wave`);
+                        currentTasks = [{
+                                id: "verify-0",
+                                prompt: `## Verification: Build, run, and test the application end-to-end
+You are the final gatekeeper before this run is marked complete. The steerer believes the objective is done. Your job: prove it or disprove it.
+1. Run the build (npm run build, or whatever this project uses). Report ALL errors.
+2. Start the dev server. If a port is taken, try another. If a dependency is missing, install it.
+3. Navigate key flows as a real user would. Check that the main features work.
+4. Write your findings to .claude-overnight/latest/verifications/final-verify.md
+Be relentless. Do not give up if the first approach fails. Search the codebase for dev login routes, test tokens, seed users, env vars, CLI auth commands, or any bypass.`,
+                                noWorktree: true,
+                                model: plannerModel,
+                            }];
+                        lastWaveKind = "verification";
+                        overheadBudgetUsed += 1;
+                        break;
                     }
                     objectiveComplete = true;
                     remaining = 0;
@@ -1493,15 +1518,40 @@ async function main() {
                     writeStatus(runDir, steer.statusUpdate);
                 if (steer.goalUpdate)
                     writeGoalUpdate(runDir, steer.goalUpdate);
+                // Persist steering reasoning for debugging
+                const steerDir = join(runDir, "steering");
+                mkdirSync(steerDir, { recursive: true });
+                writeFileSync(join(steerDir, `wave-${waveNum}-attempt-${steerAttempts}.json`), JSON.stringify({
+                    done: steer.done, waveKind: steer.waveKind, reasoning: steer.reasoning,
+                    taskCount: steer.tasks.length, statusUpdate: steer.statusUpdate, goalUpdate: steer.goalUpdate,
+                }, null, 2), "utf-8");
                 const execWaves = waveHistory.filter(w => w.kind === "execute").length;
                 if (execWaves > 0 && execWaves % 5 === 0)
                     archiveMilestone(runDir, waveNum);
                 if (steer.done || steer.tasks.length === 0) {
                     const hasVerification = waveHistory.some(w => w.kind.includes("verif"));
                     if (!hasVerification && remaining >= 1) {
-                        display.updateText(`Done blocked \u2014 verification required`);
-                        lastWaveKind = "done-blocked";
-                        continue;
+                        // Auto-compose a verification wave instead of retrying steering
+                        display.updateText(`Done blocked — auto-composing verification wave`);
+                        currentTasks = [{
+                                id: "verify-0",
+                                prompt: `## Verification: Build, run, and test the application end-to-end
+You are the final gatekeeper before this run is marked complete. The steerer believes the objective is done. Your job: prove it or disprove it.
+1. Run the build (npm run build, or whatever this project uses). Report ALL errors.
+2. Start the dev server. If a port is taken, try another. If a dependency is missing, install it.
+3. Navigate key flows as a real user would. Check that the main features work.
+4. Write your findings to .claude-overnight/latest/verifications/final-verify.md
+Be relentless. Do not give up if the first approach fails. Search the codebase for dev login routes, test tokens, seed users, env vars, CLI auth commands, or any bypass.`,
+                                noWorktree: true,
+                                model: plannerModel,
+                            }];
+                        lastWaveKind = "verification";
+                        overheadBudgetUsed += 1;
+                        steered = true;
+                        break;
                     }
                     objectiveComplete = true;
                     remaining = 0;
@@ -1526,8 +1576,13 @@ async function main() {
             catch (err) {
                 const steerCost = getTotalPlannerCost() - plannerCostBefore;
                 accCost += steerCost;
+                if (steerAttempts < 3) {
+                    display.updateText(`Steering failed (attempt ${steerAttempts}/3) — retrying...`);
+                    continue;
+                }
                 display.stop();
-                console.log(chalk.yellow(`  Steering failed: ${err.message?.slice(0, 80)} \u2014 stopping\n`));
+                console.log(chalk.yellow(`  Steering failed after ${steerAttempts} attempts: ${err.message?.slice(0, 80)} — stopping\n`));
+                steeringFailed = true;
                 break;
             }
         }
@@ -1538,7 +1593,8 @@ async function main() {
     display.stop();
     // Only truly "done" if steering explicitly completed the objective (or non-flex single wave with budget exhausted)
     const trulyDone = objectiveComplete || (!flex && remaining <= 0);
-    const finalPhase = trulyDone ? "done" : "capped";
+    const wasCapped = lastCapped || lastAborted;
+    const finalPhase = trulyDone ? "done" : steeringFailed ? "steering" : wasCapped ? "capped" : remaining <= 0 ? "capped" : "stopped";
     saveRunState(runDir, {
         id: `run-${new Date().toISOString().slice(0, 19)}`, objective: objective ?? "", budget: budget ?? tasks.length,
         remaining, workerModel, plannerModel, concurrency, permissionMode,
@@ -1581,14 +1637,26 @@ async function main() {
     if (trulyDone) {
         console.log(chalk.bold.green(`  CLAUDE OVERNIGHT — COMPLETE`));
     }
-    else {
+    else if (steeringFailed) {
+        console.log(chalk.bold.yellow(`  CLAUDE OVERNIGHT — STEERING FAILED`));
+    }
+    else if (remaining <= 0) {
         console.log(chalk.bold.yellow(`  CLAUDE OVERNIGHT — BUDGET EXHAUSTED`));
     }
+    else if (lastCapped) {
+        console.log(chalk.bold.yellow(`  CLAUDE OVERNIGHT — RATE LIMITED`));
+    }
+    else if (stopping || lastAborted) {
+        console.log(chalk.bold.yellow(`  CLAUDE OVERNIGHT — INTERRUPTED`));
+    }
+    else {
+        console.log(chalk.bold.yellow(`  CLAUDE OVERNIGHT — STOPPED`));
+    }
     console.log(chalk.green(`  ${bannerChar.repeat(Math.min(termW - 4, 60))}`));
     console.log("");
     // Stats grid
     const statRows = [
-        [chalk.bold("Waves"), String(waves), chalk.bold("Sessions"), `${accCompleted} done${accFailed > 0 ? ` / ${accFailed} failed` : ""}`],
+        [chalk.bold("Waves"), String(waves), chalk.bold("Sessions"), `${accCompleted} done${accFailed > 0 ? ` / ${accFailed} failed` : ""}${remaining > 0 ? ` (${remaining} remaining)` : ""}`],
         [chalk.bold("Cost"), chalk.green(`$${accCost.toFixed(2)}`), chalk.bold("Elapsed"), elapsedStr],
         [chalk.bold("Merged"), `${totalMerged} branches`, chalk.bold("Conflicts"), totalConflicts > 0 ? chalk.red(String(totalConflicts)) : chalk.green("0")],
         [chalk.bold("Tokens"), `${fmtTokens(accIn)} in / ${fmtTokens(accOut)} out`, chalk.bold("Tool calls"), String(accTools)],

package/dist/planner.js CHANGED Viewed

@@ -15,6 +15,46 @@ Consistency is what makes complex things feel simple. One design system, rigid r
 `;
 const NUDGE_MS = 15 * 60 * 1000; // 15 min — close & restart with "continue"
 const HARD_TIMEOUT_MS = 30 * 60 * 1000; // 30 min — give up
+const WALL_CLOCK_LIMIT_MS = 45 * 60 * 1000; // 45 min — absolute max per planner call
+// ── JSON schemas for structured output ──
+const TASKS_SCHEMA = {
+    type: "json_schema",
+    schema: {
+        type: "object",
+        properties: { tasks: { type: "array", items: { type: "object", properties: { prompt: { type: "string" } }, required: ["prompt"] } } },
+        required: ["tasks"],
+    },
+};
+const THEMES_SCHEMA = {
+    type: "json_schema",
+    schema: {
+        type: "object",
+        properties: { themes: { type: "array", items: { type: "string" } } },
+        required: ["themes"],
+    },
+};
+const STEER_SCHEMA = {
+    type: "json_schema",
+    schema: {
+        type: "object",
+        properties: {
+            done: { type: "boolean" },
+            waveKind: { type: "string" },
+            reasoning: { type: "string" },
+            statusUpdate: { type: "string" },
+            goalUpdate: { type: "string" },
+            tasks: {
+                type: "array",
+                items: {
+                    type: "object",
+                    properties: { prompt: { type: "string" }, model: { type: "string" }, noWorktree: { type: "boolean" } },
+                    required: ["prompt"],
+                },
+            },
+        },
+        required: ["done", "tasks", "reasoning", "statusUpdate"],
+    },
+};
 export function detectModelTier(model) {
     const m = model.toLowerCase();
     if (m === "default" || m.includes("opus"))
@@ -211,6 +251,7 @@ export function getPlannerRateLimitInfo() { return _plannerRateLimitInfo; }
 async function runPlannerQueryOnce(prompt, opts, onLog) {
     _plannerRateLimitInfo = { utilization: 0, status: "", isUsingOverage: false, windows: new Map(), costUsd: 0 };
     let resultText = "";
+    let structuredOutput;
     const startedAt = Date.now();
     const isResume = !!opts.resumeSessionId;
     const pq = query({
@@ -225,6 +266,7 @@ async function runPlannerQueryOnce(prompt, opts, onLog) {
             persistSession: true, // needed for interrupt+resume
             includePartialMessages: true,
             ...(isResume && { resume: opts.resumeSessionId }),
+            ...(opts.outputFormat && { outputFormat: opts.outputFormat }),
         },
     });
     // Progress ticker — fast updates with compact format
@@ -249,7 +291,14 @@ async function runPlannerQueryOnce(prompt, opts, onLog) {
     let timer;
     const watchdog = new Promise((_, reject) => {
         const check = () => {
+            const elapsed = Date.now() - startedAt;
             const silent = Date.now() - lastActivity;
+            // Wall-clock limit: kill if session has been running too long regardless of activity
+            if (elapsed >= WALL_CLOCK_LIMIT_MS) {
+                pq.interrupt().catch(() => pq.close());
+                reject(new Error(`Planner hit wall-clock limit (${Math.round(elapsed / 60000)}min) — likely rate limited`));
+                return;
+            }
             if (silent >= timeoutMs) {
                 // Try interrupt (graceful), fall back to close (hard kill)
                 pq.interrupt().catch(() => pq.close());
@@ -315,8 +364,10 @@ async function runPlannerQueryOnce(prompt, opts, onLog) {
                     _plannerRateLimitInfo.costUsd += costUsd;
                     _totalPlannerCostUsd += costUsd;
                 }
-                if (msg.subtype === "success")
+                if (msg.subtype === "success") {
+                    structuredOutput = r.structured_output;
                     resultText = r.result || "";
+                }
                 else
                     throw new Error(`Planner failed: ${r.result || msg.subtype}`);
             }
@@ -329,6 +380,10 @@ async function runPlannerQueryOnce(prompt, opts, onLog) {
         clearTimeout(timer);
         clearInterval(ticker);
     }
+    // Prefer SDK-validated structured output — guaranteed to match the schema
+    if (structuredOutput != null && typeof structuredOutput === "object") {
+        return JSON.stringify(structuredOutput);
+    }
     return resultText;
 }
 function postProcess(raw, budget, onLog) {
@@ -395,10 +450,10 @@ export async function planTasks(objective, cwd, plannerModel, workerModel, permi
     onLog("Analyzing codebase...");
     const prompt = plannerPrompt(objective, workerModel, budget, concurrency, flexNote);
     const fileInstruction = outFile ? `\n\nAFTER generating the JSON, also write it to ${outFile} using the Write tool.` : "";
-    const resultText = await runPlannerQuery(prompt + fileInstruction, { cwd, model: plannerModel, permissionMode }, onLog);
+    const resultText = await runPlannerQuery(prompt + fileInstruction, { cwd, model: plannerModel, permissionMode, outputFormat: TASKS_SCHEMA }, onLog);
     const parsed = await extractTaskJson(resultText, async () => {
         onLog("Retrying...");
-        return runPlannerQuery(`Your previous response was not valid JSON. Respond with ONLY a JSON object {"tasks":[{"prompt":"..."}]}.\n\n${prompt}`, { cwd, model: plannerModel, permissionMode }, onLog);
+        return runPlannerQuery(`Your previous response was not valid JSON. Respond with ONLY a JSON object {"tasks":[{"prompt":"..."}]}.\n\n${prompt}`, { cwd, model: plannerModel, permissionMode, outputFormat: TASKS_SCHEMA }, onLog);
     }, onLog, outFile);
     let tasks = (parsed.tasks || []).map((t, i) => ({
         id: String(i),
@@ -412,7 +467,7 @@ export async function planTasks(objective, cwd, plannerModel, workerModel, permi
 }
 // ── Thinking wave ──
 export async function identifyThemes(objective, count, model, permissionMode, onLog = () => { }) {
-    const resultText = await runPlannerQuery(`Split this objective into exactly ${count} independent research angles for architects exploring a codebase. Each angle should cover a distinct aspect.\n\nObjective: ${objective}\n\nReturn ONLY a JSON object: {"themes": ["angle description", ...]}`, { cwd: process.cwd(), model, permissionMode }, onLog);
+    const resultText = await runPlannerQuery(`Split this objective into exactly ${count} independent research angles for architects exploring a codebase. Each angle should cover a distinct aspect.\n\nObjective: ${objective}\n\nReturn ONLY a JSON object: {"themes": ["angle description", ...]}`, { cwd: process.cwd(), model, permissionMode, outputFormat: THEMES_SCHEMA }, onLog);
     const parsed = attemptJsonParse(resultText);
     if (parsed?.themes && Array.isArray(parsed.themes))
         return parsed.themes.slice(0, count);
@@ -479,10 +534,10 @@ Requirements:
 Respond with ONLY a JSON object (no markdown fences):
 {"tasks": [{"prompt": "..."}]}${fileInstruction}`;
     onLog("Synthesizing...");
-    const resultText = await runPlannerQuery(prompt, { cwd, model: plannerModel, permissionMode }, onLog);
+    const resultText = await runPlannerQuery(prompt, { cwd, model: plannerModel, permissionMode, outputFormat: TASKS_SCHEMA }, onLog);
     const parsed = await extractTaskJson(resultText, async () => {
         onLog("Retrying...");
-        return runPlannerQuery(`Your previous response was not valid JSON. Respond with ONLY a JSON object {"tasks":[{"prompt":"..."}]}.\n\n${prompt}`, { cwd, model: plannerModel, permissionMode }, onLog);
+        return runPlannerQuery(`Your previous response was not valid JSON. Respond with ONLY a JSON object {"tasks":[{"prompt":"..."}]}.\n\n${prompt}`, { cwd, model: plannerModel, permissionMode, outputFormat: TASKS_SCHEMA }, onLog);
     }, onLog, outFile);
     let tasks = (parsed.tasks || []).map((t, i) => ({
         id: String(i),
@@ -519,10 +574,10 @@ ${scaleNote} ${concurrency} agents run in parallel. Update the plan accordingly.
 Respond with ONLY a JSON object (no markdown):
 {"tasks":[{"prompt":"..."}]}`;
-    const resultText = await runPlannerQuery(prompt, { cwd, model: plannerModel, permissionMode }, onLog);
+    const resultText = await runPlannerQuery(prompt, { cwd, model: plannerModel, permissionMode, outputFormat: TASKS_SCHEMA }, onLog);
     const parsed = await extractTaskJson(resultText, async () => {
         onLog("Retrying...");
-        return runPlannerQuery(`Your previous response was not valid JSON. Respond with ONLY a JSON object {"tasks":[{"prompt":"..."}]}.\n\n${prompt}`, { cwd, model: plannerModel, permissionMode }, onLog);
+        return runPlannerQuery(`Your previous response was not valid JSON. Respond with ONLY a JSON object {"tasks":[{"prompt":"..."}]}.\n\n${prompt}`, { cwd, model: plannerModel, permissionMode, outputFormat: TASKS_SCHEMA }, onLog);
     }, onLog);
     let tasks = (parsed.tasks || []).map((t, i) => ({
         id: String(i),
@@ -725,19 +780,22 @@ Set "noWorktree": true for verify/user-test tasks — they run in the real proje
 If done: {"done": true, "waveKind": "done", "reasoning": "...", "statusUpdate": "...", "tasks": []}`;
     onLog("Assessing...");
-    const resultText = await runPlannerQuery(prompt, { cwd, model: plannerModel, permissionMode }, onLog);
+    const resultText = await runPlannerQuery(prompt, { cwd, model: plannerModel, permissionMode, outputFormat: STEER_SCHEMA }, onLog);
     const parsed = await (async () => {
         const first = attemptJsonParse(resultText);
         if (first)
             return first;
-        onLog("Retrying...");
-        const retryText = await runPlannerQuery(`Your previous response was not valid JSON. Respond with ONLY a JSON object {"done":false,"waveKind":"execute","reasoning":"...","statusUpdate":"...","tasks":[{"prompt":"..."}]}.\n\n${prompt}`, { cwd, model: plannerModel, permissionMode }, onLog);
+        // Log what failed so we can debug
+        onLog(`Steering parse failed (${resultText.length} chars). Asking model to fix...`);
+        // Send the broken response back so the model can fix its own output
+        const snippet = resultText.length > 2000 ? resultText.slice(0, 1000) + "\n...\n" + resultText.slice(-800) : resultText;
+        const retryText = await runPlannerQuery(`Your previous steering response could not be parsed as JSON. Here is what you returned:\n\n---\n${snippet}\n---\n\nExtract or rewrite the above as ONLY a valid JSON object with this schema: {"done":boolean,"waveKind":"execute"|"done","reasoning":"...","statusUpdate":"...","tasks":[{"prompt":"..."}]}\n\nRespond with ONLY the JSON, no markdown fences, no explanation.`, { cwd, model: plannerModel, permissionMode, outputFormat: STEER_SCHEMA }, onLog);
         const retryParsed = attemptJsonParse(retryText);
         if (retryParsed)
             return retryParsed;
         // Don't return done:true on parse failure — that permanently marks the run complete.
         // Throw so the caller's catch block handles it as a transient steering failure.
-        throw new Error("Could not parse steering response after retry");
+        throw new Error(`Could not parse steering response after retry (${resultText.length} chars: ${resultText.slice(0, 120)}...)`);
     })();
     const isDone = parsed.done === true;
     const waveKind = parsed.waveKind || parsed.action || (isDone ? "done" : "execute");

package/dist/types.d.ts CHANGED Viewed

@@ -144,7 +144,7 @@ export interface RunState {
     accOut?: number;
     accTools?: number;
     branches: BranchRecord[];
-    phase: "steering" | "capped" | "done";
+    phase: "steering" | "capped" | "done" | "stopped";
     startedAt: string;
     cwd: string;
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "claude-overnight",
-  "version": "1.8.1",
+  "version": "1.8.4",
   "description": "Run 10, 100, or 1000 Claude agents overnight. Parallel autonomous AI coding with thinking waves, iterative quality steering, crash recovery, and rate limit handling. Built on the Claude Agent SDK.",
   "type": "module",
   "bin": {