npm - claude-overnight - Versions diffs - 1.25.46 → 1.25.47 - Mend

claude-overnight 1.25.46 → 1.25.47

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/dist/cli/help.js +3 -1
package/dist/core/_version.d.ts +1 -1
package/dist/core/_version.js +1 -1
package/dist/index.js +44 -4
package/dist/planner/verifier.d.ts +66 -0
package/dist/planner/verifier.js +117 -0
package/dist/run/run.js +29 -0
package/dist/run/wave-loop.d.ts +2 -0
package/dist/run/wave-loop.js +9 -8
package/package.json +1 -1
package/plugins/claude-overnight/.claude-plugin/plugin.json +1 -1

package/dist/cli/help.js CHANGED Viewed

@@ -16,6 +16,7 @@ export function printHelp() {
   ${chalk.cyan("Usage")}
     claude-overnight                          ${chalk.dim("interactive mode")}
     claude-overnight tasks.json               ${chalk.dim("task file mode")}
+    claude-overnight plan.md                  ${chalk.dim("plan file mode (.md) — coach + flex")}
     claude-overnight "fix auth" "add tests"   ${chalk.dim("inline tasks")}
   ${chalk.cyan("Flags")}
@@ -30,7 +31,8 @@ export function printHelp() {
     --allow-extra-usage    Allow extra/overage usage ${chalk.dim("(default: stop when plan limits hit)")}
     --extra-usage-budget=N Max $ for extra usage ${chalk.dim("(implies --allow-extra-usage)")}
     --timeout=SECONDS      Agent inactivity timeout ${chalk.dim("(default: 900s, nudges at timeout, kills at 2×)")}
-    --no-flex              Disable adaptive multi-wave planning ${chalk.dim("(run all tasks in one shot)")}
+    --flex                 Force adaptive multi-wave planning ${chalk.dim("(steering between waves)")}
+    --no-flex              Fixed plan mode ${chalk.dim("(verifier between waves, no re-planning)")}
     --worktrees            Force worktree isolation on ${chalk.dim("(default: auto-detect git repo)")}
     --no-worktrees         Disable worktree isolation ${chalk.dim("(all agents work in real cwd)")}
     --merge=MODE           Merge strategy: yolo or branch ${chalk.dim("(default: yolo)")}

package/dist/core/_version.d.ts CHANGED Viewed

	@@ -1 +1 @@
1	- export declare const VERSION = "1.25.46";
1	+ export declare const VERSION = "1.25.47";

package/dist/core/_version.js CHANGED Viewed

@@ -1,2 +1,2 @@
 // Auto-generated by build — do not edit manually.
-export const VERSION = "1.25.46";
+export const VERSION = "1.25.47";

package/dist/index.js CHANGED Viewed

@@ -8,7 +8,7 @@ import { setPlannerEnvResolver } from "./planner/query.js";
 import { setTranscriptRunDir } from "./core/transcripts.js";
 import { pickModel, loadProviders, buildEnvResolver, healthCheckCursorProxy, PROXY_DEFAULT_URL, isCursorProxyProvider, bundledComposerProxyShellCommand, warnMacCursorAgentShellPatchIfNeeded, } from "./providers/index.js";
 import { executeRun } from "./run/run.js";
-import { parseCliFlags, fetchModels, ask, select, selectKey, loadTaskFile, validateConcurrency, isGitRepo, validateGitRepo, showPlan, } from "./cli/cli.js";
+import { parseCliFlags, fetchModels, ask, select, selectKey, loadTaskFile, loadPlanFile, validateConcurrency, isGitRepo, validateGitRepo, showPlan, } from "./cli/cli.js";
 import { loadRunState, findOrphanedDesigns, backfillOrphanedPlans, readPreviousRunKnowledge, createRunDir, updateLatestSymlink, } from "./state/state.js";
 import { runSetupCoach, loadUserSettings, saveUserSettings, COACH_MODEL } from "./planner/coach/coach.js";
 import { editRunSettings, formatSettingsSummary } from "./cli/settings.js";
@@ -63,11 +63,21 @@ async function main() {
     // ── Load tasks ──
     let tasks = [];
     let fileCfg;
+    let planFileContent;
     const jsonFiles = args.filter(a => a.endsWith(".json"));
+    const mdFiles = args.filter(a => a.endsWith(".md"));
     if (jsonFiles.length > 1) {
         console.error(chalk.red(`  Multiple task files provided. Only one .json file is supported.`));
         process.exit(1);
     }
+    if (mdFiles.length > 1) {
+        console.error(chalk.red(`  Multiple plan files provided. Only one .md file is supported.`));
+        process.exit(1);
+    }
+    if (jsonFiles.length && mdFiles.length) {
+        console.error(chalk.red(`  Cannot mix a .json task file with a .md plan file.`));
+        process.exit(1);
+    }
     for (const arg of args) {
         if (arg.endsWith(".json")) {
             if (tasks.length > 0) {
@@ -77,8 +87,13 @@ async function main() {
             fileCfg = loadTaskFile(arg);
             tasks = fileCfg.tasks;
         }
+        else if (arg.endsWith(".md")) {
+            const plan = loadPlanFile(arg);
+            planFileContent = plan.planContent;
+            fileCfg = { tasks: [], objective: plan.objective, flexiblePlan: true };
+        }
         else if (!arg.startsWith("-") && existsSync(resolve(arg))) {
-            console.error(chalk.red(`  "${arg}" looks like a file but doesn't end in .json. Rename it or quote the string.`));
+            console.error(chalk.red(`  "${arg}" looks like a file but doesn't end in .json or .md. Rename it or quote the string.`));
             process.exit(1);
         }
         else {
@@ -341,6 +356,30 @@ async function main() {
         console.log(chalk.dim(`  ╰${"─".repeat(innerLen + 4)}╯`));
     }
     else {
+        // ── Setup coach in confirm-only mode (task/plan file on a TTY) ──
+        let coachResult = null;
+        if (fileCfg?.objective && process.stdin.isTTY
+            && !argv.includes("--no-coach") && !loadUserSettings().skipCoach) {
+            const settings = loadUserSettings();
+            const cModel = settings.coachModel ?? COACH_MODEL;
+            const cProvider = settings.coachProviderId
+                ? loadProviders().find(p => p.id === settings.coachProviderId) : undefined;
+            coachResult = await runSetupCoach(fileCfg.objective, cwd, {
+                providers: loadProviders(), cliFlags, coachModel: cModel, coachProvider: cProvider,
+                planContent: planFileContent, confirmOnly: true,
+            });
+            if (coachResult) {
+                coachedOriginal = fileCfg.objective;
+                coachedAt = Date.now();
+                fileCfg.objective = coachResult.improvedObjective;
+                objective = coachResult.improvedObjective;
+                const rec = coachResult.recommended;
+                if (fileCfg.concurrency == null)
+                    fileCfg.concurrency = rec.concurrency;
+                if (fileCfg.usageCap == null && rec.usageCap != null)
+                    fileCfg.usageCap = Math.round(rec.usageCap * 100);
+            }
+        }
         let models = [];
         if (!cliFlags.model && !fileCfg?.model)
             models = await fetchModels(5_000);
@@ -374,7 +413,7 @@ async function main() {
             }
         }
         concurrency = cliFlags.concurrency ? parseInt(cliFlags.concurrency) : (fileCfg?.concurrency ?? 5);
-        budget = cliFlags.budget ? parseInt(cliFlags.budget) : undefined;
+        budget = cliFlags.budget ? parseInt(cliFlags.budget) : coachResult?.recommended.budget;
         if (budget != null && (isNaN(budget) || budget < 1)) {
             console.error(chalk.red(`  --budget must be a positive integer`));
             process.exit(1);
@@ -442,7 +481,8 @@ async function main() {
         console.log(chalk.dim(`  ${workerModel}  concurrency=${concurrency}  worktrees=${useWorktrees}  merge=${mergeStrategy}${capStr}${extraStr}`));
     }
     // ── Plan phase ──
-    const flex = !argv.includes("--no-flex") && (fileCfg?.flexiblePlan ?? objective != null) && objective != null && (budget ?? 10) > 2;
+    const flexFlag = argv.includes("--flex") ? true : argv.includes("--no-flex") ? false : undefined;
+    const flex = objective != null && (flexFlag ?? ((fileCfg?.flexiblePlan ?? true) && (budget ?? 10) > 2));
     const agentTimeoutMs = cliFlags.timeout ? parseFloat(cliFlags.timeout) * 1000 : undefined;
     let thinkingUsed = 0, thinkingCost = 0, thinkingIn = 0, thinkingOut = 0, thinkingTools = 0;
     let thinkingHistory;

package/dist/planner/verifier.d.ts ADDED Viewed

@@ -0,0 +1,66 @@
+import type { Task, SteerResult, WaveSummary } from "../core/types.js";
+import { type PlannerLog } from "./query.js";
+export declare const VERIFY_SCHEMA: {
+    type: "json_schema";
+    schema: {
+        type: string;
+        properties: {
+            done: {
+                type: string;
+            };
+            reasoning: {
+                type: string;
+            };
+            statusUpdate: {
+                type: string;
+            };
+            estimatedSessionsRemaining: {
+                type: string;
+            };
+            verifiedCount: {
+                type: string;
+            };
+            retryCount: {
+                type: string;
+            };
+            tasks: {
+                type: string;
+                items: {
+                    type: string;
+                    properties: {
+                        prompt: {
+                            type: string;
+                        };
+                        model: {
+                            type: string;
+                        };
+                        noWorktree: {
+                            type: string;
+                        };
+                        type: {
+                            type: string;
+                            enum: string[];
+                        };
+                        postcondition: {
+                            type: string;
+                        };
+                    };
+                    required: string[];
+                };
+            };
+        };
+        required: string[];
+    };
+};
+/**
+ * Verify the previous wave and compose the next fixed batch of pending tasks.
+ *
+ * Intended as the fixed-plan counterpart of `steerWave`; the prompt asks the planner model to:
+ *   1. Run the project's build/smoke checks and fix last-wave regressions directly.
+ *   2. Compose the next batch from the pending list (retries allowed) so `concurrency` tasks can run in parallel.
+ *   3. Set `done` once the plan is complete, the build passes and a verify task confirmed the app runs.
+ *
+ * @returns A `SteerResult`; in the implementation `done` is also true when no task survives `postProcess`.
+ * @throws {Error} If the planner response cannot be parsed as JSON.
+ */
+export declare function verifyWave(objective: string, pendingTasks: Task[], lastWave: WaveSummary | undefined, remainingBudget: number, cwd: string, plannerModel: string, concurrency: number, onLog: PlannerLog, transcriptName?: string): Promise<SteerResult>;

package/dist/planner/verifier.js ADDED Viewed

@@ -0,0 +1,117 @@
+import { runPlannerQuery, attemptJsonParse, postProcess } from "./query.js";
+import { createTurn, beginTurn, endTurn } from "../core/turns.js";
+// Verifier schema — presumably STEER_SCHEMA's shape (not visible here) plus optional numeric
+// `verifiedCount` / `retryCount`; there is no `verifiedIds` list, and verifyWave reads neither count.
+export const VERIFY_SCHEMA = {
+    type: "json_schema",
+    schema: {
+        type: "object",
+        properties: {
+            done: { type: "boolean" },
+            reasoning: { type: "string" },
+            statusUpdate: { type: "string" },
+            estimatedSessionsRemaining: { type: "number" },
+            verifiedCount: { type: "number" },
+            retryCount: { type: "number" },
+            tasks: {
+                type: "array",
+                items: {
+                    type: "object",
+                    properties: {
+                        prompt: { type: "string" },
+                        model: { type: "string" },
+                        noWorktree: { type: "boolean" },
+                        type: { type: "string", enum: ["execute", "explore", "critique", "synthesize", "verify", "user-test", "polish"] },
+                        postcondition: { type: "string" },
+                    },
+                    required: ["prompt"],
+                },
+            },
+        },
+        required: ["done", "tasks", "reasoning", "statusUpdate", "estimatedSessionsRemaining"],
+    },
+};
+function renderLastWave(w) { // Formats the previous wave's task outcomes as text for the verifier prompt.
+    if (!w)
+        return "(first wave — nothing to verify yet)"; // no wave summary was supplied
+    const lines = w.tasks.map(t => {
+        const files = t.filesChanged ? ` (${t.filesChanged} files)` : " (0 files)"; // a missing or zero count renders as "(0 files)"
+        const err = t.error ? ` — ${t.error}` : "";
+        return `  - [${t.status}] ${t.prompt.slice(0, 160)}${files}${err}`; // prompt is cut to 160 chars
+    }).join("\n");
+    return `Wave ${w.wave + 1}:\n${lines}`; // presumably w.wave is zero-based, hence the + 1 — confirm
+}
+/**
+ * Verify the previous wave and compose the next fixed batch of pending tasks.
+ *
+ * Intended as the fixed-plan counterpart of `steerWave`; the prompt asks the planner model to:
+ *   1. Run the project's build/smoke checks and fix last-wave regressions directly.
+ *   2. Compose the next batch from the pending list (retries allowed) so `concurrency` tasks can run in parallel.
+ *   3. Set `done` once the plan is complete, the build passes and a verify task confirmed the app runs.
+ *
+ * @returns {Promise<object>} `{ done, tasks, reasoning, statusUpdate, estimatedSessionsRemaining }`; `done` is also true when no task survives `postProcess`.
+ * @throws {Error} If the planner response cannot be parsed as JSON.
+ */
+export async function verifyWave(objective, pendingTasks, lastWave, remainingBudget, cwd, plannerModel, concurrency, onLog, transcriptName = "verify") {
+    const pendingList = pendingTasks.length > 0
+        ? pendingTasks.map((t, i) => `  ${i + 1}. ${t.prompt.slice(0, 200)}`).join("\n") // each prompt is cut to 200 chars in the listing
+        : "(none — every task from the original plan has been attempted)";
+    const prompt = `You are the verifier + fix gate between waves of a fixed-plan execution.
+Objective: ${objective}
+## What just happened
+${renderLastWave(lastWave)}
+## Remaining plan (pending tasks, in order)
+${pendingList}
+## Your job
+1. Run the project's build and smoke checks. Use the tools you have (Bash, Read, Grep, Edit, Write).
+2. For any regression the last wave introduced, make the fix directly. Don't delegate a fix to the next wave if you can do it in two edits.
+3. Compose the next batch of pending tasks to dispatch — pick tasks with non-overlapping file scopes so ${concurrency} can run in parallel.
+4. If the plan is complete AND the build passes AND one verify task has confirmed the app runs, set done=true.
+## Output
+Respond with ONLY a JSON object (no markdown fences):
+{"done":boolean,"reasoning":"...","statusUpdate":"REQUIRED","estimatedSessionsRemaining":N,"verifiedCount":N,"retryCount":N,"tasks":[{"prompt":"...","type":"execute","postcondition":"..."}]}
+Remaining budget: ${remainingBudget} agent sessions. Include retries inside tasks[] (same format) if a pending step needs a second attempt with corrected context.`;
+    onLog("Verifying last wave…", "status");
+    const turn = createTurn("steer", `Verify wave`, `verify-${lastWave?.wave ?? 0}`, plannerModel); // created with kind "steer"; the id falls back to 0 when there is no last wave
+    beginTurn(turn);
+    const resultText = await runPlannerQuery(prompt, { // NOTE(review): if this rejects, endTurn is never called for the turn begun above — confirm intended
+        cwd, model: plannerModel, outputFormat: VERIFY_SCHEMA,
+        transcriptName, turnId: turn.id, maxTurns: 80,
+    }, onLog);
+    const parsed = attemptJsonParse(resultText);
+    if (!parsed) {
+        endTurn(turn, "error");
+        throw new Error(`Could not parse verifier response (${resultText.length} chars): ${resultText.slice(0, 120)}`);
+    }
+    const isDone = parsed.done === true; // only a literal true counts as done
+    const statusUpdate = parsed.statusUpdate || undefined; // an empty string becomes undefined
+    const estRaw = parsed.estimatedSessionsRemaining;
+    const estimatedSessionsRemaining = typeof estRaw === "number" && estRaw >= 0 ? Math.round(estRaw) : undefined; // negative or non-numeric estimates are dropped
+    let tasks = (parsed.tasks || []).map((t, i) => ({
+        id: String(i), // ids are the index within this batch, not carried over from pendingTasks
+        prompt: typeof t === "string" ? t : t.prompt, // tolerates bare-string task entries
+        ...(t.noWorktree && { noWorktree: true }),
+        ...(t.type && { type: t.type }),
+        ...(typeof t.postcondition === "string" && t.postcondition.trim() && { postcondition: t.postcondition.trim() }),
+    })); // NOTE(review): VERIFY_SCHEMA allows a per-task "model", but it is not copied here — confirm intended
+    tasks = postProcess(tasks, remainingBudget, onLog);
+    endTurn(turn, tasks.length === 0 && !isDone ? "error" : "done"); // an empty batch without done=true ends the turn as "error"
+    if (isDone) {
+        return {
+            done: true, tasks: [], reasoning: parsed.reasoning || "Plan complete and verified", // any tasks returned alongside done=true are discarded
+            statusUpdate, estimatedSessionsRemaining: estimatedSessionsRemaining ?? 0,
+        };
+    }
+    return {
+        done: tasks.length === 0, tasks, // NOTE(review): an empty batch is returned as done=true although the turn was ended as "error" above
+        reasoning: parsed.reasoning || "", statusUpdate, estimatedSessionsRemaining,
+    };
+}

package/dist/run/run.js CHANGED Viewed

@@ -3,6 +3,7 @@ import { join } from "path";
 import { execSync } from "child_process";
 import chalk from "chalk";
 import { steerWave, STEER_SCHEMA } from "../planner/steering.js";
+import { verifyWave } from "../planner/verifier.js";
 import { getTotalPlannerCost, getPlannerRateLimitInfo, runPlannerQuery, setPlannerEnvResolver, attemptJsonParse } from "../planner/query.js";
 import { buildEnvResolver, isCursorProxyProvider } from "../providers/index.js";
 import { RunDisplay } from "../ui/ui.js";
@@ -397,6 +398,33 @@ export async function executeRun(cfg) {
         }
         return steered;
     };
+    // Between-wave hook for non-flex runs with an objective: calls verifyWave and resolves true only when a next batch was queued.
+    const runVerifier = async () => {
+        if (!objective)
+            return false; // no objective to verify against
+        const plannerCostBefore = getTotalPlannerCost();
+        try {
+            const result = await verifyWave(objective, currentTasks, waveHistory[waveHistory.length - 1], remaining, cwd, plannerModel, concurrency, steeringLog, `verify-wave-${waveNum}`);
+            accCost += getTotalPlannerCost() - plannerCostBefore; // add the planner cost delta of this call to accCost
+            syncRunInfo();
+            if (result.statusUpdate)
+                writeStatus(runDir, result.statusUpdate);
+            if (typeof result.estimatedSessionsRemaining === "number")
+                lastEstimate = result.estimatedSessionsRemaining;
+            if (result.done || result.tasks.length === 0) {
+                objectiveComplete = result.done; // NOTE(review): verifyWave reports done=true for any empty batch, so an empty batch marks the objective complete
+                remaining = 0; // presumably stops further waves — confirm against the wave loop
+                return false;
+            }
+            currentTasks = result.tasks; // the returned batch replaces the current queue
+            return true;
+        }
+        catch (err) {
+            accCost += getTotalPlannerCost() - plannerCostBefore; // NOTE(review): unlike the success path, no syncRunInfo() follows this cost update — confirm
+            display.appendSteeringEvent(`Verifier failed: ${err?.message?.slice(0, 200) || "(no details)"}`);
+            return false; // a verifier failure is reported to the display and not rethrown
+        }
+    };
     // Resume: steer immediately if no queued tasks
     if (cfg.resuming && flex && currentTasks.length === 0 && remaining > 0) {
         display.setSteering(rlGetter, buildSteeringContext());
@@ -465,6 +493,7 @@ export async function executeRun(cfg) {
         lastEstimate,
         display,
         runSteering,
+        runVerifier,
         buildSteeringContext,
         rlGetter,
         isStopping: () => stopping,

package/dist/run/wave-loop.d.ts CHANGED Viewed

@@ -46,6 +46,8 @@ export interface WaveLoopCtx {
     lastEstimate: number | undefined;
     display: RunDisplay;
     runSteering: () => Promise<boolean>;
+    /** Verifier invoked between waves in no-flex mode. Mirrors runSteering's contract. */
+    runVerifier?: () => Promise<boolean>;
     buildSteeringContext: () => SteeringContext;
     rlGetter: RLGetter;
     isStopping: () => boolean;

package/dist/run/wave-loop.js CHANGED Viewed

@@ -154,10 +154,9 @@ export async function runWaveLoop(host, ctx) {
             // work the user expects to see on resume — save them under "stopped".
             const midWavePhase = (ctx.isStopping() || swarm.aborted) ? "stopped" : "steering";
             saveRunState(ctx.runDir, buildRunState(host, midWavePhase, neverStarted));
-            // Preserve the leftover tasks on the host so the outer run loop's final
-            // saveRunState writes them (instead of []), and resume has something to load.
-            if (midWavePhase === "stopped")
-                host.currentTasks = neverStarted;
+            // Preserve the leftover tasks on the host so resume / verifier see the
+            // real pending queue (not the full original batch) after each wave.
+            host.currentTasks = neverStarted;
             // ── Overlay merge outcomes into wave history ──
             const failedMergeBranches = new Set(swarm.mergeResults.filter(r => !r.ok).map(r => r.branch));
             const tasks = swarm.agents.map(a => {
@@ -311,14 +310,16 @@ export async function runWaveLoop(host, ctx) {
                     ctx.display.appendSteeringEvent(`Post-wave review: ${reviewResult.completed} done${reviewResult.failed > 0 ? ` / ${reviewResult.failed} failed` : ""}`);
                 }
             }
-            if (!ctx.flex || host.remaining <= 0 || swarm.aborted || swarm.cappedOut)
+            if (host.remaining <= 0 || swarm.aborted || swarm.cappedOut)
                 break;
-            // ── Steering ──
+            if (!ctx.flex && !ctx.runVerifier)
+                break;
+            // ── Transition: steering (flex) or verifier (no-flex) ──
             ctx.syncRunInfo();
             ctx.display.setSteering(ctx.rlGetter, ctx.buildSteeringContext());
             ctx.display.resume();
-            const steered = await ctx.runSteering();
-            if (!steered)
+            const transitioned = ctx.flex ? await ctx.runSteering() : await ctx.runVerifier();
+            if (!transitioned)
                 break;
             host.waveNum++;
         } // end inner while

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "claude-overnight",
-  "version": "1.25.46",
+  "version": "1.25.47",
   "description": "Parallel Claude agents in git worktrees with a usage cap that reserves headroom for your interactive Claude Code. Crash-safe resume. Provider-agnostic model catalog (Anthropic, Cursor, OpenAI, Gemini, DeepSeek, Llama, Qwen) with capability-based task scoping.",
   "type": "module",
   "bin": {

package/plugins/claude-overnight/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "claude-overnight",
-  "version": "1.25.46",
+  "version": "1.25.47",
   "description": "Claude Code skill for understanding, installing, and inspecting claude-overnight runs  -- parallel Claude agents in git worktrees with thinking waves, multi-wave steering, and crash-safe resume. Supports Cursor API Proxy, Qwen, OpenRouter.",
   "author": {
     "name": "Francesco Fornace"