claude-overnight 1.25.46 → 1.25.47

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/help.js CHANGED
@@ -16,6 +16,7 @@ export function printHelp() {
16
16
  ${chalk.cyan("Usage")}
17
17
  claude-overnight ${chalk.dim("interactive mode")}
18
18
  claude-overnight tasks.json ${chalk.dim("task file mode")}
19
+ claude-overnight plan.md ${chalk.dim("plan file mode (.md) — coach + flex")}
19
20
  claude-overnight "fix auth" "add tests" ${chalk.dim("inline tasks")}
20
21
 
21
22
  ${chalk.cyan("Flags")}
@@ -30,7 +31,8 @@ export function printHelp() {
30
31
  --allow-extra-usage Allow extra/overage usage ${chalk.dim("(default: stop when plan limits hit)")}
31
32
  --extra-usage-budget=N Max $ for extra usage ${chalk.dim("(implies --allow-extra-usage)")}
32
33
  --timeout=SECONDS Agent inactivity timeout ${chalk.dim("(default: 900s, nudges at timeout, kills at 2×)")}
33
- --no-flex Disable adaptive multi-wave planning ${chalk.dim("(run all tasks in one shot)")}
34
+ --flex Force adaptive multi-wave planning ${chalk.dim("(steering between waves)")}
35
+ --no-flex Fixed plan mode ${chalk.dim("(verifier between waves, no re-planning)")}
34
36
  --worktrees Force worktree isolation on ${chalk.dim("(default: auto-detect git repo)")}
35
37
  --no-worktrees Disable worktree isolation ${chalk.dim("(all agents work in real cwd)")}
36
38
  --merge=MODE Merge strategy: yolo or branch ${chalk.dim("(default: yolo)")}
@@ -1 +1 @@
1
- export declare const VERSION = "1.25.46";
1
+ export declare const VERSION = "1.25.47";
@@ -1,2 +1,2 @@
1
1
  // Auto-generated by build — do not edit manually.
2
- export const VERSION = "1.25.46";
2
+ export const VERSION = "1.25.47";
package/dist/index.js CHANGED
@@ -8,7 +8,7 @@ import { setPlannerEnvResolver } from "./planner/query.js";
8
8
  import { setTranscriptRunDir } from "./core/transcripts.js";
9
9
  import { pickModel, loadProviders, buildEnvResolver, healthCheckCursorProxy, PROXY_DEFAULT_URL, isCursorProxyProvider, bundledComposerProxyShellCommand, warnMacCursorAgentShellPatchIfNeeded, } from "./providers/index.js";
10
10
  import { executeRun } from "./run/run.js";
11
- import { parseCliFlags, fetchModels, ask, select, selectKey, loadTaskFile, validateConcurrency, isGitRepo, validateGitRepo, showPlan, } from "./cli/cli.js";
11
+ import { parseCliFlags, fetchModels, ask, select, selectKey, loadTaskFile, loadPlanFile, validateConcurrency, isGitRepo, validateGitRepo, showPlan, } from "./cli/cli.js";
12
12
  import { loadRunState, findOrphanedDesigns, backfillOrphanedPlans, readPreviousRunKnowledge, createRunDir, updateLatestSymlink, } from "./state/state.js";
13
13
  import { runSetupCoach, loadUserSettings, saveUserSettings, COACH_MODEL } from "./planner/coach/coach.js";
14
14
  import { editRunSettings, formatSettingsSummary } from "./cli/settings.js";
@@ -63,11 +63,21 @@ async function main() {
63
63
  // ── Load tasks ──
64
64
  let tasks = [];
65
65
  let fileCfg;
66
+ let planFileContent;
66
67
  const jsonFiles = args.filter(a => a.endsWith(".json"));
68
+ const mdFiles = args.filter(a => a.endsWith(".md"));
67
69
  if (jsonFiles.length > 1) {
68
70
  console.error(chalk.red(` Multiple task files provided. Only one .json file is supported.`));
69
71
  process.exit(1);
70
72
  }
73
+ if (mdFiles.length > 1) {
74
+ console.error(chalk.red(` Multiple plan files provided. Only one .md file is supported.`));
75
+ process.exit(1);
76
+ }
77
+ if (jsonFiles.length && mdFiles.length) {
78
+ console.error(chalk.red(` Cannot mix a .json task file with a .md plan file.`));
79
+ process.exit(1);
80
+ }
71
81
  for (const arg of args) {
72
82
  if (arg.endsWith(".json")) {
73
83
  if (tasks.length > 0) {
@@ -77,8 +87,13 @@ async function main() {
77
87
  fileCfg = loadTaskFile(arg);
78
88
  tasks = fileCfg.tasks;
79
89
  }
90
+ else if (arg.endsWith(".md")) {
91
+ const plan = loadPlanFile(arg);
92
+ planFileContent = plan.planContent;
93
+ fileCfg = { tasks: [], objective: plan.objective, flexiblePlan: true };
94
+ }
80
95
  else if (!arg.startsWith("-") && existsSync(resolve(arg))) {
81
- console.error(chalk.red(` "${arg}" looks like a file but doesn't end in .json. Rename it or quote the string.`));
96
+ console.error(chalk.red(` "${arg}" looks like a file but doesn't end in .json or .md. Rename it or quote the string.`));
82
97
  process.exit(1);
83
98
  }
84
99
  else {
@@ -341,6 +356,30 @@ async function main() {
341
356
  console.log(chalk.dim(` ╰${"─".repeat(innerLen + 4)}╯`));
342
357
  }
343
358
  else {
359
+ // ── Setup coach in confirm-only mode (task/plan file on a TTY) ──
360
+ let coachResult = null;
361
+ if (fileCfg?.objective && process.stdin.isTTY
362
+ && !argv.includes("--no-coach") && !loadUserSettings().skipCoach) {
363
+ const settings = loadUserSettings();
364
+ const cModel = settings.coachModel ?? COACH_MODEL;
365
+ const cProvider = settings.coachProviderId
366
+ ? loadProviders().find(p => p.id === settings.coachProviderId) : undefined;
367
+ coachResult = await runSetupCoach(fileCfg.objective, cwd, {
368
+ providers: loadProviders(), cliFlags, coachModel: cModel, coachProvider: cProvider,
369
+ planContent: planFileContent, confirmOnly: true,
370
+ });
371
+ if (coachResult) {
372
+ coachedOriginal = fileCfg.objective;
373
+ coachedAt = Date.now();
374
+ fileCfg.objective = coachResult.improvedObjective;
375
+ objective = coachResult.improvedObjective;
376
+ const rec = coachResult.recommended;
377
+ if (fileCfg.concurrency == null)
378
+ fileCfg.concurrency = rec.concurrency;
379
+ if (fileCfg.usageCap == null && rec.usageCap != null)
380
+ fileCfg.usageCap = Math.round(rec.usageCap * 100);
381
+ }
382
+ }
344
383
  let models = [];
345
384
  if (!cliFlags.model && !fileCfg?.model)
346
385
  models = await fetchModels(5_000);
@@ -374,7 +413,7 @@ async function main() {
374
413
  }
375
414
  }
376
415
  concurrency = cliFlags.concurrency ? parseInt(cliFlags.concurrency) : (fileCfg?.concurrency ?? 5);
377
- budget = cliFlags.budget ? parseInt(cliFlags.budget) : undefined;
416
+ budget = cliFlags.budget ? parseInt(cliFlags.budget) : coachResult?.recommended.budget;
378
417
  if (budget != null && (isNaN(budget) || budget < 1)) {
379
418
  console.error(chalk.red(` --budget must be a positive integer`));
380
419
  process.exit(1);
@@ -442,7 +481,8 @@ async function main() {
442
481
  console.log(chalk.dim(` ${workerModel} concurrency=${concurrency} worktrees=${useWorktrees} merge=${mergeStrategy}${capStr}${extraStr}`));
443
482
  }
444
483
  // ── Plan phase ──
445
- const flex = !argv.includes("--no-flex") && (fileCfg?.flexiblePlan ?? objective != null) && objective != null && (budget ?? 10) > 2;
484
+ const flexFlag = argv.includes("--flex") ? true : argv.includes("--no-flex") ? false : undefined;
485
+ const flex = objective != null && (flexFlag ?? ((fileCfg?.flexiblePlan ?? true) && (budget ?? 10) > 2));
446
486
  const agentTimeoutMs = cliFlags.timeout ? parseFloat(cliFlags.timeout) * 1000 : undefined;
447
487
  let thinkingUsed = 0, thinkingCost = 0, thinkingIn = 0, thinkingOut = 0, thinkingTools = 0;
448
488
  let thinkingHistory;
@@ -0,0 +1,66 @@
1
+ import type { Task, SteerResult, WaveSummary } from "../core/types.js";
2
+ import { type PlannerLog } from "./query.js";
3
+ export declare const VERIFY_SCHEMA: {
4
+ type: "json_schema";
5
+ schema: {
6
+ type: string;
7
+ properties: {
8
+ done: {
9
+ type: string;
10
+ };
11
+ reasoning: {
12
+ type: string;
13
+ };
14
+ statusUpdate: {
15
+ type: string;
16
+ };
17
+ estimatedSessionsRemaining: {
18
+ type: string;
19
+ };
20
+ verifiedCount: {
21
+ type: string;
22
+ };
23
+ retryCount: {
24
+ type: string;
25
+ };
26
+ tasks: {
27
+ type: string;
28
+ items: {
29
+ type: string;
30
+ properties: {
31
+ prompt: {
32
+ type: string;
33
+ };
34
+ model: {
35
+ type: string;
36
+ };
37
+ noWorktree: {
38
+ type: string;
39
+ };
40
+ type: {
41
+ type: string;
42
+ enum: string[];
43
+ };
44
+ postcondition: {
45
+ type: string;
46
+ };
47
+ };
48
+ required: string[];
49
+ };
50
+ };
51
+ };
52
+ required: string[];
53
+ };
54
+ };
55
+ /**
56
+ * Verify the previous wave and compose the next fixed batch of pending tasks.
57
+ *
58
+ * Unlike `steerWave`, the verifier does not invent new tasks — it:
59
+ * 1. Runs the project's build/smoke checks.
60
+ * 2. Fixes shallow regressions in the last wave (edits directly).
61
+ * 3. Picks the next N pending tasks from the user's fixed plan.
62
+ *
63
+ * The model has full tool access so it can actually repair broken commits,
64
+ * not just report on them.
65
+ */
66
+ export declare function verifyWave(objective: string, pendingTasks: Task[], lastWave: WaveSummary | undefined, remainingBudget: number, cwd: string, plannerModel: string, concurrency: number, onLog: PlannerLog, transcriptName?: string): Promise<SteerResult>;
@@ -0,0 +1,117 @@
1
+ import { runPlannerQuery, attemptJsonParse, postProcess } from "./query.js";
2
+ import { createTurn, beginTurn, endTurn } from "../core/turns.js";
3
+ // Verifier schema — same shape as STEER_SCHEMA plus `verifiedCount` / `retryCount` tallies so
4
+ // the wave-loop can tell how many of the prior wave's tasks actually shipped.
5
+ export const VERIFY_SCHEMA = {
6
+ type: "json_schema",
7
+ schema: {
8
+ type: "object",
9
+ properties: {
10
+ done: { type: "boolean" },
11
+ reasoning: { type: "string" },
12
+ statusUpdate: { type: "string" },
13
+ estimatedSessionsRemaining: { type: "number" },
14
+ verifiedCount: { type: "number" },
15
+ retryCount: { type: "number" },
16
+ tasks: {
17
+ type: "array",
18
+ items: {
19
+ type: "object",
20
+ properties: {
21
+ prompt: { type: "string" },
22
+ model: { type: "string" },
23
+ noWorktree: { type: "boolean" },
24
+ type: { type: "string", enum: ["execute", "explore", "critique", "synthesize", "verify", "user-test", "polish"] },
25
+ postcondition: { type: "string" },
26
+ },
27
+ required: ["prompt"],
28
+ },
29
+ },
30
+ },
31
+ required: ["done", "tasks", "reasoning", "statusUpdate", "estimatedSessionsRemaining"],
32
+ },
33
+ };
34
+ function renderLastWave(w) {
35
+ if (!w)
36
+ return "(first wave — nothing to verify yet)";
37
+ const lines = w.tasks.map(t => {
38
+ const files = t.filesChanged ? ` (${t.filesChanged} files)` : " (0 files)";
39
+ const err = t.error ? ` — ${t.error}` : "";
40
+ return ` - [${t.status}] ${t.prompt.slice(0, 160)}${files}${err}`;
41
+ }).join("\n");
42
+ return `Wave ${w.wave + 1}:\n${lines}`;
43
+ }
44
+ /**
45
+ * Verify the previous wave and compose the next fixed batch of pending tasks.
46
+ *
47
+ * Unlike `steerWave`, the verifier does not invent new tasks — it:
48
+ * 1. Runs the project's build/smoke checks.
49
+ * 2. Fixes shallow regressions in the last wave (edits directly).
50
+ * 3. Picks the next N pending tasks from the user's fixed plan.
51
+ *
52
+ * The model has full tool access so it can actually repair broken commits,
53
+ * not just report on them.
54
+ */
55
+ export async function verifyWave(objective, pendingTasks, lastWave, remainingBudget, cwd, plannerModel, concurrency, onLog, transcriptName = "verify") {
56
+ const pendingList = pendingTasks.length > 0
57
+ ? pendingTasks.map((t, i) => ` ${i + 1}. ${t.prompt.slice(0, 200)}`).join("\n")
58
+ : "(none — every task from the original plan has been attempted)";
59
+ const prompt = `You are the verifier + fix gate between waves of a fixed-plan execution.
60
+
61
+ Objective: ${objective}
62
+
63
+ ## What just happened
64
+ ${renderLastWave(lastWave)}
65
+
66
+ ## Remaining plan (pending tasks, in order)
67
+ ${pendingList}
68
+
69
+ ## Your job
70
+
71
+ 1. Run the project's build and smoke checks. Use the tools you have (Bash, Read, Grep, Edit, Write).
72
+ 2. For any regression the last wave introduced, make the fix directly. Don't delegate a fix to the next wave if you can do it in two edits.
73
+ 3. Compose the next batch of pending tasks to dispatch — pick tasks with non-overlapping file scopes so ${concurrency} can run in parallel.
74
+ 4. If the plan is complete AND the build passes AND one verify task has confirmed the app runs, set done=true.
75
+
76
+ ## Output
77
+
78
+ Respond with ONLY a JSON object (no markdown fences):
79
+ {"done":boolean,"reasoning":"...","statusUpdate":"REQUIRED","estimatedSessionsRemaining":N,"verifiedCount":N,"retryCount":N,"tasks":[{"prompt":"...","type":"execute","postcondition":"..."}]}
80
+
81
+ Remaining budget: ${remainingBudget} agent sessions. Include retries inside tasks[] (same format) if a pending step needs a second attempt with corrected context.`;
82
+ onLog("Verifying last wave…", "status");
83
+ const turn = createTurn("steer", `Verify wave`, `verify-${lastWave?.wave ?? 0}`, plannerModel);
84
+ beginTurn(turn);
85
+ const resultText = await runPlannerQuery(prompt, {
86
+ cwd, model: plannerModel, outputFormat: VERIFY_SCHEMA,
87
+ transcriptName, turnId: turn.id, maxTurns: 80,
88
+ }, onLog);
89
+ const parsed = attemptJsonParse(resultText);
90
+ if (!parsed) {
91
+ endTurn(turn, "error");
92
+ throw new Error(`Could not parse verifier response (${resultText.length} chars): ${resultText.slice(0, 120)}`);
93
+ }
94
+ const isDone = parsed.done === true;
95
+ const statusUpdate = parsed.statusUpdate || undefined;
96
+ const estRaw = parsed.estimatedSessionsRemaining;
97
+ const estimatedSessionsRemaining = typeof estRaw === "number" && estRaw >= 0 ? Math.round(estRaw) : undefined;
98
+ let tasks = (parsed.tasks || []).map((t, i) => ({
99
+ id: String(i),
100
+ prompt: typeof t === "string" ? t : t.prompt,
101
+ ...(t.noWorktree && { noWorktree: true }),
102
+ ...(t.type && { type: t.type }),
103
+ ...(typeof t.postcondition === "string" && t.postcondition.trim() && { postcondition: t.postcondition.trim() }),
104
+ }));
105
+ tasks = postProcess(tasks, remainingBudget, onLog);
106
+ endTurn(turn, tasks.length === 0 && !isDone ? "error" : "done");
107
+ if (isDone) {
108
+ return {
109
+ done: true, tasks: [], reasoning: parsed.reasoning || "Plan complete and verified",
110
+ statusUpdate, estimatedSessionsRemaining: estimatedSessionsRemaining ?? 0,
111
+ };
112
+ }
113
+ return {
114
+ done: tasks.length === 0, tasks,
115
+ reasoning: parsed.reasoning || "", statusUpdate, estimatedSessionsRemaining,
116
+ };
117
+ }
package/dist/run/run.js CHANGED
@@ -3,6 +3,7 @@ import { join } from "path";
3
3
  import { execSync } from "child_process";
4
4
  import chalk from "chalk";
5
5
  import { steerWave, STEER_SCHEMA } from "../planner/steering.js";
6
+ import { verifyWave } from "../planner/verifier.js";
6
7
  import { getTotalPlannerCost, getPlannerRateLimitInfo, runPlannerQuery, setPlannerEnvResolver, attemptJsonParse } from "../planner/query.js";
7
8
  import { buildEnvResolver, isCursorProxyProvider } from "../providers/index.js";
8
9
  import { RunDisplay } from "../ui/ui.js";
@@ -397,6 +398,33 @@ export async function executeRun(cfg) {
397
398
  }
398
399
  return steered;
399
400
  };
401
+ // In non-flex mode with an objective, the verifier runs between waves instead of the steerer.
402
+ const runVerifier = async () => {
403
+ if (!objective)
404
+ return false;
405
+ const plannerCostBefore = getTotalPlannerCost();
406
+ try {
407
+ const result = await verifyWave(objective, currentTasks, waveHistory[waveHistory.length - 1], remaining, cwd, plannerModel, concurrency, steeringLog, `verify-wave-${waveNum}`);
408
+ accCost += getTotalPlannerCost() - plannerCostBefore;
409
+ syncRunInfo();
410
+ if (result.statusUpdate)
411
+ writeStatus(runDir, result.statusUpdate);
412
+ if (typeof result.estimatedSessionsRemaining === "number")
413
+ lastEstimate = result.estimatedSessionsRemaining;
414
+ if (result.done || result.tasks.length === 0) {
415
+ objectiveComplete = result.done;
416
+ remaining = 0;
417
+ return false;
418
+ }
419
+ currentTasks = result.tasks;
420
+ return true;
421
+ }
422
+ catch (err) {
423
+ accCost += getTotalPlannerCost() - plannerCostBefore;
424
+ display.appendSteeringEvent(`Verifier failed: ${err?.message?.slice(0, 200) || "(no details)"}`);
425
+ return false;
426
+ }
427
+ };
400
428
  // Resume: steer immediately if no queued tasks
401
429
  if (cfg.resuming && flex && currentTasks.length === 0 && remaining > 0) {
402
430
  display.setSteering(rlGetter, buildSteeringContext());
@@ -465,6 +493,7 @@ export async function executeRun(cfg) {
465
493
  lastEstimate,
466
494
  display,
467
495
  runSteering,
496
+ runVerifier,
468
497
  buildSteeringContext,
469
498
  rlGetter,
470
499
  isStopping: () => stopping,
@@ -46,6 +46,8 @@ export interface WaveLoopCtx {
46
46
  lastEstimate: number | undefined;
47
47
  display: RunDisplay;
48
48
  runSteering: () => Promise<boolean>;
49
+ /** Verifier invoked between waves in no-flex mode. Mirrors runSteering's contract. */
50
+ runVerifier?: () => Promise<boolean>;
49
51
  buildSteeringContext: () => SteeringContext;
50
52
  rlGetter: RLGetter;
51
53
  isStopping: () => boolean;
@@ -154,10 +154,9 @@ export async function runWaveLoop(host, ctx) {
154
154
  // work the user expects to see on resume — save them under "stopped".
155
155
  const midWavePhase = (ctx.isStopping() || swarm.aborted) ? "stopped" : "steering";
156
156
  saveRunState(ctx.runDir, buildRunState(host, midWavePhase, neverStarted));
157
- // Preserve the leftover tasks on the host so the outer run loop's final
158
- // saveRunState writes them (instead of []), and resume has something to load.
159
- if (midWavePhase === "stopped")
160
- host.currentTasks = neverStarted;
157
+ // Preserve the leftover tasks on the host so resume / verifier see the
158
+ // real pending queue (not the full original batch) after each wave.
159
+ host.currentTasks = neverStarted;
161
160
  // ── Overlay merge outcomes into wave history ──
162
161
  const failedMergeBranches = new Set(swarm.mergeResults.filter(r => !r.ok).map(r => r.branch));
163
162
  const tasks = swarm.agents.map(a => {
@@ -311,14 +310,16 @@ export async function runWaveLoop(host, ctx) {
311
310
  ctx.display.appendSteeringEvent(`Post-wave review: ${reviewResult.completed} done${reviewResult.failed > 0 ? ` / ${reviewResult.failed} failed` : ""}`);
312
311
  }
313
312
  }
314
- if (!ctx.flex || host.remaining <= 0 || swarm.aborted || swarm.cappedOut)
313
+ if (host.remaining <= 0 || swarm.aborted || swarm.cappedOut)
315
314
  break;
316
- // ── Steering ──
315
+ if (!ctx.flex && !ctx.runVerifier)
316
+ break;
317
+ // ── Transition: steering (flex) or verifier (no-flex) ──
317
318
  ctx.syncRunInfo();
318
319
  ctx.display.setSteering(ctx.rlGetter, ctx.buildSteeringContext());
319
320
  ctx.display.resume();
320
- const steered = await ctx.runSteering();
321
- if (!steered)
321
+ const transitioned = ctx.flex ? await ctx.runSteering() : await ctx.runVerifier();
322
+ if (!transitioned)
322
323
  break;
323
324
  host.waveNum++;
324
325
  } // end inner while
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-overnight",
3
- "version": "1.25.46",
3
+ "version": "1.25.47",
4
4
  "description": "Parallel Claude agents in git worktrees with a usage cap that reserves headroom for your interactive Claude Code. Crash-safe resume. Provider-agnostic model catalog (Anthropic, Cursor, OpenAI, Gemini, DeepSeek, Llama, Qwen) with capability-based task scoping.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-overnight",
3
- "version": "1.25.46",
3
+ "version": "1.25.47",
4
4
  "description": "Claude Code skill for understanding, installing, and inspecting claude-overnight runs -- parallel Claude agents in git worktrees with thinking waves, multi-wave steering, and crash-safe resume. Supports Cursor API Proxy, Qwen, OpenRouter.",
5
5
  "author": {
6
6
  "name": "Francesco Fornace"