claude-overnight 1.12.0 → 1.13.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/run.js CHANGED
@@ -8,7 +8,7 @@ import { getTotalPlannerCost, getPlannerRateLimitInfo, runPlannerQuery } from ".
8
8
  import { RunDisplay } from "./ui.js";
9
9
  import { renderSummary } from "./render.js";
10
10
  import { fmtTokens } from "./render.js";
11
- import { isAuthError } from "./cli.js";
11
+ import { isAuthError, selectKey, ask } from "./cli.js";
12
12
  import { readRunMemory, writeStatus, writeGoalUpdate, saveRunState, saveWaveSession, loadWaveHistory, recordBranches, archiveMilestone, writeSteerInbox, consumeSteerInbox, countSteerInbox, appendOvernightLogStart, updateOvernightLogEnd, } from "./state.js";
13
13
  export async function executeRun(cfg) {
14
14
  const restore = () => { try {
@@ -31,6 +31,7 @@ export async function executeRun(cfg) {
31
31
  let accCost, accCompleted, accFailed, accTools;
32
32
  let accIn = 0, accOut = 0;
33
33
  let lastCapped = false, lastAborted = false, objectiveComplete = false, lastHealed = false;
34
+ let lastEstimate;
34
35
  const branches = [];
35
36
  if (cfg.resuming && cfg.resumeState) {
36
37
  const rs = cfg.resumeState;
@@ -216,6 +217,8 @@ export async function executeRun(cfg) {
216
217
  writeStatus(runDir, steer.statusUpdate);
217
218
  if (steer.goalUpdate)
218
219
  writeGoalUpdate(runDir, steer.goalUpdate);
220
+ if (typeof steer.estimatedSessionsRemaining === "number")
221
+ lastEstimate = steer.estimatedSessionsRemaining;
219
222
  const steerDir = join(runDir, "steering");
220
223
  mkdirSync(steerDir, { recursive: true });
221
224
  writeFileSync(join(steerDir, `wave-${waveNum}-attempt-${steerAttempts}.json`), JSON.stringify({
@@ -283,93 +286,127 @@ export async function executeRun(cfg) {
283
286
  if (!display.runInfo.startedAt)
284
287
  display.runInfo.startedAt = cfg.runStartedAt;
285
288
  display.start();
286
- // ── Main wave loop ──
287
- while (remaining > 0 && currentTasks.length > 0 && !stopping) {
288
- if (!lastHealed) {
289
- const healTask = checkProjectHealth(cwd);
290
- if (healTask && remaining > 0) {
291
- lastHealed = true;
292
- currentTasks = [healTask];
289
+ // ── Main wave loop (wrapped so exhaustion can prompt for an extension) ──
290
+ let runAnotherRound = true;
291
+ while (runAnotherRound) {
292
+ runAnotherRound = false;
293
+ while (remaining > 0 && currentTasks.length > 0 && !stopping) {
294
+ if (!lastHealed) {
295
+ const healTask = checkProjectHealth(cwd);
296
+ if (healTask && remaining > 0) {
297
+ lastHealed = true;
298
+ currentTasks = [healTask];
299
+ }
293
300
  }
301
+ else {
302
+ lastHealed = false;
303
+ }
304
+ if (currentTasks.length > remaining)
305
+ currentTasks = currentTasks.slice(0, remaining);
306
+ syncRunInfo();
307
+ const swarm = new Swarm({
308
+ tasks: currentTasks, concurrency, cwd, model: workerModel, permissionMode, allowedTools,
309
+ useWorktrees, mergeStrategy: waveMerge, agentTimeoutMs: cfg.agentTimeoutMs,
310
+ usageCap, allowExtraUsage: cfg.allowExtraUsage, extraUsageBudget: cfg.extraUsageBudget,
311
+ baseCostUsd: accCost,
312
+ });
313
+ currentSwarm = swarm;
314
+ display.setWave(swarm);
315
+ display.resume();
316
+ try {
317
+ await swarm.run();
318
+ }
319
+ catch (err) {
320
+ if (isAuthError(err)) {
321
+ display.stop();
322
+ restore();
323
+ console.error(chalk.red(`\n Authentication failed — check your API key or run: claude auth\n`));
324
+ process.exit(1);
325
+ }
326
+ throw err;
327
+ }
328
+ display.pause();
329
+ console.log(renderSummary(swarm));
330
+ accCost += swarm.totalCostUsd;
331
+ accIn += swarm.totalInputTokens;
332
+ accOut += swarm.totalOutputTokens;
333
+ accCompleted += swarm.completed;
334
+ accFailed += swarm.failed;
335
+ accTools += swarm.agents.reduce((sum, a) => sum + a.toolCalls, 0);
336
+ remaining = Math.max(0, remaining - swarm.completed - swarm.failed);
337
+ const totalConsumed = accCompleted + accFailed + cfg.thinkingUsed;
338
+ const expectedFloor = Math.max(0, cfg.budget - totalConsumed);
339
+ if (remaining < expectedFloor)
340
+ remaining = expectedFloor;
341
+ if (liveConfig.dirty) {
342
+ remaining = liveConfig.remaining;
343
+ usageCap = liveConfig.usageCap;
344
+ liveConfig.dirty = false;
345
+ }
346
+ liveConfig.remaining = remaining;
347
+ lastCapped = swarm.cappedOut;
348
+ lastAborted = swarm.aborted;
349
+ recordBranches(swarm.agents, swarm.mergeResults, branches);
350
+ saveWaveSession(runDir, waveNum, swarm.agents, swarm.totalCostUsd);
351
+ // Tasks that never made it into the swarm (queue cleared on abort/cap)
352
+ // are preserved as currentTasks so resume picks them up. Budget for these
353
+ // wasn't decremented (only attempted agents were), so no refund needed.
354
+ const attemptedPrompts = new Set(swarm.agents.map(a => a.task.prompt));
355
+ const neverStarted = currentTasks.filter(t => !attemptedPrompts.has(t.prompt));
356
+ saveRunState(runDir, {
357
+ id: `run-${new Date().toISOString().slice(0, 19)}`, objective: objective ?? "", budget: cfg.budget,
358
+ remaining, workerModel, plannerModel, concurrency, permissionMode,
359
+ usageCap, allowExtraUsage: cfg.allowExtraUsage, extraUsageBudget: cfg.extraUsageBudget,
360
+ flex, useWorktrees, mergeStrategy, waveNum, currentTasks: neverStarted,
361
+ accCost, accCompleted, accFailed, accIn, accOut, accTools,
362
+ branches, phase: "steering", startedAt: new Date(cfg.runStartedAt).toISOString(), cwd,
363
+ });
364
+ waveHistory.push({
365
+ wave: waveNum,
366
+ tasks: swarm.agents.map(a => ({ prompt: a.task.prompt, status: a.status, filesChanged: a.filesChanged, error: a.error })),
367
+ });
368
+ if (!flex || remaining <= 0 || swarm.aborted || swarm.cappedOut)
369
+ break;
370
+ syncRunInfo();
371
+ display.setSteering(rlGetter, buildSteeringContext());
372
+ display.resume();
373
+ const steered = await runSteering();
374
+ if (!steered)
375
+ break;
376
+ waveNum++;
294
377
  }
295
- else {
296
- lastHealed = false;
297
- }
298
- if (currentTasks.length > remaining)
299
- currentTasks = currentTasks.slice(0, remaining);
300
- syncRunInfo();
301
- const swarm = new Swarm({
302
- tasks: currentTasks, concurrency, cwd, model: workerModel, permissionMode, allowedTools,
303
- useWorktrees, mergeStrategy: waveMerge, agentTimeoutMs: cfg.agentTimeoutMs,
304
- usageCap, allowExtraUsage: cfg.allowExtraUsage, extraUsageBudget: cfg.extraUsageBudget,
305
- baseCostUsd: accCost,
306
- });
307
- currentSwarm = swarm;
308
- display.setWave(swarm);
309
- display.resume();
310
- try {
311
- await swarm.run();
312
- }
313
- catch (err) {
314
- if (isAuthError(err)) {
378
+ display.stop();
379
+ // ── Budget-exhausted: offer to extend with the same settings ──
380
+ const exhaustedByBudget = !objectiveComplete && !stopping && !lastAborted && !lastCapped &&
381
+ remaining <= 0 && !!process.stdin.isTTY;
382
+ if (exhaustedByBudget) {
383
+ const ext = await promptBudgetExtension({
384
+ estimate: lastEstimate,
385
+ spent: accCost,
386
+ sessionsUsed: accCompleted + accFailed + cfg.thinkingUsed,
387
+ budget: cfg.budget,
388
+ });
389
+ if (ext > 0) {
390
+ remaining = ext;
391
+ cfg.budget += ext;
392
+ lastCapped = false;
393
+ lastAborted = false;
394
+ runInfoRef.sessionsBudget = cfg.budget;
395
+ runInfoRef.remaining = remaining;
396
+ liveConfig.remaining = remaining;
397
+ liveConfig.usageCap = usageCap;
398
+ display.setSteering(rlGetter, buildSteeringContext());
399
+ display.start();
400
+ const steered = await runSteering();
401
+ if (steered) {
402
+ waveNum++;
403
+ runAnotherRound = true;
404
+ continue;
405
+ }
315
406
  display.stop();
316
- restore();
317
- console.error(chalk.red(`\n Authentication failed — check your API key or run: claude auth\n`));
318
- process.exit(1);
319
407
  }
320
- throw err;
321
408
  }
322
- display.pause();
323
- console.log(renderSummary(swarm));
324
- accCost += swarm.totalCostUsd;
325
- accIn += swarm.totalInputTokens;
326
- accOut += swarm.totalOutputTokens;
327
- accCompleted += swarm.completed;
328
- accFailed += swarm.failed;
329
- accTools += swarm.agents.reduce((sum, a) => sum + a.toolCalls, 0);
330
- remaining = Math.max(0, remaining - swarm.completed - swarm.failed);
331
- const totalConsumed = accCompleted + accFailed + cfg.thinkingUsed;
332
- const expectedFloor = Math.max(0, cfg.budget - totalConsumed);
333
- if (remaining < expectedFloor)
334
- remaining = expectedFloor;
335
- if (liveConfig.dirty) {
336
- remaining = liveConfig.remaining;
337
- usageCap = liveConfig.usageCap;
338
- liveConfig.dirty = false;
339
- }
340
- liveConfig.remaining = remaining;
341
- lastCapped = swarm.cappedOut;
342
- lastAborted = swarm.aborted;
343
- recordBranches(swarm.agents, swarm.mergeResults, branches);
344
- saveWaveSession(runDir, waveNum, swarm.agents, swarm.totalCostUsd);
345
- // Tasks that never made it into the swarm (queue cleared on abort/cap)
346
- // are preserved as currentTasks so resume picks them up. Budget for these
347
- // wasn't decremented (only attempted agents were), so no refund needed.
348
- const attemptedPrompts = new Set(swarm.agents.map(a => a.task.prompt));
349
- const neverStarted = currentTasks.filter(t => !attemptedPrompts.has(t.prompt));
350
- saveRunState(runDir, {
351
- id: `run-${new Date().toISOString().slice(0, 19)}`, objective: objective ?? "", budget: cfg.budget,
352
- remaining, workerModel, plannerModel, concurrency, permissionMode,
353
- usageCap, allowExtraUsage: cfg.allowExtraUsage, extraUsageBudget: cfg.extraUsageBudget,
354
- flex, useWorktrees, mergeStrategy, waveNum, currentTasks: neverStarted,
355
- accCost, accCompleted, accFailed, accIn, accOut, accTools,
356
- branches, phase: "steering", startedAt: new Date(cfg.runStartedAt).toISOString(), cwd,
357
- });
358
- waveHistory.push({
359
- wave: waveNum,
360
- tasks: swarm.agents.map(a => ({ prompt: a.task.prompt, status: a.status, filesChanged: a.filesChanged, error: a.error })),
361
- });
362
- if (!flex || remaining <= 0 || swarm.aborted || swarm.cappedOut)
363
- break;
364
- syncRunInfo();
365
- display.setSteering(rlGetter, buildSteeringContext());
366
- display.resume();
367
- const steered = await runSteering();
368
- if (!steered)
369
- break;
370
- waveNum++;
371
- }
372
- display.stop();
409
+ } // end outer extension loop
373
410
  // ── Finalize ──
374
411
  const trulyDone = objectiveComplete || (!flex && remaining <= 0);
375
412
  const wasCapped = lastCapped || lastAborted;
@@ -479,6 +516,37 @@ export async function executeRun(cfg) {
479
516
  if (lastAborted || accCompleted === 0)
480
517
  process.exit(2);
481
518
  }
519
+ async function promptBudgetExtension(ctx) {
520
+ const avg = ctx.sessionsUsed > 0 ? ctx.spent / ctx.sessionsUsed : 0;
521
+ const base = ctx.estimate && ctx.estimate > 0
522
+ ? ctx.estimate
523
+ : Math.max(10, Math.round(ctx.budget * 0.2));
524
+ // Wiggle room: 30% buffer, minimum 10, rounded up to a nearest-5.
525
+ const withBuffer = Math.max(10, Math.ceil(base * 1.3));
526
+ const suggested = Math.ceil(withBuffer / 5) * 5;
527
+ const estCost = avg > 0 ? ` · ~$${(suggested * avg).toFixed(2)}` : "";
528
+ const estLine = ctx.estimate != null
529
+ ? chalk.dim(` Planner estimate: ${ctx.estimate} sessions to complete${avg > 0 ? ` (~$${(ctx.estimate * avg).toFixed(2)} at $${avg.toFixed(2)}/session)` : ""}`)
530
+ : chalk.dim(` No planner estimate available — using default${avg > 0 ? ` (~$${avg.toFixed(2)}/session)` : ""}`);
531
+ console.log("");
532
+ console.log(chalk.yellow(` Budget exhausted — run not yet complete.`));
533
+ console.log(estLine);
534
+ console.log(chalk.dim(` Continue with ${chalk.bold.white(String(suggested))} more sessions${estCost}? Everything stays the same — just hit enter.`));
535
+ const action = await selectKey("", [
536
+ { key: "y", desc: "es (↵)" },
537
+ { key: "c", desc: "ustom" },
538
+ { key: "n", desc: "o — stop here" },
539
+ ]);
540
+ if (action === "y")
541
+ return suggested;
542
+ if (action === "n")
543
+ return 0;
544
+ const custom = await ask(` How many more sessions? ${chalk.dim(`[${suggested}]: `)}`);
545
+ const n = parseInt(custom);
546
+ if (isNaN(n) || n <= 0)
547
+ return suggested;
548
+ return n;
549
+ }
482
550
  function checkProjectHealth(cwd) {
483
551
  let pkg;
484
552
  try {
package/dist/steering.js CHANGED
@@ -9,6 +9,7 @@ const STEER_SCHEMA = {
9
9
  reasoning: { type: "string" },
10
10
  statusUpdate: { type: "string" },
11
11
  goalUpdate: { type: "string" },
12
+ estimatedSessionsRemaining: { type: "number" },
12
13
  tasks: {
13
14
  type: "array",
14
15
  items: {
@@ -18,7 +19,7 @@ const STEER_SCHEMA = {
18
19
  },
19
20
  },
20
21
  },
21
- required: ["done", "tasks", "reasoning", "statusUpdate"],
22
+ required: ["done", "tasks", "reasoning", "statusUpdate", "estimatedSessionsRemaining"],
22
23
  },
23
24
  };
24
25
  export async function steerWave(objective, history, remainingBudget, cwd, plannerModel, workerModel, permissionMode, concurrency, onLog, runMemory) {
@@ -96,6 +97,7 @@ Respond with ONLY a JSON object (no markdown fences):
96
97
  "reasoning": "your assessment and why you chose this wave composition",
97
98
  "goalUpdate": "optional — refine what 'amazing' means as you learn more",
98
99
  "statusUpdate": "REQUIRED — concise project status: what's built, what works, what's rough, quality level, key gaps. This replaces the previous status.",
100
+ "estimatedSessionsRemaining": 15,
99
101
  "tasks": [
100
102
  {"prompt": "task instruction...", "model": "worker"},
101
103
  {"prompt": "review task...", "model": "planner"},
@@ -103,10 +105,12 @@ Respond with ONLY a JSON object (no markdown fences):
103
105
  ]
104
106
  }
105
107
 
108
+ "estimatedSessionsRemaining" is REQUIRED. Your best honest estimate of how many MORE agent sessions (beyond the wave you just composed above) are needed to reach 'amazing' — include follow-up fixes, polish, verification, and anything else you'd want before shipping. Be realistic, not optimistic. Use 0 only if truly done.
109
+
106
110
  The "model" field on each task: use "worker" (${workerModel}) for implementation tasks, "planner" (${plannerModel}) for review/analysis/verification tasks. Default is "worker".
107
111
  Set "noWorktree": true for verify/user-test tasks — they need the real project directory with env files, dependencies, and local config.
108
112
 
109
- If done: {"done": true, "reasoning": "...", "statusUpdate": "...", "tasks": []}`;
113
+ If done: {"done": true, "reasoning": "...", "statusUpdate": "...", "estimatedSessionsRemaining": 0, "tasks": []}`;
110
114
  onLog("Assessing...", "status");
111
115
  onLog(`Reading codebase — wave ${history.length + 1}`, "event");
112
116
  const resultText = await runPlannerQuery(prompt, { cwd, model: plannerModel, permissionMode, outputFormat: STEER_SCHEMA }, onLog);
@@ -124,8 +128,10 @@ If done: {"done": true, "reasoning": "...", "statusUpdate": "...", "tasks": []}`
124
128
  })();
125
129
  const isDone = parsed.done === true;
126
130
  const statusUpdate = parsed.statusUpdate || undefined;
131
+ const estRaw = parsed.estimatedSessionsRemaining;
132
+ const estimatedSessionsRemaining = typeof estRaw === "number" && estRaw >= 0 ? Math.round(estRaw) : undefined;
127
133
  if (isDone) {
128
- return { done: true, tasks: [], reasoning: parsed.reasoning || "Objective complete", goalUpdate: parsed.goalUpdate, statusUpdate };
134
+ return { done: true, tasks: [], reasoning: parsed.reasoning || "Objective complete", goalUpdate: parsed.goalUpdate, statusUpdate, estimatedSessionsRemaining: estimatedSessionsRemaining ?? 0 };
129
135
  }
130
136
  let tasks = (parsed.tasks || []).map((t, i) => ({
131
137
  id: String(i),
@@ -134,5 +140,5 @@ If done: {"done": true, "reasoning": "...", "statusUpdate": "...", "tasks": []}`
134
140
  ...(t.noWorktree && { noWorktree: true }),
135
141
  }));
136
142
  tasks = postProcess(tasks, remainingBudget, onLog);
137
- return { done: tasks.length === 0, tasks, reasoning: parsed.reasoning || "", goalUpdate: parsed.goalUpdate, statusUpdate };
143
+ return { done: tasks.length === 0, tasks, reasoning: parsed.reasoning || "", goalUpdate: parsed.goalUpdate, statusUpdate, estimatedSessionsRemaining };
138
144
  }
package/dist/types.d.ts CHANGED
@@ -138,6 +138,7 @@ export interface SteerResult {
138
138
  reasoning: string;
139
139
  goalUpdate?: string;
140
140
  statusUpdate?: string;
141
+ estimatedSessionsRemaining?: number;
141
142
  }
142
143
  /** Accumulated run memory — designs, verifications, etc. — fed to the steerer. */
143
144
  export interface RunMemory {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-overnight",
3
- "version": "1.12.0",
3
+ "version": "1.13.1",
4
4
  "description": "Run 10, 100, or 1000 Claude agents overnight. Parallel autonomous AI coding with thinking waves, iterative quality steering, crash recovery, and rate limit handling. Built on the Claude Agent SDK.",
5
5
  "type": "module",
6
6
  "bin": {