claude-overnight 1.12.0 → 1.13.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/run.js +151 -83
- package/dist/steering.js +10 -4
- package/dist/types.d.ts +1 -0
- package/package.json +1 -1
package/dist/run.js
CHANGED
|
@@ -8,7 +8,7 @@ import { getTotalPlannerCost, getPlannerRateLimitInfo, runPlannerQuery } from ".
|
|
|
8
8
|
import { RunDisplay } from "./ui.js";
|
|
9
9
|
import { renderSummary } from "./render.js";
|
|
10
10
|
import { fmtTokens } from "./render.js";
|
|
11
|
-
import { isAuthError } from "./cli.js";
|
|
11
|
+
import { isAuthError, selectKey, ask } from "./cli.js";
|
|
12
12
|
import { readRunMemory, writeStatus, writeGoalUpdate, saveRunState, saveWaveSession, loadWaveHistory, recordBranches, archiveMilestone, writeSteerInbox, consumeSteerInbox, countSteerInbox, appendOvernightLogStart, updateOvernightLogEnd, } from "./state.js";
|
|
13
13
|
export async function executeRun(cfg) {
|
|
14
14
|
const restore = () => { try {
|
|
@@ -31,6 +31,7 @@ export async function executeRun(cfg) {
|
|
|
31
31
|
let accCost, accCompleted, accFailed, accTools;
|
|
32
32
|
let accIn = 0, accOut = 0;
|
|
33
33
|
let lastCapped = false, lastAborted = false, objectiveComplete = false, lastHealed = false;
|
|
34
|
+
let lastEstimate;
|
|
34
35
|
const branches = [];
|
|
35
36
|
if (cfg.resuming && cfg.resumeState) {
|
|
36
37
|
const rs = cfg.resumeState;
|
|
@@ -216,6 +217,8 @@ export async function executeRun(cfg) {
|
|
|
216
217
|
writeStatus(runDir, steer.statusUpdate);
|
|
217
218
|
if (steer.goalUpdate)
|
|
218
219
|
writeGoalUpdate(runDir, steer.goalUpdate);
|
|
220
|
+
if (typeof steer.estimatedSessionsRemaining === "number")
|
|
221
|
+
lastEstimate = steer.estimatedSessionsRemaining;
|
|
219
222
|
const steerDir = join(runDir, "steering");
|
|
220
223
|
mkdirSync(steerDir, { recursive: true });
|
|
221
224
|
writeFileSync(join(steerDir, `wave-${waveNum}-attempt-${steerAttempts}.json`), JSON.stringify({
|
|
@@ -283,93 +286,127 @@ export async function executeRun(cfg) {
|
|
|
283
286
|
if (!display.runInfo.startedAt)
|
|
284
287
|
display.runInfo.startedAt = cfg.runStartedAt;
|
|
285
288
|
display.start();
|
|
286
|
-
// ── Main wave loop ──
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
289
|
+
// ── Main wave loop (wrapped so exhaustion can prompt for an extension) ──
|
|
290
|
+
let runAnotherRound = true;
|
|
291
|
+
while (runAnotherRound) {
|
|
292
|
+
runAnotherRound = false;
|
|
293
|
+
while (remaining > 0 && currentTasks.length > 0 && !stopping) {
|
|
294
|
+
if (!lastHealed) {
|
|
295
|
+
const healTask = checkProjectHealth(cwd);
|
|
296
|
+
if (healTask && remaining > 0) {
|
|
297
|
+
lastHealed = true;
|
|
298
|
+
currentTasks = [healTask];
|
|
299
|
+
}
|
|
293
300
|
}
|
|
301
|
+
else {
|
|
302
|
+
lastHealed = false;
|
|
303
|
+
}
|
|
304
|
+
if (currentTasks.length > remaining)
|
|
305
|
+
currentTasks = currentTasks.slice(0, remaining);
|
|
306
|
+
syncRunInfo();
|
|
307
|
+
const swarm = new Swarm({
|
|
308
|
+
tasks: currentTasks, concurrency, cwd, model: workerModel, permissionMode, allowedTools,
|
|
309
|
+
useWorktrees, mergeStrategy: waveMerge, agentTimeoutMs: cfg.agentTimeoutMs,
|
|
310
|
+
usageCap, allowExtraUsage: cfg.allowExtraUsage, extraUsageBudget: cfg.extraUsageBudget,
|
|
311
|
+
baseCostUsd: accCost,
|
|
312
|
+
});
|
|
313
|
+
currentSwarm = swarm;
|
|
314
|
+
display.setWave(swarm);
|
|
315
|
+
display.resume();
|
|
316
|
+
try {
|
|
317
|
+
await swarm.run();
|
|
318
|
+
}
|
|
319
|
+
catch (err) {
|
|
320
|
+
if (isAuthError(err)) {
|
|
321
|
+
display.stop();
|
|
322
|
+
restore();
|
|
323
|
+
console.error(chalk.red(`\n Authentication failed — check your API key or run: claude auth\n`));
|
|
324
|
+
process.exit(1);
|
|
325
|
+
}
|
|
326
|
+
throw err;
|
|
327
|
+
}
|
|
328
|
+
display.pause();
|
|
329
|
+
console.log(renderSummary(swarm));
|
|
330
|
+
accCost += swarm.totalCostUsd;
|
|
331
|
+
accIn += swarm.totalInputTokens;
|
|
332
|
+
accOut += swarm.totalOutputTokens;
|
|
333
|
+
accCompleted += swarm.completed;
|
|
334
|
+
accFailed += swarm.failed;
|
|
335
|
+
accTools += swarm.agents.reduce((sum, a) => sum + a.toolCalls, 0);
|
|
336
|
+
remaining = Math.max(0, remaining - swarm.completed - swarm.failed);
|
|
337
|
+
const totalConsumed = accCompleted + accFailed + cfg.thinkingUsed;
|
|
338
|
+
const expectedFloor = Math.max(0, cfg.budget - totalConsumed);
|
|
339
|
+
if (remaining < expectedFloor)
|
|
340
|
+
remaining = expectedFloor;
|
|
341
|
+
if (liveConfig.dirty) {
|
|
342
|
+
remaining = liveConfig.remaining;
|
|
343
|
+
usageCap = liveConfig.usageCap;
|
|
344
|
+
liveConfig.dirty = false;
|
|
345
|
+
}
|
|
346
|
+
liveConfig.remaining = remaining;
|
|
347
|
+
lastCapped = swarm.cappedOut;
|
|
348
|
+
lastAborted = swarm.aborted;
|
|
349
|
+
recordBranches(swarm.agents, swarm.mergeResults, branches);
|
|
350
|
+
saveWaveSession(runDir, waveNum, swarm.agents, swarm.totalCostUsd);
|
|
351
|
+
// Tasks that never made it into the swarm (queue cleared on abort/cap)
|
|
352
|
+
// are preserved as currentTasks so resume picks them up. Budget for these
|
|
353
|
+
// wasn't decremented (only attempted agents were), so no refund needed.
|
|
354
|
+
const attemptedPrompts = new Set(swarm.agents.map(a => a.task.prompt));
|
|
355
|
+
const neverStarted = currentTasks.filter(t => !attemptedPrompts.has(t.prompt));
|
|
356
|
+
saveRunState(runDir, {
|
|
357
|
+
id: `run-${new Date().toISOString().slice(0, 19)}`, objective: objective ?? "", budget: cfg.budget,
|
|
358
|
+
remaining, workerModel, plannerModel, concurrency, permissionMode,
|
|
359
|
+
usageCap, allowExtraUsage: cfg.allowExtraUsage, extraUsageBudget: cfg.extraUsageBudget,
|
|
360
|
+
flex, useWorktrees, mergeStrategy, waveNum, currentTasks: neverStarted,
|
|
361
|
+
accCost, accCompleted, accFailed, accIn, accOut, accTools,
|
|
362
|
+
branches, phase: "steering", startedAt: new Date(cfg.runStartedAt).toISOString(), cwd,
|
|
363
|
+
});
|
|
364
|
+
waveHistory.push({
|
|
365
|
+
wave: waveNum,
|
|
366
|
+
tasks: swarm.agents.map(a => ({ prompt: a.task.prompt, status: a.status, filesChanged: a.filesChanged, error: a.error })),
|
|
367
|
+
});
|
|
368
|
+
if (!flex || remaining <= 0 || swarm.aborted || swarm.cappedOut)
|
|
369
|
+
break;
|
|
370
|
+
syncRunInfo();
|
|
371
|
+
display.setSteering(rlGetter, buildSteeringContext());
|
|
372
|
+
display.resume();
|
|
373
|
+
const steered = await runSteering();
|
|
374
|
+
if (!steered)
|
|
375
|
+
break;
|
|
376
|
+
waveNum++;
|
|
294
377
|
}
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
378
|
+
display.stop();
|
|
379
|
+
// ── Budget-exhausted: offer to extend with the same settings ──
|
|
380
|
+
const exhaustedByBudget = !objectiveComplete && !stopping && !lastAborted && !lastCapped &&
|
|
381
|
+
remaining <= 0 && !!process.stdin.isTTY;
|
|
382
|
+
if (exhaustedByBudget) {
|
|
383
|
+
const ext = await promptBudgetExtension({
|
|
384
|
+
estimate: lastEstimate,
|
|
385
|
+
spent: accCost,
|
|
386
|
+
sessionsUsed: accCompleted + accFailed + cfg.thinkingUsed,
|
|
387
|
+
budget: cfg.budget,
|
|
388
|
+
});
|
|
389
|
+
if (ext > 0) {
|
|
390
|
+
remaining = ext;
|
|
391
|
+
cfg.budget += ext;
|
|
392
|
+
lastCapped = false;
|
|
393
|
+
lastAborted = false;
|
|
394
|
+
runInfoRef.sessionsBudget = cfg.budget;
|
|
395
|
+
runInfoRef.remaining = remaining;
|
|
396
|
+
liveConfig.remaining = remaining;
|
|
397
|
+
liveConfig.usageCap = usageCap;
|
|
398
|
+
display.setSteering(rlGetter, buildSteeringContext());
|
|
399
|
+
display.start();
|
|
400
|
+
const steered = await runSteering();
|
|
401
|
+
if (steered) {
|
|
402
|
+
waveNum++;
|
|
403
|
+
runAnotherRound = true;
|
|
404
|
+
continue;
|
|
405
|
+
}
|
|
315
406
|
display.stop();
|
|
316
|
-
restore();
|
|
317
|
-
console.error(chalk.red(`\n Authentication failed — check your API key or run: claude auth\n`));
|
|
318
|
-
process.exit(1);
|
|
319
407
|
}
|
|
320
|
-
throw err;
|
|
321
408
|
}
|
|
322
|
-
|
|
323
|
-
console.log(renderSummary(swarm));
|
|
324
|
-
accCost += swarm.totalCostUsd;
|
|
325
|
-
accIn += swarm.totalInputTokens;
|
|
326
|
-
accOut += swarm.totalOutputTokens;
|
|
327
|
-
accCompleted += swarm.completed;
|
|
328
|
-
accFailed += swarm.failed;
|
|
329
|
-
accTools += swarm.agents.reduce((sum, a) => sum + a.toolCalls, 0);
|
|
330
|
-
remaining = Math.max(0, remaining - swarm.completed - swarm.failed);
|
|
331
|
-
const totalConsumed = accCompleted + accFailed + cfg.thinkingUsed;
|
|
332
|
-
const expectedFloor = Math.max(0, cfg.budget - totalConsumed);
|
|
333
|
-
if (remaining < expectedFloor)
|
|
334
|
-
remaining = expectedFloor;
|
|
335
|
-
if (liveConfig.dirty) {
|
|
336
|
-
remaining = liveConfig.remaining;
|
|
337
|
-
usageCap = liveConfig.usageCap;
|
|
338
|
-
liveConfig.dirty = false;
|
|
339
|
-
}
|
|
340
|
-
liveConfig.remaining = remaining;
|
|
341
|
-
lastCapped = swarm.cappedOut;
|
|
342
|
-
lastAborted = swarm.aborted;
|
|
343
|
-
recordBranches(swarm.agents, swarm.mergeResults, branches);
|
|
344
|
-
saveWaveSession(runDir, waveNum, swarm.agents, swarm.totalCostUsd);
|
|
345
|
-
// Tasks that never made it into the swarm (queue cleared on abort/cap)
|
|
346
|
-
// are preserved as currentTasks so resume picks them up. Budget for these
|
|
347
|
-
// wasn't decremented (only attempted agents were), so no refund needed.
|
|
348
|
-
const attemptedPrompts = new Set(swarm.agents.map(a => a.task.prompt));
|
|
349
|
-
const neverStarted = currentTasks.filter(t => !attemptedPrompts.has(t.prompt));
|
|
350
|
-
saveRunState(runDir, {
|
|
351
|
-
id: `run-${new Date().toISOString().slice(0, 19)}`, objective: objective ?? "", budget: cfg.budget,
|
|
352
|
-
remaining, workerModel, plannerModel, concurrency, permissionMode,
|
|
353
|
-
usageCap, allowExtraUsage: cfg.allowExtraUsage, extraUsageBudget: cfg.extraUsageBudget,
|
|
354
|
-
flex, useWorktrees, mergeStrategy, waveNum, currentTasks: neverStarted,
|
|
355
|
-
accCost, accCompleted, accFailed, accIn, accOut, accTools,
|
|
356
|
-
branches, phase: "steering", startedAt: new Date(cfg.runStartedAt).toISOString(), cwd,
|
|
357
|
-
});
|
|
358
|
-
waveHistory.push({
|
|
359
|
-
wave: waveNum,
|
|
360
|
-
tasks: swarm.agents.map(a => ({ prompt: a.task.prompt, status: a.status, filesChanged: a.filesChanged, error: a.error })),
|
|
361
|
-
});
|
|
362
|
-
if (!flex || remaining <= 0 || swarm.aborted || swarm.cappedOut)
|
|
363
|
-
break;
|
|
364
|
-
syncRunInfo();
|
|
365
|
-
display.setSteering(rlGetter, buildSteeringContext());
|
|
366
|
-
display.resume();
|
|
367
|
-
const steered = await runSteering();
|
|
368
|
-
if (!steered)
|
|
369
|
-
break;
|
|
370
|
-
waveNum++;
|
|
371
|
-
}
|
|
372
|
-
display.stop();
|
|
409
|
+
} // end outer extension loop
|
|
373
410
|
// ── Finalize ──
|
|
374
411
|
const trulyDone = objectiveComplete || (!flex && remaining <= 0);
|
|
375
412
|
const wasCapped = lastCapped || lastAborted;
|
|
@@ -479,6 +516,37 @@ export async function executeRun(cfg) {
|
|
|
479
516
|
if (lastAborted || accCompleted === 0)
|
|
480
517
|
process.exit(2);
|
|
481
518
|
}
|
|
519
|
+
async function promptBudgetExtension(ctx) {
|
|
520
|
+
const avg = ctx.sessionsUsed > 0 ? ctx.spent / ctx.sessionsUsed : 0;
|
|
521
|
+
const base = ctx.estimate && ctx.estimate > 0
|
|
522
|
+
? ctx.estimate
|
|
523
|
+
: Math.max(10, Math.round(ctx.budget * 0.2));
|
|
524
|
+
// Wiggle room: 30% buffer, minimum 10, rounded up to a nearest-5.
|
|
525
|
+
const withBuffer = Math.max(10, Math.ceil(base * 1.3));
|
|
526
|
+
const suggested = Math.ceil(withBuffer / 5) * 5;
|
|
527
|
+
const estCost = avg > 0 ? ` · ~$${(suggested * avg).toFixed(2)}` : "";
|
|
528
|
+
const estLine = ctx.estimate != null
|
|
529
|
+
? chalk.dim(` Planner estimate: ${ctx.estimate} sessions to complete${avg > 0 ? ` (~$${(ctx.estimate * avg).toFixed(2)} at $${avg.toFixed(2)}/session)` : ""}`)
|
|
530
|
+
: chalk.dim(` No planner estimate available — using default${avg > 0 ? ` (~$${avg.toFixed(2)}/session)` : ""}`);
|
|
531
|
+
console.log("");
|
|
532
|
+
console.log(chalk.yellow(` Budget exhausted — run not yet complete.`));
|
|
533
|
+
console.log(estLine);
|
|
534
|
+
console.log(chalk.dim(` Continue with ${chalk.bold.white(String(suggested))} more sessions${estCost}? Everything stays the same — just hit enter.`));
|
|
535
|
+
const action = await selectKey("", [
|
|
536
|
+
{ key: "y", desc: "es (↵)" },
|
|
537
|
+
{ key: "c", desc: "ustom" },
|
|
538
|
+
{ key: "n", desc: "o — stop here" },
|
|
539
|
+
]);
|
|
540
|
+
if (action === "y")
|
|
541
|
+
return suggested;
|
|
542
|
+
if (action === "n")
|
|
543
|
+
return 0;
|
|
544
|
+
const custom = await ask(` How many more sessions? ${chalk.dim(`[${suggested}]: `)}`);
|
|
545
|
+
const n = parseInt(custom);
|
|
546
|
+
if (isNaN(n) || n <= 0)
|
|
547
|
+
return suggested;
|
|
548
|
+
return n;
|
|
549
|
+
}
|
|
482
550
|
function checkProjectHealth(cwd) {
|
|
483
551
|
let pkg;
|
|
484
552
|
try {
|
package/dist/steering.js
CHANGED
|
@@ -9,6 +9,7 @@ const STEER_SCHEMA = {
|
|
|
9
9
|
reasoning: { type: "string" },
|
|
10
10
|
statusUpdate: { type: "string" },
|
|
11
11
|
goalUpdate: { type: "string" },
|
|
12
|
+
estimatedSessionsRemaining: { type: "number" },
|
|
12
13
|
tasks: {
|
|
13
14
|
type: "array",
|
|
14
15
|
items: {
|
|
@@ -18,7 +19,7 @@ const STEER_SCHEMA = {
|
|
|
18
19
|
},
|
|
19
20
|
},
|
|
20
21
|
},
|
|
21
|
-
required: ["done", "tasks", "reasoning", "statusUpdate"],
|
|
22
|
+
required: ["done", "tasks", "reasoning", "statusUpdate", "estimatedSessionsRemaining"],
|
|
22
23
|
},
|
|
23
24
|
};
|
|
24
25
|
export async function steerWave(objective, history, remainingBudget, cwd, plannerModel, workerModel, permissionMode, concurrency, onLog, runMemory) {
|
|
@@ -96,6 +97,7 @@ Respond with ONLY a JSON object (no markdown fences):
|
|
|
96
97
|
"reasoning": "your assessment and why you chose this wave composition",
|
|
97
98
|
"goalUpdate": "optional — refine what 'amazing' means as you learn more",
|
|
98
99
|
"statusUpdate": "REQUIRED — concise project status: what's built, what works, what's rough, quality level, key gaps. This replaces the previous status.",
|
|
100
|
+
"estimatedSessionsRemaining": 15,
|
|
99
101
|
"tasks": [
|
|
100
102
|
{"prompt": "task instruction...", "model": "worker"},
|
|
101
103
|
{"prompt": "review task...", "model": "planner"},
|
|
@@ -103,10 +105,12 @@ Respond with ONLY a JSON object (no markdown fences):
|
|
|
103
105
|
]
|
|
104
106
|
}
|
|
105
107
|
|
|
108
|
+
"estimatedSessionsRemaining" is REQUIRED. Your best honest estimate of how many MORE agent sessions (beyond the wave you just composed above) are needed to reach 'amazing' — include follow-up fixes, polish, verification, and anything else you'd want before shipping. Be realistic, not optimistic. Use 0 only if truly done.
|
|
109
|
+
|
|
106
110
|
The "model" field on each task: use "worker" (${workerModel}) for implementation tasks, "planner" (${plannerModel}) for review/analysis/verification tasks. Default is "worker".
|
|
107
111
|
Set "noWorktree": true for verify/user-test tasks — they need the real project directory with env files, dependencies, and local config.
|
|
108
112
|
|
|
109
|
-
If done: {"done": true, "reasoning": "...", "statusUpdate": "...", "tasks": []}`;
|
|
113
|
+
If done: {"done": true, "reasoning": "...", "statusUpdate": "...", "estimatedSessionsRemaining": 0, "tasks": []}`;
|
|
110
114
|
onLog("Assessing...", "status");
|
|
111
115
|
onLog(`Reading codebase — wave ${history.length + 1}`, "event");
|
|
112
116
|
const resultText = await runPlannerQuery(prompt, { cwd, model: plannerModel, permissionMode, outputFormat: STEER_SCHEMA }, onLog);
|
|
@@ -124,8 +128,10 @@ If done: {"done": true, "reasoning": "...", "statusUpdate": "...", "tasks": []}`
|
|
|
124
128
|
})();
|
|
125
129
|
const isDone = parsed.done === true;
|
|
126
130
|
const statusUpdate = parsed.statusUpdate || undefined;
|
|
131
|
+
const estRaw = parsed.estimatedSessionsRemaining;
|
|
132
|
+
const estimatedSessionsRemaining = typeof estRaw === "number" && estRaw >= 0 ? Math.round(estRaw) : undefined;
|
|
127
133
|
if (isDone) {
|
|
128
|
-
return { done: true, tasks: [], reasoning: parsed.reasoning || "Objective complete", goalUpdate: parsed.goalUpdate, statusUpdate };
|
|
134
|
+
return { done: true, tasks: [], reasoning: parsed.reasoning || "Objective complete", goalUpdate: parsed.goalUpdate, statusUpdate, estimatedSessionsRemaining: estimatedSessionsRemaining ?? 0 };
|
|
129
135
|
}
|
|
130
136
|
let tasks = (parsed.tasks || []).map((t, i) => ({
|
|
131
137
|
id: String(i),
|
|
@@ -134,5 +140,5 @@ If done: {"done": true, "reasoning": "...", "statusUpdate": "...", "tasks": []}`
|
|
|
134
140
|
...(t.noWorktree && { noWorktree: true }),
|
|
135
141
|
}));
|
|
136
142
|
tasks = postProcess(tasks, remainingBudget, onLog);
|
|
137
|
-
return { done: tasks.length === 0, tasks, reasoning: parsed.reasoning || "", goalUpdate: parsed.goalUpdate, statusUpdate };
|
|
143
|
+
return { done: tasks.length === 0, tasks, reasoning: parsed.reasoning || "", goalUpdate: parsed.goalUpdate, statusUpdate, estimatedSessionsRemaining };
|
|
138
144
|
}
|
package/dist/types.d.ts
CHANGED
|
@@ -138,6 +138,7 @@ export interface SteerResult {
|
|
|
138
138
|
reasoning: string;
|
|
139
139
|
goalUpdate?: string;
|
|
140
140
|
statusUpdate?: string;
|
|
141
|
+
estimatedSessionsRemaining?: number;
|
|
141
142
|
}
|
|
142
143
|
/** Accumulated run memory — designs, verifications, etc. — fed to the steerer. */
|
|
143
144
|
export interface RunMemory {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-overnight",
|
|
3
|
-
"version": "1.12.0",
|
|
3
|
+
"version": "1.13.1",
|
|
4
4
|
"description": "Run 10, 100, or 1000 Claude agents overnight. Parallel autonomous AI coding with thinking waves, iterative quality steering, crash recovery, and rate limit handling. Built on the Claude Agent SDK.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|