claude-overnight 1.11.13 → 1.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +47 -5
- package/dist/render.js +21 -6
- package/dist/run.js +1 -1
- package/dist/state.js +6 -3
- package/dist/swarm.d.ts +30 -0
- package/dist/swarm.js +174 -24
- package/dist/types.d.ts +2 -0
- package/dist/ui.d.ts +2 -0
- package/dist/ui.js +29 -2
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -269,15 +269,37 @@ async function main() {
|
|
|
269
269
|
// (saveRunState always stores [] — the plan is on disk in tasks.json).
|
|
270
270
|
if (resumeState.currentTasks.length === 0) {
|
|
271
271
|
const loaded = salvageFromFile(join(resumeRunDir, "tasks.json"), resumeState.budget, () => { }, "resume");
|
|
272
|
-
if (!loaded && resumeState.phase === "planning") {
|
|
273
|
-
console.error(chalk.red(`\n Planning-phase run has no usable tasks.json — start Fresh instead.\n`));
|
|
274
|
-
process.exit(1);
|
|
275
|
-
}
|
|
276
272
|
if (loaded) {
|
|
277
273
|
resumeState.currentTasks = loaded;
|
|
278
274
|
const label = resumeState.phase === "planning" ? "Resuming plan" : `Resuming ${resumeState.phase} run`;
|
|
279
275
|
console.log(chalk.green(`\n ✓ ${label} · ${loaded.length} tasks loaded from tasks.json`));
|
|
280
276
|
}
|
|
277
|
+
else if (resumeState.phase === "planning") {
|
|
278
|
+
// No tasks.json — the thinking wave got killed before orchestrate ran.
|
|
279
|
+
// If design docs survived, re-orchestrate from them (salvages the
|
|
280
|
+
// thinking spend instead of throwing it away).
|
|
281
|
+
const designs = readMdDir(join(resumeRunDir, "designs"));
|
|
282
|
+
if (!designs || !resumeState.objective) {
|
|
283
|
+
console.error(chalk.red(`\n Planning-phase run has no usable tasks.json or designs — start Fresh instead.\n`));
|
|
284
|
+
process.exit(1);
|
|
285
|
+
}
|
|
286
|
+
const remainingBudget = Math.max(resumeState.concurrency, resumeState.budget - resumeState.accCompleted);
|
|
287
|
+
const orchBudget = Math.min(50, Math.max(resumeState.concurrency, Math.ceil(remainingBudget * 0.5)));
|
|
288
|
+
const flexNote = `This is wave 1 of an adaptive multi-wave run (total budget: ${remainingBudget}). Plan the highest-impact foundational work first. Future waves will iterate based on what's learned.`;
|
|
289
|
+
console.log(chalk.cyan(`\n ◆ Re-orchestrating plan from existing designs...\n`));
|
|
290
|
+
process.stdout.write("\x1B[?25l");
|
|
291
|
+
try {
|
|
292
|
+
const orchTasks = await orchestrate(resumeState.objective, designs, cwd, resumeState.plannerModel, resumeState.workerModel, resumeState.permissionMode, orchBudget, resumeState.concurrency, makeProgressLog(), flexNote, join(resumeRunDir, "tasks.json"));
|
|
293
|
+
resumeState.currentTasks = orchTasks;
|
|
294
|
+
process.stdout.write(`\x1B[2K\r ${chalk.green(`✓ ${orchTasks.length} tasks`)}\n`);
|
|
295
|
+
}
|
|
296
|
+
catch (err) {
|
|
297
|
+
process.stdout.write("\x1B[?25h");
|
|
298
|
+
console.error(chalk.red(`\n Re-orchestration failed: ${err.message}\n Start Fresh instead.\n`));
|
|
299
|
+
process.exit(1);
|
|
300
|
+
}
|
|
301
|
+
process.stdout.write("\x1B[?25h");
|
|
302
|
+
}
|
|
281
303
|
}
|
|
282
304
|
const unmerged = resumeState.branches.filter(b => b.status === "unmerged").length;
|
|
283
305
|
if (unmerged > 0) {
|
|
@@ -634,7 +656,7 @@ async function main() {
|
|
|
634
656
|
useWorktrees: false, mergeStrategy: "yolo", agentTimeoutMs, usageCap, allowExtraUsage, extraUsageBudget,
|
|
635
657
|
});
|
|
636
658
|
const thinkRunInfo = { accIn: 0, accOut: 0, accCost: 0, accCompleted: 0, accFailed: 0, sessionsBudget: budget ?? 10, waveNum: -1, remaining: budget ?? 10, model: plannerModel, startedAt: Date.now() };
|
|
637
|
-
const thinkDisplay = new RunDisplay(thinkRunInfo, { remaining: 0, usageCap, dirty: false });
|
|
659
|
+
const thinkDisplay = new RunDisplay(thinkRunInfo, { remaining: 0, usageCap, concurrency, paused: false, dirty: false });
|
|
638
660
|
thinkDisplay.setWave(thinkingSwarm);
|
|
639
661
|
thinkDisplay.start();
|
|
640
662
|
try {
|
|
@@ -651,6 +673,26 @@ async function main() {
|
|
|
651
673
|
thinkingOut = thinkingSwarm.totalOutputTokens;
|
|
652
674
|
thinkingTools = thinkingSwarm.agents.reduce((sum, a) => sum + a.toolCalls, 0);
|
|
653
675
|
thinkingHistory = { wave: -1, tasks: thinkingSwarm.agents.map(a => ({ prompt: a.task.prompt.slice(0, 200), status: a.status, filesChanged: a.filesChanged, error: a.error })) };
|
|
676
|
+
// Persist thinking cost/count into run.json so if the user quits
|
|
677
|
+
// between thinking and orchestrate, resume still sees the real spend
|
|
678
|
+
// and the run stays visible in the picker (designs on disk = resumable).
|
|
679
|
+
try {
|
|
680
|
+
saveRunState(runDir, {
|
|
681
|
+
id: runDir.split(/[/\\]/).pop() ?? "",
|
|
682
|
+
objective: objective, budget: budget ?? 10, remaining: (budget ?? 10) - thinkingUsed,
|
|
683
|
+
workerModel, plannerModel, concurrency, permissionMode,
|
|
684
|
+
usageCap, allowExtraUsage, extraUsageBudget,
|
|
685
|
+
flex, useWorktrees, mergeStrategy,
|
|
686
|
+
waveNum: 0, currentTasks: [],
|
|
687
|
+
accCost: thinkingCost, accCompleted: thinkingUsed, accFailed: 0,
|
|
688
|
+
accIn: thinkingIn, accOut: thinkingOut, accTools: thinkingTools,
|
|
689
|
+
branches: [],
|
|
690
|
+
phase: "planning",
|
|
691
|
+
startedAt: new Date().toISOString(),
|
|
692
|
+
cwd,
|
|
693
|
+
});
|
|
694
|
+
}
|
|
695
|
+
catch { }
|
|
654
696
|
if (thinkingSwarm.rateLimitResetsAt) {
|
|
655
697
|
const waitMs = thinkingSwarm.rateLimitResetsAt - Date.now();
|
|
656
698
|
if (waitMs > 0) {
|
package/dist/render.js
CHANGED
|
@@ -42,10 +42,16 @@ function renderHeader(out, w, p) {
|
|
|
42
42
|
const bar = chalk.green("\u2588".repeat(filled)) + chalk.gray("\u2591".repeat(barW - filled));
|
|
43
43
|
const modelTag = p.model ? chalk.dim(` [${p.model}]`) : "";
|
|
44
44
|
const phaseTag = p.phase ? " " + p.phase : "";
|
|
45
|
+
const blocked = p.blocked ?? 0;
|
|
46
|
+
const working = Math.max(0, p.active - blocked);
|
|
47
|
+
const stuck = blocked > 0 && working === 0;
|
|
48
|
+
const activeChip = p.active > 0
|
|
49
|
+
? (stuck ? chalk.yellow(`${p.active} blocked`) : chalk.cyan(`${working} active`) + (blocked > 0 ? chalk.yellow(` (${blocked} blocked)`) : ""))
|
|
50
|
+
: "";
|
|
45
51
|
out.push("");
|
|
46
52
|
out.push(` ${chalk.bold.white("CLAUDE OVERNIGHT")}${modelTag}${phaseTag} ${bar} ` +
|
|
47
53
|
`${p.barLabel} ` +
|
|
48
|
-
(
|
|
54
|
+
(activeChip ? activeChip + " " : "") +
|
|
49
55
|
(p.queued > 0 ? chalk.gray(`${p.queued} queued`) + " " : "") +
|
|
50
56
|
chalk.gray(`\u23F1 ${fmtDur(Date.now() - p.startedAt)}`));
|
|
51
57
|
const tokIn = fmtTokens(p.totalIn);
|
|
@@ -129,17 +135,19 @@ export function renderFrame(swarm, showHotkeys, runInfo) {
|
|
|
129
135
|
const w = Math.max((process.stdout.columns ?? 80) || 80, 60);
|
|
130
136
|
const out = [];
|
|
131
137
|
const stoppingTag = swarm.aborted ? chalk.yellow("STOPPING") : "";
|
|
138
|
+
const pausedTag = swarm.paused ? chalk.yellow("PAUSED") : "";
|
|
139
|
+
const stallTag = swarm.stallLevel >= 3 ? chalk.red("STALL") : swarm.stallLevel > 0 ? chalk.yellow(`STALL L${swarm.stallLevel}`) : "";
|
|
132
140
|
const phaseLabel = swarm.phase === "planning" ? chalk.magenta("PLANNING")
|
|
133
141
|
: swarm.phase === "merging" ? chalk.yellow("MERGING")
|
|
134
142
|
: swarm.rateLimitPaused > 0 ? chalk.yellow("COOLING") : "";
|
|
135
|
-
const phase = [phaseLabel, stoppingTag].filter(Boolean).join(" ");
|
|
143
|
+
const phase = [phaseLabel, pausedTag, stallTag, stoppingTag].filter(Boolean).join(" ");
|
|
136
144
|
const waveUsed = swarm.completed + swarm.failed;
|
|
137
145
|
renderHeader(out, w, {
|
|
138
146
|
model: runInfo?.model ?? swarm.model,
|
|
139
147
|
phase,
|
|
140
148
|
barPct: swarm.total > 0 ? swarm.completed / swarm.total : 0,
|
|
141
149
|
barLabel: `${swarm.completed}/${swarm.total}`,
|
|
142
|
-
active: swarm.active, queued: swarm.pending,
|
|
150
|
+
active: swarm.active, blocked: swarm.blocked, queued: swarm.pending,
|
|
143
151
|
startedAt: runInfo?.startedAt ?? swarm.startedAt,
|
|
144
152
|
totalIn: (runInfo?.accIn ?? 0) + swarm.totalInputTokens,
|
|
145
153
|
totalOut: (runInfo?.accOut ?? 0) + swarm.totalOutputTokens,
|
|
@@ -187,7 +195,11 @@ export function renderFrame(swarm, showHotkeys, runInfo) {
|
|
|
187
195
|
const pending = runInfo?.pendingSteer ?? 0;
|
|
188
196
|
const chip = pending > 0 ? chalk.cyan(` \u270E ${pending} steer queued`) : "";
|
|
189
197
|
const fixChip = swarm.failed > 0 && swarm.active > 0 ? chalk.yellow(" [f] fix") : "";
|
|
190
|
-
|
|
198
|
+
const pauseLabel = swarm.paused ? "[p] resume" : "[p] pause";
|
|
199
|
+
out.push(chalk.dim(` [b] budget [t] threshold [c] conc ${pauseLabel} [s] steer [?] ask [q] stop`) + fixChip + chip);
|
|
200
|
+
if (swarm.blocked > 0 && swarm.blocked === swarm.active) {
|
|
201
|
+
out.push(chalk.yellow(` all workers rate-limited — press [c] to reduce concurrency, [p] to pause, [q] to quit`));
|
|
202
|
+
}
|
|
191
203
|
}
|
|
192
204
|
out.push("");
|
|
193
205
|
return out.join("\n");
|
|
@@ -368,12 +380,15 @@ function fmtRow(a, w) {
|
|
|
368
380
|
const elapsed = a.status === "running" && a.startedAt ? " " + chalk.dim(fmtDur(Date.now() - a.startedAt)) : "";
|
|
369
381
|
const spin = SPINNER[Math.floor(Date.now() / 250) % SPINNER.length];
|
|
370
382
|
const icon = a.status === "running"
|
|
371
|
-
? chalk.blue(`${spin} run`) + elapsed
|
|
383
|
+
? (a.blockedAt ? chalk.yellow("\u25CF blk") : chalk.blue(`${spin} run`)) + elapsed
|
|
372
384
|
: a.status === "done" ? chalk.green("\u2713 done") : chalk.red("\u2717 err ");
|
|
373
385
|
const taskW = Math.max(20, Math.min(36, w - 50));
|
|
374
386
|
const task = truncate(a.task.prompt, taskW).padEnd(taskW);
|
|
375
387
|
let action;
|
|
376
|
-
if (a.currentTool) {
|
|
388
|
+
if (a.blockedAt) {
|
|
389
|
+
action = chalk.yellow(`rate-limited ${fmtDur(Date.now() - a.blockedAt)}`);
|
|
390
|
+
}
|
|
391
|
+
else if (a.currentTool) {
|
|
377
392
|
action = chalk.yellow(a.currentTool);
|
|
378
393
|
}
|
|
379
394
|
else if (a.status === "running") {
|
package/dist/run.js
CHANGED
|
@@ -25,7 +25,7 @@ export async function executeRun(cfg) {
|
|
|
25
25
|
let currentSwarm;
|
|
26
26
|
let remaining;
|
|
27
27
|
let currentTasks;
|
|
28
|
-
const liveConfig = { remaining: 0, usageCap, dirty: false };
|
|
28
|
+
const liveConfig = { remaining: 0, usageCap, concurrency, paused: false, dirty: false };
|
|
29
29
|
let waveNum;
|
|
30
30
|
const waveHistory = [];
|
|
31
31
|
let accCost, accCompleted, accFailed, accTools;
|
package/dist/state.js
CHANGED
|
@@ -191,9 +191,12 @@ export function findIncompleteRuns(rootDir, filterCwd) {
|
|
|
191
191
|
const state = loadRunState(runDir);
|
|
192
192
|
if (!state || state.phase === "done" || state.cwd !== filterCwd)
|
|
193
193
|
continue;
|
|
194
|
-
// Planning-phase runs are
|
|
195
|
-
//
|
|
196
|
-
|
|
194
|
+
// Planning-phase runs are resumable if either tasks.json was written
|
|
195
|
+
// (orchestrate completed) OR design docs exist on disk (thinking wave
|
|
196
|
+
// got killed mid-way — we can re-orchestrate from the designs on resume).
|
|
197
|
+
if (state.phase === "planning"
|
|
198
|
+
&& !existsSync(join(runDir, "tasks.json"))
|
|
199
|
+
&& !readMdDir(join(runDir, "designs")))
|
|
197
200
|
continue;
|
|
198
201
|
results.push({ dir: runDir, state });
|
|
199
202
|
}
|
package/dist/swarm.d.ts
CHANGED
|
@@ -41,6 +41,20 @@ export declare class Swarm {
|
|
|
41
41
|
rateLimitPaused: number;
|
|
42
42
|
isUsingOverage: boolean;
|
|
43
43
|
overageCostUsd: number;
|
|
44
|
+
/** Live-adjustable concurrency target. Workers above this count exit on the next task boundary. */
|
|
45
|
+
targetConcurrency: number;
|
|
46
|
+
/** When true, dispatch is frozen — workers wait without starting new tasks. */
|
|
47
|
+
paused: boolean;
|
|
48
|
+
/** Wall-clock ms of the last sign of real progress (assistant msg, tool use, result). */
|
|
49
|
+
lastProgressAt: number;
|
|
50
|
+
/** 0 = normal, 1 = halved once, 2 = halved twice, 3 = long cooldown at c=1, 4 = aborted. */
|
|
51
|
+
stallLevel: number;
|
|
52
|
+
/** Last time the watchdog took an action; used to debounce escalations. */
|
|
53
|
+
private stallActionAt;
|
|
54
|
+
/** Live worker coroutine count (not agents). */
|
|
55
|
+
private workerCount;
|
|
56
|
+
/** Growable list of worker promises; run() awaits until empty. */
|
|
57
|
+
private workerPromises;
|
|
44
58
|
private queue;
|
|
45
59
|
private config;
|
|
46
60
|
private nextId;
|
|
@@ -56,7 +70,12 @@ export declare class Swarm {
|
|
|
56
70
|
mergeBranch?: string;
|
|
57
71
|
constructor(config: SwarmConfig);
|
|
58
72
|
get active(): number;
|
|
73
|
+
get blocked(): number;
|
|
59
74
|
get pending(): number;
|
|
75
|
+
/** Live-adjust concurrency. Shrinks by having excess workers exit on next task boundary; grows by spawning new workers. */
|
|
76
|
+
setConcurrency(n: number): void;
|
|
77
|
+
/** Freeze/resume dispatch without killing the run. Paused workers block at the top of their loop. */
|
|
78
|
+
setPaused(b: boolean): void;
|
|
60
79
|
run(): Promise<void>;
|
|
61
80
|
abort(): void;
|
|
62
81
|
/** Re-queue all errored agents' tasks for retry within this wave. */
|
|
@@ -65,6 +84,17 @@ export declare class Swarm {
|
|
|
65
84
|
log(agentId: number, text: string): void;
|
|
66
85
|
cleanup(): void;
|
|
67
86
|
private worker;
|
|
87
|
+
/** Mark real progress — resets stall state. Called on any assistant/tool/result message. */
|
|
88
|
+
private markProgress;
|
|
89
|
+
/**
|
|
90
|
+
* Stall watchdog. Called each time a worker finishes a rate-limit wait. Escalates when
|
|
91
|
+
* the whole swarm has been stuck with no progress for a while:
|
|
92
|
+
* L1 @ 5m → halve concurrency
|
|
93
|
+
* L2 @ 10m → halve again
|
|
94
|
+
* L3 @ 15m+ at c=1 → force a 10-minute cooldown instead of hammering every 60s
|
|
95
|
+
* L4 @ 30m → abort the run so it can be resumed later without burning the budget
|
|
96
|
+
*/
|
|
97
|
+
private checkStall;
|
|
68
98
|
private capForOverage;
|
|
69
99
|
private throttle;
|
|
70
100
|
private runAgent;
|
package/dist/swarm.js
CHANGED
|
@@ -34,6 +34,20 @@ export class Swarm {
|
|
|
34
34
|
rateLimitPaused = 0;
|
|
35
35
|
isUsingOverage = false;
|
|
36
36
|
overageCostUsd = 0;
|
|
37
|
+
/** Live-adjustable concurrency target. Workers above this count exit on the next task boundary. */
|
|
38
|
+
targetConcurrency;
|
|
39
|
+
/** When true, dispatch is frozen — workers wait without starting new tasks. */
|
|
40
|
+
paused = false;
|
|
41
|
+
/** Wall-clock ms of the last sign of real progress (assistant msg, tool use, result). */
|
|
42
|
+
lastProgressAt = Date.now();
|
|
43
|
+
/** 0 = normal, 1 = halved once, 2 = halved twice, 3 = long cooldown at c=1, 4 = aborted. */
|
|
44
|
+
stallLevel = 0;
|
|
45
|
+
/** Last time the watchdog took an action; used to debounce escalations. */
|
|
46
|
+
stallActionAt = 0;
|
|
47
|
+
/** Live worker coroutine count (not agents). */
|
|
48
|
+
workerCount = 0;
|
|
49
|
+
/** Growable list of worker promises; run() awaits until empty. */
|
|
50
|
+
workerPromises = [];
|
|
37
51
|
queue;
|
|
38
52
|
config;
|
|
39
53
|
nextId = 0;
|
|
@@ -68,9 +82,33 @@ export class Swarm {
|
|
|
68
82
|
this.baseCostUsd = config.baseCostUsd ?? 0;
|
|
69
83
|
this.queue = [...config.tasks];
|
|
70
84
|
this.total = config.tasks.length;
|
|
85
|
+
this.targetConcurrency = config.concurrency;
|
|
71
86
|
}
|
|
72
87
|
get active() { return this.agents.filter(a => a.status === "running").length; }
|
|
88
|
+
get blocked() { return this.agents.filter(a => a.status === "running" && a.blockedAt != null).length; }
|
|
73
89
|
get pending() { return this.queue.length; }
|
|
90
|
+
/** Live-adjust concurrency. Shrinks by having excess workers exit on next task boundary; grows by spawning new workers. */
|
|
91
|
+
setConcurrency(n) {
|
|
92
|
+
if (!Number.isFinite(n) || n < 1)
|
|
93
|
+
return;
|
|
94
|
+
const prev = this.targetConcurrency;
|
|
95
|
+
if (n === prev)
|
|
96
|
+
return;
|
|
97
|
+
this.targetConcurrency = n;
|
|
98
|
+
this.log(-1, `Concurrency changed: ${prev} → ${n}`);
|
|
99
|
+
if (n > prev && this.queue.length > 0 && !this.aborted && !this.cappedOut) {
|
|
100
|
+
const toSpawn = Math.min(n - this.workerCount, this.queue.length);
|
|
101
|
+
for (let i = 0; i < toSpawn; i++)
|
|
102
|
+
this.workerPromises.push(this.worker());
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
/** Freeze/resume dispatch without killing the run. Paused workers block at the top of their loop. */
|
|
106
|
+
setPaused(b) {
|
|
107
|
+
if (this.paused === b)
|
|
108
|
+
return;
|
|
109
|
+
this.paused = b;
|
|
110
|
+
this.log(-1, b ? "Dispatch paused" : "Dispatch resumed");
|
|
111
|
+
}
|
|
74
112
|
async run() {
|
|
75
113
|
try {
|
|
76
114
|
if (this.config.useWorktrees) {
|
|
@@ -80,8 +118,15 @@ export class Swarm {
|
|
|
80
118
|
this.log(-1, `Worktrees: ${this.worktreeBase}`);
|
|
81
119
|
}
|
|
82
120
|
this.phase = "running";
|
|
83
|
-
const n = Math.min(this.config.concurrency, this.queue.length);
|
|
84
|
-
|
|
121
|
+
const n = Math.min(this.targetConcurrency, this.queue.length);
|
|
122
|
+
for (let i = 0; i < n; i++)
|
|
123
|
+
this.workerPromises.push(this.worker());
|
|
124
|
+
// setConcurrency() can grow workerPromises during execution, so drain in a loop.
|
|
125
|
+
while (this.workerPromises.length > 0) {
|
|
126
|
+
const batch = this.workerPromises.slice();
|
|
127
|
+
this.workerPromises.length = 0;
|
|
128
|
+
await Promise.all(batch);
|
|
129
|
+
}
|
|
85
130
|
if (this.config.useWorktrees) {
|
|
86
131
|
this.phase = "merging";
|
|
87
132
|
const branches = this.agents.filter(a => a.branch && a.status === "done" && (a.filesChanged ?? 0) > 0)
|
|
@@ -96,7 +141,7 @@ export class Swarm {
|
|
|
96
141
|
finally {
|
|
97
142
|
this.cleanup();
|
|
98
143
|
this.logFile = writeSwarmLog({
|
|
99
|
-
startedAt: this.startedAt, model: this.config.model, concurrency: this.config.concurrency,
|
|
144
|
+
startedAt: this.startedAt, model: this.config.model, concurrency: this.targetConcurrency,
|
|
100
145
|
useWorktrees: this.config.useWorktrees, mergeStrategy: this.config.mergeStrategy,
|
|
101
146
|
completed: this.completed, failed: this.failed, aborted: this.aborted,
|
|
102
147
|
cost: this.totalCostUsd, inputTokens: this.totalInputTokens, outputTokens: this.totalOutputTokens,
|
|
@@ -151,23 +196,83 @@ export class Swarm {
|
|
|
151
196
|
}
|
|
152
197
|
// ── Worker loop ──
|
|
153
198
|
async worker() {
|
|
199
|
+
this.workerCount++;
|
|
154
200
|
let tasksProcessed = 0;
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
this.
|
|
201
|
+
try {
|
|
202
|
+
while (this.queue.length > 0 && !this.aborted && !this.cappedOut) {
|
|
203
|
+
// Shrink: exit if we're above the live target.
|
|
204
|
+
if (this.workerCount > this.targetConcurrency) {
|
|
205
|
+
this.log(-1, `Worker exiting (concurrency shrunk to ${this.targetConcurrency})`);
|
|
206
|
+
return;
|
|
207
|
+
}
|
|
208
|
+
// Pause: block here without holding a task, so unpausing resumes cleanly.
|
|
209
|
+
while (this.paused && !this.aborted && !this.cappedOut)
|
|
210
|
+
await sleep(500);
|
|
211
|
+
await this.throttle();
|
|
212
|
+
if (this.cappedOut || this.aborted)
|
|
213
|
+
break;
|
|
214
|
+
if (this.workerCount > this.targetConcurrency)
|
|
215
|
+
return;
|
|
216
|
+
const task = this.queue.shift();
|
|
217
|
+
if (!task)
|
|
218
|
+
break;
|
|
219
|
+
try {
|
|
220
|
+
await this.runAgent(task);
|
|
221
|
+
}
|
|
222
|
+
catch (err) {
|
|
223
|
+
this.log(-1, `Worker error: ${String(err?.message || err).slice(0, 80)}`);
|
|
224
|
+
}
|
|
225
|
+
tasksProcessed++;
|
|
167
226
|
}
|
|
168
|
-
tasksProcessed
|
|
227
|
+
this.log(-1, `Worker finished (${tasksProcessed} tasks)`);
|
|
228
|
+
}
|
|
229
|
+
finally {
|
|
230
|
+
this.workerCount--;
|
|
231
|
+
}
|
|
232
|
+
}
|
|
233
|
+
/** Mark real progress — resets stall state. Called on any assistant/tool/result message. */
|
|
234
|
+
markProgress() {
|
|
235
|
+
this.lastProgressAt = Date.now();
|
|
236
|
+
if (this.stallLevel > 0 && this.lastProgressAt > this.stallActionAt)
|
|
237
|
+
this.stallLevel = 0;
|
|
238
|
+
}
|
|
239
|
+
/**
|
|
240
|
+
* Stall watchdog. Called each time a worker finishes a rate-limit wait. Escalates when
|
|
241
|
+
* the whole swarm has been stuck with no progress for a while:
|
|
242
|
+
* L1 @ 5m → halve concurrency
|
|
243
|
+
* L2 @ 10m → halve again
|
|
244
|
+
* L3 @ 15m+ at c=1 → force a 10-minute cooldown instead of hammering every 60s
|
|
245
|
+
* L4 @ 30m → abort the run so it can be resumed later without burning the budget
|
|
246
|
+
*/
|
|
247
|
+
checkStall() {
|
|
248
|
+
const stalledFor = Date.now() - this.lastProgressAt;
|
|
249
|
+
if (stalledFor < 5 * 60_000)
|
|
250
|
+
return;
|
|
251
|
+
// Debounce so multiple workers waking at once don't double-escalate.
|
|
252
|
+
if (Date.now() - this.stallActionAt < 60_000)
|
|
253
|
+
return;
|
|
254
|
+
if (stalledFor >= 30 * 60_000) {
|
|
255
|
+
this.stallLevel = 4;
|
|
256
|
+
this.stallActionAt = Date.now();
|
|
257
|
+
this.log(-1, `Stalled ${Math.round(stalledFor / 60000)}m with no progress — aborting run so you can resume later`);
|
|
258
|
+
this.abort();
|
|
259
|
+
return;
|
|
260
|
+
}
|
|
261
|
+
if (this.targetConcurrency <= 1 && stalledFor >= 15 * 60_000) {
|
|
262
|
+
this.stallLevel = 3;
|
|
263
|
+
this.stallActionAt = Date.now();
|
|
264
|
+
const until = Date.now() + 10 * 60_000;
|
|
265
|
+
this.rateLimitResetsAt = until;
|
|
266
|
+
this.log(-1, `Stalled at concurrency 1 for ${Math.round(stalledFor / 60000)}m — forcing 10m cooldown`);
|
|
267
|
+
return;
|
|
268
|
+
}
|
|
269
|
+
if (this.stallLevel < 2 && this.targetConcurrency > 1) {
|
|
270
|
+
const next = Math.max(1, Math.floor(this.targetConcurrency / 2));
|
|
271
|
+
this.stallLevel++;
|
|
272
|
+
this.stallActionAt = Date.now();
|
|
273
|
+
this.log(-1, `Auto-throttle L${this.stallLevel}: concurrency ${this.targetConcurrency} → ${next} (stalled ${Math.round(stalledFor / 60000)}m)`);
|
|
274
|
+
this.setConcurrency(next);
|
|
169
275
|
}
|
|
170
|
-
this.log(-1, `Worker finished (${tasksProcessed} tasks)`);
|
|
171
276
|
}
|
|
172
277
|
capForOverage(reason) {
|
|
173
278
|
if (this.cappedOut)
|
|
@@ -210,6 +315,9 @@ export class Swarm {
|
|
|
210
315
|
this.rateLimitUtilization = 0;
|
|
211
316
|
this.rateLimitResetsAt = undefined;
|
|
212
317
|
consecutiveWaits++;
|
|
318
|
+
this.checkStall();
|
|
319
|
+
if (this.aborted || this.cappedOut)
|
|
320
|
+
return;
|
|
213
321
|
}
|
|
214
322
|
}
|
|
215
323
|
// ── Agent execution ──
|
|
@@ -361,12 +469,12 @@ export class Swarm {
|
|
|
361
469
|
agent.status = "error";
|
|
362
470
|
agent.error = "Agent did no work — exited without tool use";
|
|
363
471
|
this.failed++;
|
|
472
|
+
this.log(id, agent.error);
|
|
364
473
|
}
|
|
365
474
|
else {
|
|
366
475
|
agent.status = "done";
|
|
367
476
|
this.completed++;
|
|
368
477
|
}
|
|
369
|
-
this.log(id, this.agentSummary(agent));
|
|
370
478
|
}
|
|
371
479
|
break;
|
|
372
480
|
}
|
|
@@ -378,14 +486,23 @@ export class Swarm {
|
|
|
378
486
|
const waitMs = this.rateLimitResetsAt && this.rateLimitResetsAt > Date.now()
|
|
379
487
|
? Math.max(5000, this.rateLimitResetsAt - Date.now())
|
|
380
488
|
: 120_000;
|
|
381
|
-
|
|
489
|
+
// If the whole swarm has been making zero progress for a while, stop giving
|
|
490
|
+
// rate-limit retries a free pass — force them to count against maxRetries so
|
|
491
|
+
// we eventually surrender instead of looping forever.
|
|
492
|
+
const globallyStalled = Date.now() - this.lastProgressAt > 15 * 60_000;
|
|
493
|
+
const freebie = !globallyStalled;
|
|
494
|
+
this.log(id, `Rate limited — waiting ${Math.ceil(waitMs / 1000)}s${freebie ? " (attempt not counted)" : " (counted — swarm stalled)"}`);
|
|
495
|
+
agent.blockedAt = Date.now();
|
|
382
496
|
this.rateLimitPaused++;
|
|
383
497
|
await sleep(waitMs);
|
|
384
498
|
this.rateLimitPaused--;
|
|
499
|
+
agent.blockedAt = undefined;
|
|
385
500
|
this.isUsingOverage = false;
|
|
386
501
|
this.rateLimitUtilization = 0;
|
|
387
502
|
this.rateLimitResetsAt = undefined;
|
|
388
|
-
|
|
503
|
+
this.checkStall();
|
|
504
|
+
if (freebie)
|
|
505
|
+
attempt--; // normal case: don't count against retries
|
|
389
506
|
continue;
|
|
390
507
|
}
|
|
391
508
|
const canRetry = attempt < maxRetries && !this.aborted && isTransientError(err);
|
|
@@ -403,16 +520,26 @@ export class Swarm {
|
|
|
403
520
|
if (this.config.useWorktrees && agent.branch) {
|
|
404
521
|
agent.filesChanged = autoCommit(agent.id, agent.task.prompt, agentCwd, agent.baseRef, (id, text) => this.log(id, text));
|
|
405
522
|
}
|
|
523
|
+
if (agent.status === "done")
|
|
524
|
+
this.log(agent.id, this.agentSummary(agent));
|
|
406
525
|
}
|
|
407
526
|
agentSummary(agent) {
|
|
408
527
|
const dur = (agent.finishedAt ?? Date.now()) - (agent.startedAt ?? Date.now());
|
|
409
528
|
const m = Math.floor(dur / 60000);
|
|
410
529
|
const s = Math.round((dur % 60000) / 1000);
|
|
411
530
|
const verb = agent.status === "error" ? "errored" : "done";
|
|
412
|
-
|
|
531
|
+
const files = agent.filesChanged != null ? `, ${agent.filesChanged} files changed` : "";
|
|
532
|
+
return `Agent ${agent.id} ${verb}: ${m}m ${s}s, ${agent.toolCalls} tools${files}`;
|
|
413
533
|
}
|
|
414
534
|
// ── Message handler ──
|
|
415
535
|
handleMsg(agent, msg) {
|
|
536
|
+
// Any message that isn't a rate-limit event counts as real progress and
|
|
537
|
+
// resets the stall watchdog + clears the per-agent blocked flag.
|
|
538
|
+
if (msg.type !== "rate_limit_event") {
|
|
539
|
+
this.markProgress();
|
|
540
|
+
if (agent.blockedAt != null)
|
|
541
|
+
agent.blockedAt = undefined;
|
|
542
|
+
}
|
|
416
543
|
switch (msg.type) {
|
|
417
544
|
case "assistant": {
|
|
418
545
|
const m = msg;
|
|
@@ -462,16 +589,39 @@ export class Swarm {
|
|
|
462
589
|
this.totalInputTokens += safeAdd(r.usage.input_tokens);
|
|
463
590
|
this.totalOutputTokens += safeAdd(r.usage.output_tokens);
|
|
464
591
|
}
|
|
592
|
+
// Surface SDK diagnostics so silent failures stop looking like "did no work".
|
|
593
|
+
const denials = r.permission_denials ?? [];
|
|
594
|
+
if (denials.length > 0) {
|
|
595
|
+
const tools = Array.from(new Set(denials.map(d => d.tool_name))).join(", ");
|
|
596
|
+
this.log(agent.id, `${denials.length} permission denial(s): ${tools}`);
|
|
597
|
+
}
|
|
598
|
+
if (r.terminal_reason && r.terminal_reason !== "completed") {
|
|
599
|
+
this.log(agent.id, `terminal: ${r.terminal_reason}`);
|
|
600
|
+
}
|
|
601
|
+
if (r.stop_reason && r.stop_reason !== "end_turn" && r.stop_reason !== "stop_sequence") {
|
|
602
|
+
this.log(agent.id, `stop: ${r.stop_reason}`);
|
|
603
|
+
}
|
|
604
|
+
if (typeof r.num_turns === "number" && r.num_turns > 0) {
|
|
605
|
+
this.log(agent.id, `${r.num_turns} turns`);
|
|
606
|
+
}
|
|
465
607
|
if (r.subtype === "success") {
|
|
466
608
|
agent.status = "done";
|
|
467
609
|
this.completed++;
|
|
468
|
-
this.log(agent.id, this.agentSummary(agent));
|
|
469
610
|
}
|
|
470
611
|
else {
|
|
471
612
|
agent.status = "error";
|
|
472
|
-
|
|
613
|
+
const parts = [r.subtype];
|
|
614
|
+
if (r.terminal_reason && r.terminal_reason !== "completed")
|
|
615
|
+
parts.push(r.terminal_reason);
|
|
616
|
+
const errs = r.errors;
|
|
617
|
+
if (Array.isArray(errs) && errs.length > 0) {
|
|
618
|
+
parts.push(errs[0]);
|
|
619
|
+
for (const e of errs.slice(1, 3))
|
|
620
|
+
this.log(agent.id, `err: ${String(e).slice(0, 160)}`);
|
|
621
|
+
}
|
|
622
|
+
agent.error = parts.join(" — ").slice(0, 180);
|
|
473
623
|
this.failed++;
|
|
474
|
-
this.log(agent.id,
|
|
624
|
+
this.log(agent.id, agent.error);
|
|
475
625
|
}
|
|
476
626
|
break;
|
|
477
627
|
}
|
package/dist/types.d.ts
CHANGED
|
@@ -68,6 +68,8 @@ export interface AgentState {
|
|
|
68
68
|
baseRef?: string;
|
|
69
69
|
/** Number of files changed by the agent (from git diff). */
|
|
70
70
|
filesChanged?: number;
|
|
71
|
+
/** Unix timestamp (ms) when this agent entered a rate-limit wait inside its retry loop. Cleared when work resumes. */
|
|
72
|
+
blockedAt?: number;
|
|
71
73
|
}
|
|
72
74
|
/** A timestamped log line from an agent's execution. */
|
|
73
75
|
export interface LogEntry {
|
package/dist/ui.d.ts
CHANGED
package/dist/ui.js
CHANGED
|
@@ -161,6 +161,9 @@ export class RunDisplay {
|
|
|
161
161
|
if (this.inputMode === "threshold") {
|
|
162
162
|
return `\n ${chalk.cyan(">")} New usage cap (0-100%): ${rendered}\u2588`;
|
|
163
163
|
}
|
|
164
|
+
if (this.inputMode === "concurrency") {
|
|
165
|
+
return `\n ${chalk.cyan(">")} New concurrency (min 1): ${rendered}\u2588`;
|
|
166
|
+
}
|
|
164
167
|
if (this.inputMode === "steer") {
|
|
165
168
|
return `\n ${chalk.cyan(">")} ${chalk.bold("Steer next wave")} ${chalk.dim("(Enter to queue, Esc to cancel)")}\n ${rendered}\u2588`;
|
|
166
169
|
}
|
|
@@ -226,7 +229,7 @@ export class RunDisplay {
|
|
|
226
229
|
}
|
|
227
230
|
/** Handle a pasted block. Returns true if the frame needs a redraw. */
|
|
228
231
|
handlePaste(text) {
|
|
229
|
-
if (this.inputMode === "budget" || this.inputMode === "threshold") {
|
|
232
|
+
if (this.inputMode === "budget" || this.inputMode === "threshold" || this.inputMode === "concurrency") {
|
|
230
233
|
const clean = text.replace(/[^0-9.]/g, "");
|
|
231
234
|
if (clean)
|
|
232
235
|
appendCharToSegments(this.inputSegs, clean);
|
|
@@ -243,7 +246,7 @@ export class RunDisplay {
|
|
|
243
246
|
/** Handle a typed (non-pasted) chunk. Returns true if the frame needs a redraw. */
|
|
244
247
|
handleTyped(s) {
|
|
245
248
|
const lc = this.liveConfig;
|
|
246
|
-
if (this.inputMode === "budget" || this.inputMode === "threshold") {
|
|
249
|
+
if (this.inputMode === "budget" || this.inputMode === "threshold" || this.inputMode === "concurrency") {
|
|
247
250
|
let dirty = false;
|
|
248
251
|
for (const ch of s) {
|
|
249
252
|
if (ch === "\r" || ch === "\n") {
|
|
@@ -261,6 +264,12 @@ export class RunDisplay {
|
|
|
261
264
|
this.swarm.usageCap = lc.usageCap;
|
|
262
265
|
this.swarm?.log(-1, `Usage cap changed to ${val > 0 ? val + "%" : "unlimited"}`);
|
|
263
266
|
}
|
|
267
|
+
else if (this.inputMode === "concurrency" && !isNaN(val) && val >= 1) {
|
|
268
|
+
const n = Math.round(val);
|
|
269
|
+
lc.concurrency = n;
|
|
270
|
+
lc.dirty = true;
|
|
271
|
+
this.swarm?.setConcurrency(n);
|
|
272
|
+
}
|
|
264
273
|
this.inputMode = "none";
|
|
265
274
|
this.inputSegs = [];
|
|
266
275
|
return true;
|
|
@@ -340,6 +349,24 @@ export class RunDisplay {
|
|
|
340
349
|
}
|
|
341
350
|
return false;
|
|
342
351
|
}
|
|
352
|
+
if (s === "c" || s === "C") {
|
|
353
|
+
if (this.swarm) {
|
|
354
|
+
this.inputMode = "concurrency";
|
|
355
|
+
this.inputSegs = [];
|
|
356
|
+
return true;
|
|
357
|
+
}
|
|
358
|
+
return false;
|
|
359
|
+
}
|
|
360
|
+
if (s === "p" || s === "P") {
|
|
361
|
+
if (this.swarm) {
|
|
362
|
+
const next = !this.swarm.paused;
|
|
363
|
+
this.swarm.setPaused(next);
|
|
364
|
+
lc.paused = next;
|
|
365
|
+
lc.dirty = true;
|
|
366
|
+
return true;
|
|
367
|
+
}
|
|
368
|
+
return false;
|
|
369
|
+
}
|
|
343
370
|
if ((s === "f" || s === "F") && this.swarm && this.swarm.failed > 0 && this.swarm.active > 0) {
|
|
344
371
|
this.swarm.requeueFailed();
|
|
345
372
|
return false;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-overnight",
|
|
3
|
-
"version": "1.11.13",
|
|
3
|
+
"version": "1.12.0",
|
|
4
4
|
"description": "Run 10, 100, or 1000 Claude agents overnight. Parallel autonomous AI coding with thinking waves, iterative quality steering, crash recovery, and rate limit handling. Built on the Claude Agent SDK.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|