claude-overnight 0.5.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -7
- package/dist/index.js +107 -69
- package/dist/planner.js +27 -2
- package/dist/swarm.js +16 -7
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -45,7 +45,7 @@ A guided flow walks you through each step:
|
|
|
45
45
|
╰────────────────────────────────────╯
|
|
46
46
|
```
|
|
47
47
|
|
|
48
|
-
|
|
48
|
+
For large budgets, the planner identifies research themes — review them, then press Run. Everything after that is fully autonomous: thinking agents explore, the orchestrator synthesizes tasks, execution waves run, and steering adapts between waves. No further interaction needed — go to sleep.
|
|
49
49
|
|
|
50
50
|
### Task file
|
|
51
51
|
|
|
@@ -68,14 +68,17 @@ The planner always runs on the best available model (Opus) regardless of which m
|
|
|
68
68
|
For large budgets (`budget > concurrency * 3`), the planner doesn't try to generate hundreds of tasks from scratch. Instead, it launches a **thinking wave** — a team of architect agents that explore your codebase in parallel before any code is written.
|
|
69
69
|
|
|
70
70
|
```
|
|
71
|
-
⠋ identifying themes...
|
|
72
|
-
|
|
73
|
-
◆
|
|
71
|
+
⠋ identifying themes... → splits objective into N angles (< 30s)
|
|
72
|
+
✓ 10 themes → review themes, press Run, walk away
|
|
73
|
+
◆ Thinking: 10 agents exploring → each explores from its angle, writes a design doc
|
|
74
|
+
◆ Orchestrating plan... → reads all design docs, synthesizes execution tasks
|
|
75
|
+
◆ Wave 1 · 50 tasks → fully autonomous from here
|
|
76
|
+
◆ Steering... → adapts between waves, retries on rate limits
|
|
74
77
|
```
|
|
75
78
|
|
|
76
|
-
|
|
79
|
+
The review prompt appears right after theme identification — the last thing requiring your presence. After you press Run, the thinking wave, orchestration, execution, and steering all run autonomously. Rate-limited? The planner waits and retries. Go to sleep.
|
|
77
80
|
|
|
78
|
-
|
|
81
|
+
The number of thinking agents scales with budget: it is never fewer than your concurrency setting (e.g. 5 for budget=50 when running 5 concurrent agents) and is capped at 10, which is reached at budget=2000+. Each agent explores the codebase from a different angle and writes a structured design document. The orchestrator then reads all design docs and produces grounded execution tasks referencing real files and patterns.
|
|
79
82
|
|
|
80
83
|
For small budgets (≤ `concurrency * 3`), the planner skips the thinking wave and generates tasks directly — fast and efficient for focused work.
|
|
81
84
|
|
|
@@ -99,7 +102,7 @@ The budget also shapes task granularity:
|
|
|
99
102
|
|
|
100
103
|
**Large budget (50+)**: Thinking wave + orchestration. Architects explore, then execution tasks are synthesized from their findings. Each task is a substantial work session grounded in real codebase analysis.
|
|
101
104
|
|
|
102
|
-
A budget of 200 is not 200 micro-edits. It's 5 architects + ~195 senior-engineer work sessions, planned in waves.
|
|
105
|
+
A budget of 200 is not 200 micro-edits. It's ~5 architects + ~195 senior-engineer work sessions, planned in waves. A budget of 2000 gets 10 architects.
|
|
103
106
|
|
|
104
107
|
## Usage limits
|
|
105
108
|
|
package/dist/index.js
CHANGED
|
@@ -521,10 +521,11 @@ async function main() {
|
|
|
521
521
|
process.stdout.write("\x1B[?25l");
|
|
522
522
|
const planRestore = () => process.stdout.write("\x1B[?25h");
|
|
523
523
|
const useThinking = flex && (budget ?? 10) > concurrency * 3;
|
|
524
|
+
const thinkingCount = useThinking ? Math.min(Math.max(concurrency, Math.ceil((budget ?? 10) * 0.005)), 10) : 0;
|
|
524
525
|
const designDir = join(cwd, ".claude-overnight", "designs");
|
|
525
526
|
try {
|
|
526
527
|
if (useThinking) {
|
|
527
|
-
// Phase 1: Quick theme identification
|
|
528
|
+
// Phase 1: Quick theme identification → review → then autonomous
|
|
528
529
|
let themeFrame = 0;
|
|
529
530
|
const themeSpinner = setInterval(() => {
|
|
530
531
|
const spin = chalk.cyan(BRAILLE[themeFrame++ % BRAILLE.length]);
|
|
@@ -532,13 +533,52 @@ async function main() {
|
|
|
532
533
|
}, 120);
|
|
533
534
|
let themes;
|
|
534
535
|
try {
|
|
535
|
-
themes = await identifyThemes(objective,
|
|
536
|
+
themes = await identifyThemes(objective, thinkingCount, plannerModel, permissionMode);
|
|
536
537
|
}
|
|
537
538
|
finally {
|
|
538
539
|
clearInterval(themeSpinner);
|
|
539
540
|
}
|
|
540
|
-
process.stdout.write(`\x1B[2K\r ${chalk.green(`\u2713 ${themes.length} themes`)}\n`);
|
|
541
|
-
//
|
|
541
|
+
process.stdout.write(`\x1B[2K\r ${chalk.green(`\u2713 ${themes.length} themes`)}\n\n`);
|
|
542
|
+
// Show themes for review — this is the LAST user interaction
|
|
543
|
+
planRestore();
|
|
544
|
+
let reviewing = true;
|
|
545
|
+
while (reviewing) {
|
|
546
|
+
for (let i = 0; i < themes.length; i++) {
|
|
547
|
+
console.log(chalk.dim(` ${String(i + 1).padStart(3)}.`) + ` ${themes[i]}`);
|
|
548
|
+
}
|
|
549
|
+
console.log(chalk.dim(`\n ${thinkingCount} thinking agents → orchestrate → ${(budget ?? 10) - thinkingCount} execution sessions\n`));
|
|
550
|
+
const action = await selectKey(`${chalk.white(`${themes.length} themes`)} ${chalk.dim(`· ${thinkingCount} thinking · ${concurrency} concurrent`)}`, [
|
|
551
|
+
{ key: "r", desc: "un" },
|
|
552
|
+
{ key: "e", desc: "dit" },
|
|
553
|
+
{ key: "q", desc: "uit" },
|
|
554
|
+
]);
|
|
555
|
+
switch (action) {
|
|
556
|
+
case "r":
|
|
557
|
+
reviewing = false;
|
|
558
|
+
break;
|
|
559
|
+
case "e": {
|
|
560
|
+
const feedback = await ask(`\n ${chalk.bold("What should change?")}\n ${chalk.cyan(">")} `);
|
|
561
|
+
if (!feedback)
|
|
562
|
+
break;
|
|
563
|
+
process.stdout.write("\x1B[?25l");
|
|
564
|
+
try {
|
|
565
|
+
themes = await identifyThemes(`${objective}\n\nUser feedback: ${feedback}`, thinkingCount, plannerModel, permissionMode);
|
|
566
|
+
process.stdout.write(`\x1B[2K\r ${chalk.green(`\u2713 ${themes.length} themes`)}\n\n`);
|
|
567
|
+
}
|
|
568
|
+
catch (err) {
|
|
569
|
+
console.error(chalk.red(`\n Re-planning failed: ${err.message}\n`));
|
|
570
|
+
}
|
|
571
|
+
planRestore();
|
|
572
|
+
break;
|
|
573
|
+
}
|
|
574
|
+
case "q":
|
|
575
|
+
console.log(chalk.dim("\n Aborted.\n"));
|
|
576
|
+
process.exit(0);
|
|
577
|
+
}
|
|
578
|
+
}
|
|
579
|
+
// ── From here, fully autonomous — no more user interaction ──
|
|
580
|
+
process.stdout.write("\x1B[?25l");
|
|
581
|
+
// Phase 2: Thinking wave
|
|
542
582
|
mkdirSync(designDir, { recursive: true });
|
|
543
583
|
const thinkingTasks = buildThinkingTasks(objective, themes, designDir, plannerModel);
|
|
544
584
|
console.log(chalk.cyan(`\n ◆ Thinking: ${thinkingTasks.length} agents exploring...\n`));
|
|
@@ -571,27 +611,84 @@ async function main() {
|
|
|
571
611
|
const flexNote = `This is wave 1 of an adaptive multi-wave run (total budget: ${(budget ?? 10) - thinkingUsed}). Plan the highest-impact foundational work first. Future waves will iterate based on what's learned.`;
|
|
572
612
|
console.log(chalk.cyan(`\n ◆ Orchestrating plan...\n`));
|
|
573
613
|
tasks = await orchestrate(objective, designContext, cwd, plannerModel, workerModel, permissionMode, orchBudget, concurrency, makeProgressLog(), flexNote);
|
|
574
|
-
|
|
575
|
-
process.stdout.write(`\x1B[2K\r ${chalk.green(`\u2713 ${tasks.length} tasks`)}${chalk.dim(` · ${remaining} remaining`)}\n\n`);
|
|
614
|
+
process.stdout.write(`\x1B[2K\r ${chalk.green(`\u2713 ${tasks.length} tasks`)}\n\n`);
|
|
576
615
|
}
|
|
577
616
|
else {
|
|
578
|
-
|
|
579
|
-
console.log(chalk.yellow(`\n No design docs produced — falling back to direct planning\n`));
|
|
617
|
+
console.log(chalk.yellow(`\n No design docs — falling back to direct planning\n`));
|
|
580
618
|
const waveBudget = Math.min(50, Math.max(concurrency, Math.ceil(((budget ?? 10) - thinkingUsed) * 0.5)));
|
|
581
619
|
tasks = await planTasks(objective, cwd, plannerModel, workerModel, permissionMode, waveBudget, concurrency, makeProgressLog());
|
|
582
620
|
process.stdout.write(`\x1B[2K\r ${chalk.green(`\u2713 ${tasks.length} tasks`)}\n\n`);
|
|
583
621
|
}
|
|
584
622
|
}
|
|
585
623
|
else {
|
|
586
|
-
// Small budget: direct planning
|
|
624
|
+
// Small budget: direct planning → review → run
|
|
587
625
|
const waveBudget = flex ? Math.min(50, Math.max(concurrency, Math.ceil((budget ?? 10) * 0.5))) : budget;
|
|
588
626
|
const flexNote = flex
|
|
589
627
|
? `This is wave 1 of an adaptive multi-wave run (total budget: ${budget}). Plan the highest-impact foundational work first. Future waves will iterate, polish, and expand based on what's learned.`
|
|
590
628
|
: undefined;
|
|
591
629
|
console.log(chalk.cyan(`\n ◆ Planning${flex ? " wave 1" : ""}...\n`));
|
|
592
630
|
tasks = await planTasks(objective, cwd, plannerModel, workerModel, permissionMode, waveBudget, concurrency, makeProgressLog(), flexNote);
|
|
593
|
-
const flexHint = flex ? chalk.dim(`
|
|
631
|
+
const flexHint = flex ? chalk.dim(` · wave 1`) : "";
|
|
594
632
|
process.stdout.write(`\x1B[2K\r ${chalk.green(`\u2713 ${tasks.length} tasks`)}${flexHint}\n\n`);
|
|
633
|
+
// Review loop for small-budget path
|
|
634
|
+
planRestore();
|
|
635
|
+
let reviewing = true;
|
|
636
|
+
while (reviewing) {
|
|
637
|
+
showPlan(tasks);
|
|
638
|
+
const action = await selectKey(`${chalk.white(`${tasks.length} tasks`)} ${chalk.dim(`· ${concurrency} concurrent`)}`, [
|
|
639
|
+
{ key: "r", desc: "un" },
|
|
640
|
+
{ key: "e", desc: "dit" },
|
|
641
|
+
{ key: "c", desc: "hat" },
|
|
642
|
+
{ key: "q", desc: "uit" },
|
|
643
|
+
]);
|
|
644
|
+
switch (action) {
|
|
645
|
+
case "r":
|
|
646
|
+
reviewing = false;
|
|
647
|
+
break;
|
|
648
|
+
case "e": {
|
|
649
|
+
const feedback = await ask(`\n ${chalk.bold("What should change?")}\n ${chalk.cyan(">")} `);
|
|
650
|
+
if (!feedback)
|
|
651
|
+
break;
|
|
652
|
+
console.log(chalk.cyan("\n ◆ Re-planning...\n"));
|
|
653
|
+
process.stdout.write("\x1B[?25l");
|
|
654
|
+
try {
|
|
655
|
+
tasks = await refinePlan(objective, tasks, feedback, cwd, plannerModel, workerModel, permissionMode, budget, concurrency, makeProgressLog());
|
|
656
|
+
process.stdout.write(`\x1B[2K\r ${chalk.green(`\u2713 ${tasks.length} tasks`)}\n\n`);
|
|
657
|
+
}
|
|
658
|
+
catch (err) {
|
|
659
|
+
console.error(chalk.red(`\n Re-planning failed: ${err.message}\n`));
|
|
660
|
+
}
|
|
661
|
+
planRestore();
|
|
662
|
+
break;
|
|
663
|
+
}
|
|
664
|
+
case "c": {
|
|
665
|
+
const question = await ask(`\n ${chalk.bold("Ask about the plan:")}\n ${chalk.cyan(">")} `);
|
|
666
|
+
if (!question)
|
|
667
|
+
break;
|
|
668
|
+
process.stdout.write("\x1B[?25l");
|
|
669
|
+
try {
|
|
670
|
+
let answer = "";
|
|
671
|
+
for await (const msg of query({
|
|
672
|
+
prompt: `You planned these tasks for the objective "${objective}":\n${tasks.map((t, i) => `${i + 1}. ${t.prompt}`).join("\n")}\n\nUser question: ${question}`,
|
|
673
|
+
options: { cwd, model: plannerModel, permissionMode, persistSession: false },
|
|
674
|
+
})) {
|
|
675
|
+
if (msg.type === "result" && msg.subtype === "success")
|
|
676
|
+
answer = msg.result || "";
|
|
677
|
+
}
|
|
678
|
+
planRestore();
|
|
679
|
+
if (answer)
|
|
680
|
+
console.log(chalk.dim(`\n ${answer.slice(0, 500)}\n`));
|
|
681
|
+
}
|
|
682
|
+
catch {
|
|
683
|
+
planRestore();
|
|
684
|
+
}
|
|
685
|
+
break;
|
|
686
|
+
}
|
|
687
|
+
case "q":
|
|
688
|
+
console.log(chalk.dim("\n Aborted.\n"));
|
|
689
|
+
process.exit(0);
|
|
690
|
+
}
|
|
691
|
+
}
|
|
595
692
|
}
|
|
596
693
|
}
|
|
597
694
|
catch (err) {
|
|
@@ -602,65 +699,6 @@ async function main() {
|
|
|
602
699
|
console.error(chalk.red(`\n Planning failed: ${err.message}\n`));
|
|
603
700
|
process.exit(1);
|
|
604
701
|
}
|
|
605
|
-
// ── Review loop ──
|
|
606
|
-
planRestore();
|
|
607
|
-
let reviewing = true;
|
|
608
|
-
while (reviewing) {
|
|
609
|
-
showPlan(tasks);
|
|
610
|
-
const action = await selectKey(`${chalk.white(`${tasks.length} tasks`)} ${chalk.dim(`· ${concurrency} concurrent`)}`, [
|
|
611
|
-
{ key: "r", desc: "un" },
|
|
612
|
-
{ key: "e", desc: "dit" },
|
|
613
|
-
{ key: "c", desc: "hat" },
|
|
614
|
-
{ key: "q", desc: "uit" },
|
|
615
|
-
]);
|
|
616
|
-
switch (action) {
|
|
617
|
-
case "r":
|
|
618
|
-
reviewing = false;
|
|
619
|
-
break;
|
|
620
|
-
case "e": {
|
|
621
|
-
const feedback = await ask(`\n ${chalk.bold("What should change?")}\n ${chalk.cyan(">")} `);
|
|
622
|
-
if (!feedback)
|
|
623
|
-
break;
|
|
624
|
-
console.log(chalk.cyan("\n ◆ Re-planning...\n"));
|
|
625
|
-
process.stdout.write("\x1B[?25l");
|
|
626
|
-
try {
|
|
627
|
-
tasks = await refinePlan(objective, tasks, feedback, cwd, plannerModel, workerModel, permissionMode, budget, concurrency, makeProgressLog());
|
|
628
|
-
process.stdout.write(`\x1B[2K\r ${chalk.green(`\u2713 ${tasks.length} tasks`)}\n\n`);
|
|
629
|
-
}
|
|
630
|
-
catch (err) {
|
|
631
|
-
console.error(chalk.red(`\n Re-planning failed: ${err.message}\n`));
|
|
632
|
-
}
|
|
633
|
-
planRestore();
|
|
634
|
-
break;
|
|
635
|
-
}
|
|
636
|
-
case "c": {
|
|
637
|
-
const question = await ask(`\n ${chalk.bold("Ask about the plan:")}\n ${chalk.cyan(">")} `);
|
|
638
|
-
if (!question)
|
|
639
|
-
break;
|
|
640
|
-
process.stdout.write("\x1B[?25l");
|
|
641
|
-
try {
|
|
642
|
-
let answer = "";
|
|
643
|
-
for await (const msg of query({
|
|
644
|
-
prompt: `You planned these tasks for the objective "${objective}":\n${tasks.map((t, i) => `${i + 1}. ${t.prompt}`).join("\n")}\n\nUser question: ${question}`,
|
|
645
|
-
options: { cwd, model: plannerModel, permissionMode, persistSession: false },
|
|
646
|
-
})) {
|
|
647
|
-
if (msg.type === "result" && msg.subtype === "success")
|
|
648
|
-
answer = msg.result || "";
|
|
649
|
-
}
|
|
650
|
-
planRestore();
|
|
651
|
-
if (answer)
|
|
652
|
-
console.log(chalk.dim(`\n ${answer.slice(0, 500)}\n`));
|
|
653
|
-
}
|
|
654
|
-
catch {
|
|
655
|
-
planRestore();
|
|
656
|
-
}
|
|
657
|
-
break;
|
|
658
|
-
}
|
|
659
|
-
case "q":
|
|
660
|
-
console.log(chalk.dim("\n Aborted.\n"));
|
|
661
|
-
process.exit(0);
|
|
662
|
-
}
|
|
663
|
-
}
|
|
664
702
|
}
|
|
665
703
|
if (tasks.length === 0) {
|
|
666
704
|
console.error("No tasks provided.");
|
package/dist/planner.js
CHANGED
|
@@ -2,7 +2,7 @@ import { query } from "@anthropic-ai/claude-agent-sdk";
|
|
|
2
2
|
const INACTIVITY_MS = 5 * 60 * 1000;
|
|
3
3
|
export function detectModelTier(model) {
|
|
4
4
|
const m = model.toLowerCase();
|
|
5
|
-
if (m.includes("opus"))
|
|
5
|
+
if (m === "default" || m.includes("opus"))
|
|
6
6
|
return "opus";
|
|
7
7
|
if (m.includes("sonnet"))
|
|
8
8
|
return "sonnet";
|
|
@@ -146,7 +146,32 @@ Respond with ONLY a JSON object (no markdown fences):
|
|
|
146
146
|
]
|
|
147
147
|
}`;
|
|
148
148
|
}
|
|
149
|
+
/**
 * Lower-case phrases that mark an error message as a rate-limit / overload
 * failure worth retrying.
 *
 * Deliberately phrases rather than the bare words "rate" and "limit": those
 * substrings also occur in unrelated messages ("generate", "iterate",
 * "delimiter", "unlimited"), which would make ordinary failures look
 * retryable and stall the planner in backoff for minutes.
 */
const RATE_LIMIT_PATTERNS = [
    "rate limit",
    "rate-limit",
    "rate_limit",
    "ratelimit",
    "usage limit",
    "limit reached",
    "hit your limit",
    "overloaded",
    "429",
    "too many",
];
/**
 * Heuristically decides whether a thrown value represents a rate-limit or
 * overload condition.
 *
 * @param {unknown} err - The caught value; an Error contributes its
 *   `message`, anything else is stringified.
 * @returns {boolean} True when the message contains one of
 *   RATE_LIMIT_PATTERNS (case-insensitive).
 */
function isRateLimitError(err) {
    const raw = err instanceof Error ? err.message : String(err);
    // Normalize once instead of once per pattern.
    const msg = raw.toLowerCase();
    return RATE_LIMIT_PATTERNS.some((p) => msg.includes(p));
}
|
|
149
154
|
/**
 * Runs a planner query, retrying with backoff when the failure looks like a
 * rate limit.
 *
 * Each attempt delegates to runPlannerQueryOnce. If it throws and
 * isRateLimitError(err) is true, the function waits for the next delay in
 * `backoffMs` and tries again; any other error, or a rate-limit error after
 * the delays are exhausted, is rethrown unchanged.
 *
 * @param {string} prompt - Prompt forwarded to runPlannerQueryOnce.
 * @param {object} opts - Options forwarded to runPlannerQueryOnce.
 * @param {(message: string) => void} [onLog] - Progress sink; also forwarded
 *   to runPlannerQueryOnce. Optional here so a missing logger cannot mask the
 *   real error with a TypeError.
 * @param {number[]} [backoffMs] - Delay in milliseconds before each retry;
 *   its length is the maximum number of retries. Defaults to 30s, 60s, 120s.
 * @returns {Promise<*>} Whatever runPlannerQueryOnce resolves with.
 * @throws The last error from runPlannerQueryOnce when it is not retryable or
 *   retries are exhausted.
 */
async function runPlannerQuery(prompt, opts, onLog, backoffMs = [30_000, 60_000, 120_000]) {
    // Derive the retry count from the schedule so the two can never disagree.
    const maxRetries = backoffMs.length;
    for (let attempt = 0;; attempt++) {
        try {
            return await runPlannerQueryOnce(prompt, opts, onLog);
        }
        catch (err) {
            if (attempt >= maxRetries || !isRateLimitError(err)) {
                throw err;
            }
            const waitMs = backoffMs[attempt];
            const waitSec = Math.round(waitMs / 1000);
            onLog?.(`Rate limited — waiting ${waitSec}s before retry ${attempt + 1}/${maxRetries}`);
            await new Promise((resolve) => setTimeout(resolve, waitMs));
        }
    }
}
|
|
174
|
+
async function runPlannerQueryOnce(prompt, opts, onLog) {
|
|
150
175
|
let resultText = "";
|
|
151
176
|
const startedAt = Date.now();
|
|
152
177
|
const pq = query({
|
|
@@ -213,7 +238,7 @@ async function runPlannerQuery(prompt, opts, onLog) {
|
|
|
213
238
|
if (msg.subtype === "success")
|
|
214
239
|
resultText = msg.result || "";
|
|
215
240
|
else
|
|
216
|
-
throw new Error(`Planner failed: ${msg.subtype}`);
|
|
241
|
+
throw new Error(`Planner failed: ${msg.result || msg.subtype}`);
|
|
217
242
|
}
|
|
218
243
|
}
|
|
219
244
|
};
|
package/dist/swarm.js
CHANGED
|
@@ -240,9 +240,17 @@ export class Swarm {
|
|
|
240
240
|
this.activeQueries.delete(agentQuery);
|
|
241
241
|
}
|
|
242
242
|
if (agent.status === "running") {
|
|
243
|
-
agent.status = "done";
|
|
244
243
|
agent.finishedAt = Date.now();
|
|
245
|
-
|
|
244
|
+
const duration = agent.finishedAt - (agent.startedAt || agent.finishedAt);
|
|
245
|
+
if (agent.toolCalls === 0 && (agent.costUsd ?? 0) < 0.001 && duration < 15_000) {
|
|
246
|
+
agent.status = "error";
|
|
247
|
+
agent.error = "Agent did no work (likely rate-limited before starting)";
|
|
248
|
+
this.failed++;
|
|
249
|
+
}
|
|
250
|
+
else {
|
|
251
|
+
agent.status = "done";
|
|
252
|
+
this.completed++;
|
|
253
|
+
}
|
|
246
254
|
this.log(id, this.agentSummary(agent));
|
|
247
255
|
}
|
|
248
256
|
break; // Success — exit retry loop
|
|
@@ -424,12 +432,13 @@ export class Swarm {
|
|
|
424
432
|
finally {
|
|
425
433
|
if (stashed) {
|
|
426
434
|
try {
|
|
427
|
-
exec("git stash
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
435
|
+
const stashList = exec("git stash list", this.config.cwd).trim();
|
|
436
|
+
if (stashList) {
|
|
437
|
+
exec("git stash pop", this.config.cwd);
|
|
438
|
+
this.log(-1, "Restored stashed changes");
|
|
439
|
+
}
|
|
432
440
|
}
|
|
441
|
+
catch { /* stash already gone or empty */ }
|
|
433
442
|
}
|
|
434
443
|
}
|
|
435
444
|
}
|