claude-overnight 0.5.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -45,7 +45,7 @@ A guided flow walks you through each step:
45
45
  ╰────────────────────────────────────╯
46
46
  ```
47
47
 
48
- The planner generates tasks — review, edit, or chat about them, then run.
48
+ For large budgets, the planner identifies research themes — review them, then press Run. Everything after that is fully autonomous: thinking agents explore, the orchestrator synthesizes tasks, execution waves run, and steering adapts between waves. No further interaction needed — go to sleep.
49
49
 
50
50
  ### Task file
51
51
 
@@ -68,14 +68,17 @@ The planner always runs on the best available model (Opus) regardless of which m
68
68
  For large budgets (`budget > concurrency * 3`), the planner doesn't try to generate hundreds of tasks from scratch. Instead, it launches a **thinking wave** — a team of architect agents that explore your codebase in parallel before any code is written.
69
69
 
70
70
  ```
71
- ⠋ identifying themes... → splits objective into N angles (< 30s)
72
- ◆ Thinking: 5 agents exploring → each explores from its angle, writes a design doc
73
- ◆ Orchestrating plan... → reads all design docs, synthesizes execution tasks
71
+ ⠋ identifying themes... → splits objective into N angles (< 30s)
72
+ ✓ 10 themes → review themes, press Run, walk away
73
+ ◆ Thinking: 10 agents exploring → each explores from its angle, writes a design doc
74
+ ◆ Orchestrating plan... → reads all design docs, synthesizes execution tasks
75
+ ◆ Wave 1 · 50 tasks → fully autonomous from here
76
+ ◆ Steering... → adapts between waves, retries on rate limits
74
77
  ```
75
78
 
76
- Each thinking agent gets a different research focus (architecture, data, UI, APIs, testing, etc.), explores using Read/Glob/Grep, and writes a structured design document with findings, proposed work items, and key files. The orchestrator then reads all design docs and produces grounded, well-informed execution tasks that reference specific files and patterns the researchers found.
79
+ The review prompt appears right after theme identification — the last thing requiring your presence. After you press Run, the thinking wave, orchestration, execution, and steering all run autonomously. Rate-limited? The planner waits and retries. Go to sleep.
77
80
 
78
- This means a budget of 200 doesn't generate 200 tasks from a single LLM call guessing at your codebase. It sends 5 architects to study the code first, then plans 50 tasks based on their findings, executes them, steers, and repeats.
81
+ The number of thinking agents scales with budget: 5 for budget=50, 10 for budget=2000+. Each agent explores the codebase from a different angle and writes a structured design document. The orchestrator then reads all design docs and produces grounded execution tasks referencing real files and patterns.
79
82
 
80
83
  For small budgets (≤ `concurrency * 3`), the planner skips the thinking wave and generates tasks directly — fast and efficient for focused work.
81
84
 
@@ -99,7 +102,7 @@ The budget also shapes task granularity:
99
102
 
100
103
  **Large budget (50+)**: Thinking wave + orchestration. Architects explore, then execution tasks are synthesized from their findings. Each task is a substantial work session grounded in real codebase analysis.
101
104
 
102
- A budget of 200 is not 200 micro-edits. It's 5 architects + ~195 senior-engineer work sessions, planned in waves.
105
+ A budget of 200 is not 200 micro-edits. It's ~5 architects + ~195 senior-engineer work sessions, planned in waves. A budget of 2000 gets 10 architects.
103
106
 
104
107
  ## Usage limits
105
108
 
package/dist/index.js CHANGED
@@ -521,10 +521,11 @@ async function main() {
521
521
  process.stdout.write("\x1B[?25l");
522
522
  const planRestore = () => process.stdout.write("\x1B[?25h");
523
523
  const useThinking = flex && (budget ?? 10) > concurrency * 3;
524
+ const thinkingCount = useThinking ? Math.min(Math.max(concurrency, Math.ceil((budget ?? 10) * 0.005)), 10) : 0;
524
525
  const designDir = join(cwd, ".claude-overnight", "designs");
525
526
  try {
526
527
  if (useThinking) {
527
- // Phase 1: Quick theme identification
528
+ // Phase 1: Quick theme identification → review → then autonomous
528
529
  let themeFrame = 0;
529
530
  const themeSpinner = setInterval(() => {
530
531
  const spin = chalk.cyan(BRAILLE[themeFrame++ % BRAILLE.length]);
@@ -532,13 +533,52 @@ async function main() {
532
533
  }, 120);
533
534
  let themes;
534
535
  try {
535
- themes = await identifyThemes(objective, concurrency, plannerModel, permissionMode);
536
+ themes = await identifyThemes(objective, thinkingCount, plannerModel, permissionMode);
536
537
  }
537
538
  finally {
538
539
  clearInterval(themeSpinner);
539
540
  }
540
- process.stdout.write(`\x1B[2K\r ${chalk.green(`\u2713 ${themes.length} themes`)}\n`);
541
- // Phase 2: Thinking wave — agents explore codebase
541
+ process.stdout.write(`\x1B[2K\r ${chalk.green(`\u2713 ${themes.length} themes`)}\n\n`);
542
+ // Show themes for review — this is the LAST user interaction
543
+ planRestore();
544
+ let reviewing = true;
545
+ while (reviewing) {
546
+ for (let i = 0; i < themes.length; i++) {
547
+ console.log(chalk.dim(` ${String(i + 1).padStart(3)}.`) + ` ${themes[i]}`);
548
+ }
549
+ console.log(chalk.dim(`\n ${thinkingCount} thinking agents → orchestrate → ${(budget ?? 10) - thinkingCount} execution sessions\n`));
550
+ const action = await selectKey(`${chalk.white(`${themes.length} themes`)} ${chalk.dim(`· ${thinkingCount} thinking · ${concurrency} concurrent`)}`, [
551
+ { key: "r", desc: "un" },
552
+ { key: "e", desc: "dit" },
553
+ { key: "q", desc: "uit" },
554
+ ]);
555
+ switch (action) {
556
+ case "r":
557
+ reviewing = false;
558
+ break;
559
+ case "e": {
560
+ const feedback = await ask(`\n ${chalk.bold("What should change?")}\n ${chalk.cyan(">")} `);
561
+ if (!feedback)
562
+ break;
563
+ process.stdout.write("\x1B[?25l");
564
+ try {
565
+ themes = await identifyThemes(`${objective}\n\nUser feedback: ${feedback}`, thinkingCount, plannerModel, permissionMode);
566
+ process.stdout.write(`\x1B[2K\r ${chalk.green(`\u2713 ${themes.length} themes`)}\n\n`);
567
+ }
568
+ catch (err) {
569
+ console.error(chalk.red(`\n Re-planning failed: ${err.message}\n`));
570
+ }
571
+ planRestore();
572
+ break;
573
+ }
574
+ case "q":
575
+ console.log(chalk.dim("\n Aborted.\n"));
576
+ process.exit(0);
577
+ }
578
+ }
579
+ // ── From here, fully autonomous — no more user interaction ──
580
+ process.stdout.write("\x1B[?25l");
581
+ // Phase 2: Thinking wave
542
582
  mkdirSync(designDir, { recursive: true });
543
583
  const thinkingTasks = buildThinkingTasks(objective, themes, designDir, plannerModel);
544
584
  console.log(chalk.cyan(`\n ◆ Thinking: ${thinkingTasks.length} agents exploring...\n`));
@@ -571,27 +611,84 @@ async function main() {
571
611
  const flexNote = `This is wave 1 of an adaptive multi-wave run (total budget: ${(budget ?? 10) - thinkingUsed}). Plan the highest-impact foundational work first. Future waves will iterate based on what's learned.`;
572
612
  console.log(chalk.cyan(`\n ◆ Orchestrating plan...\n`));
573
613
  tasks = await orchestrate(objective, designContext, cwd, plannerModel, workerModel, permissionMode, orchBudget, concurrency, makeProgressLog(), flexNote);
574
- const remaining = (budget ?? 10) - thinkingUsed - tasks.length;
575
- process.stdout.write(`\x1B[2K\r ${chalk.green(`\u2713 ${tasks.length} tasks`)}${chalk.dim(` · ${remaining} remaining`)}\n\n`);
614
+ process.stdout.write(`\x1B[2K\r ${chalk.green(`\u2713 ${tasks.length} tasks`)}\n\n`);
576
615
  }
577
616
  else {
578
- // Fallback: no design docs produced, use direct planner
579
- console.log(chalk.yellow(`\n No design docs produced — falling back to direct planning\n`));
617
+ console.log(chalk.yellow(`\n No design docs falling back to direct planning\n`));
580
618
  const waveBudget = Math.min(50, Math.max(concurrency, Math.ceil(((budget ?? 10) - thinkingUsed) * 0.5)));
581
619
  tasks = await planTasks(objective, cwd, plannerModel, workerModel, permissionMode, waveBudget, concurrency, makeProgressLog());
582
620
  process.stdout.write(`\x1B[2K\r ${chalk.green(`\u2713 ${tasks.length} tasks`)}\n\n`);
583
621
  }
584
622
  }
585
623
  else {
586
- // Small budget: direct planning (no thinking wave)
624
+ // Small budget: direct planning — review → run
587
625
  const waveBudget = flex ? Math.min(50, Math.max(concurrency, Math.ceil((budget ?? 10) * 0.5))) : budget;
588
626
  const flexNote = flex
589
627
  ? `This is wave 1 of an adaptive multi-wave run (total budget: ${budget}). Plan the highest-impact foundational work first. Future waves will iterate, polish, and expand based on what's learned.`
590
628
  : undefined;
591
629
  console.log(chalk.cyan(`\n ◆ Planning${flex ? " wave 1" : ""}...\n`));
592
630
  tasks = await planTasks(objective, cwd, plannerModel, workerModel, permissionMode, waveBudget, concurrency, makeProgressLog(), flexNote);
593
- const flexHint = flex ? chalk.dim(` (wave 1, ${(budget ?? 10) - tasks.length} remaining)`) : "";
631
+ const flexHint = flex ? chalk.dim(` · wave 1`) : "";
594
632
  process.stdout.write(`\x1B[2K\r ${chalk.green(`\u2713 ${tasks.length} tasks`)}${flexHint}\n\n`);
633
+ // Review loop for small-budget path
634
+ planRestore();
635
+ let reviewing = true;
636
+ while (reviewing) {
637
+ showPlan(tasks);
638
+ const action = await selectKey(`${chalk.white(`${tasks.length} tasks`)} ${chalk.dim(`· ${concurrency} concurrent`)}`, [
639
+ { key: "r", desc: "un" },
640
+ { key: "e", desc: "dit" },
641
+ { key: "c", desc: "hat" },
642
+ { key: "q", desc: "uit" },
643
+ ]);
644
+ switch (action) {
645
+ case "r":
646
+ reviewing = false;
647
+ break;
648
+ case "e": {
649
+ const feedback = await ask(`\n ${chalk.bold("What should change?")}\n ${chalk.cyan(">")} `);
650
+ if (!feedback)
651
+ break;
652
+ console.log(chalk.cyan("\n ◆ Re-planning...\n"));
653
+ process.stdout.write("\x1B[?25l");
654
+ try {
655
+ tasks = await refinePlan(objective, tasks, feedback, cwd, plannerModel, workerModel, permissionMode, budget, concurrency, makeProgressLog());
656
+ process.stdout.write(`\x1B[2K\r ${chalk.green(`\u2713 ${tasks.length} tasks`)}\n\n`);
657
+ }
658
+ catch (err) {
659
+ console.error(chalk.red(`\n Re-planning failed: ${err.message}\n`));
660
+ }
661
+ planRestore();
662
+ break;
663
+ }
664
+ case "c": {
665
+ const question = await ask(`\n ${chalk.bold("Ask about the plan:")}\n ${chalk.cyan(">")} `);
666
+ if (!question)
667
+ break;
668
+ process.stdout.write("\x1B[?25l");
669
+ try {
670
+ let answer = "";
671
+ for await (const msg of query({
672
+ prompt: `You planned these tasks for the objective "${objective}":\n${tasks.map((t, i) => `${i + 1}. ${t.prompt}`).join("\n")}\n\nUser question: ${question}`,
673
+ options: { cwd, model: plannerModel, permissionMode, persistSession: false },
674
+ })) {
675
+ if (msg.type === "result" && msg.subtype === "success")
676
+ answer = msg.result || "";
677
+ }
678
+ planRestore();
679
+ if (answer)
680
+ console.log(chalk.dim(`\n ${answer.slice(0, 500)}\n`));
681
+ }
682
+ catch {
683
+ planRestore();
684
+ }
685
+ break;
686
+ }
687
+ case "q":
688
+ console.log(chalk.dim("\n Aborted.\n"));
689
+ process.exit(0);
690
+ }
691
+ }
595
692
  }
596
693
  }
597
694
  catch (err) {
@@ -602,65 +699,6 @@ async function main() {
602
699
  console.error(chalk.red(`\n Planning failed: ${err.message}\n`));
603
700
  process.exit(1);
604
701
  }
605
- // ── Review loop ──
606
- planRestore();
607
- let reviewing = true;
608
- while (reviewing) {
609
- showPlan(tasks);
610
- const action = await selectKey(`${chalk.white(`${tasks.length} tasks`)} ${chalk.dim(`· ${concurrency} concurrent`)}`, [
611
- { key: "r", desc: "un" },
612
- { key: "e", desc: "dit" },
613
- { key: "c", desc: "hat" },
614
- { key: "q", desc: "uit" },
615
- ]);
616
- switch (action) {
617
- case "r":
618
- reviewing = false;
619
- break;
620
- case "e": {
621
- const feedback = await ask(`\n ${chalk.bold("What should change?")}\n ${chalk.cyan(">")} `);
622
- if (!feedback)
623
- break;
624
- console.log(chalk.cyan("\n ◆ Re-planning...\n"));
625
- process.stdout.write("\x1B[?25l");
626
- try {
627
- tasks = await refinePlan(objective, tasks, feedback, cwd, plannerModel, workerModel, permissionMode, budget, concurrency, makeProgressLog());
628
- process.stdout.write(`\x1B[2K\r ${chalk.green(`\u2713 ${tasks.length} tasks`)}\n\n`);
629
- }
630
- catch (err) {
631
- console.error(chalk.red(`\n Re-planning failed: ${err.message}\n`));
632
- }
633
- planRestore();
634
- break;
635
- }
636
- case "c": {
637
- const question = await ask(`\n ${chalk.bold("Ask about the plan:")}\n ${chalk.cyan(">")} `);
638
- if (!question)
639
- break;
640
- process.stdout.write("\x1B[?25l");
641
- try {
642
- let answer = "";
643
- for await (const msg of query({
644
- prompt: `You planned these tasks for the objective "${objective}":\n${tasks.map((t, i) => `${i + 1}. ${t.prompt}`).join("\n")}\n\nUser question: ${question}`,
645
- options: { cwd, model: plannerModel, permissionMode, persistSession: false },
646
- })) {
647
- if (msg.type === "result" && msg.subtype === "success")
648
- answer = msg.result || "";
649
- }
650
- planRestore();
651
- if (answer)
652
- console.log(chalk.dim(`\n ${answer.slice(0, 500)}\n`));
653
- }
654
- catch {
655
- planRestore();
656
- }
657
- break;
658
- }
659
- case "q":
660
- console.log(chalk.dim("\n Aborted.\n"));
661
- process.exit(0);
662
- }
663
- }
664
702
  }
665
703
  if (tasks.length === 0) {
666
704
  console.error("No tasks provided.");
package/dist/planner.js CHANGED
@@ -2,7 +2,7 @@ import { query } from "@anthropic-ai/claude-agent-sdk";
2
2
  const INACTIVITY_MS = 5 * 60 * 1000;
3
3
  export function detectModelTier(model) {
4
4
  const m = model.toLowerCase();
5
- if (m.includes("opus"))
5
+ if (m === "default" || m.includes("opus"))
6
6
  return "opus";
7
7
  if (m.includes("sonnet"))
8
8
  return "sonnet";
@@ -146,7 +146,32 @@ Respond with ONLY a JSON object (no markdown fences):
146
146
  ]
147
147
  }`;
148
148
  }
149
+ const RATE_LIMIT_PATTERNS = ["rate", "limit", "overloaded", "429", "hit your limit", "too many"];
150
+ function isRateLimitError(err) {
151
+ const msg = err instanceof Error ? err.message : String(err);
152
+ return RATE_LIMIT_PATTERNS.some((p) => msg.toLowerCase().includes(p));
153
+ }
149
154
  async function runPlannerQuery(prompt, opts, onLog) {
155
+ const MAX_RETRIES = 3;
156
+ const BACKOFF = [30_000, 60_000, 120_000];
157
+ for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
158
+ try {
159
+ return await runPlannerQueryOnce(prompt, opts, onLog);
160
+ }
161
+ catch (err) {
162
+ if (attempt < MAX_RETRIES && isRateLimitError(err)) {
163
+ const waitMs = BACKOFF[attempt];
164
+ const waitSec = Math.round(waitMs / 1000);
165
+ onLog(`Rate limited — waiting ${waitSec}s before retry ${attempt + 1}/${MAX_RETRIES}`);
166
+ await new Promise((r) => setTimeout(r, waitMs));
167
+ continue;
168
+ }
169
+ throw err;
170
+ }
171
+ }
172
+ throw new Error("Planner query failed after retries");
173
+ }
174
+ async function runPlannerQueryOnce(prompt, opts, onLog) {
150
175
  let resultText = "";
151
176
  const startedAt = Date.now();
152
177
  const pq = query({
@@ -213,7 +238,7 @@ async function runPlannerQuery(prompt, opts, onLog) {
213
238
  if (msg.subtype === "success")
214
239
  resultText = msg.result || "";
215
240
  else
216
- throw new Error(`Planner failed: ${msg.subtype}`);
241
+ throw new Error(`Planner failed: ${msg.result || msg.subtype}`);
217
242
  }
218
243
  }
219
244
  };
package/dist/swarm.js CHANGED
@@ -240,9 +240,17 @@ export class Swarm {
240
240
  this.activeQueries.delete(agentQuery);
241
241
  }
242
242
  if (agent.status === "running") {
243
- agent.status = "done";
244
243
  agent.finishedAt = Date.now();
245
- this.completed++;
244
+ const duration = agent.finishedAt - (agent.startedAt || agent.finishedAt);
245
+ if (agent.toolCalls === 0 && (agent.costUsd ?? 0) < 0.001 && duration < 15_000) {
246
+ agent.status = "error";
247
+ agent.error = "Agent did no work (likely rate-limited before starting)";
248
+ this.failed++;
249
+ }
250
+ else {
251
+ agent.status = "done";
252
+ this.completed++;
253
+ }
246
254
  this.log(id, this.agentSummary(agent));
247
255
  }
248
256
  break; // Success — exit retry loop
@@ -424,12 +432,13 @@ export class Swarm {
424
432
  finally {
425
433
  if (stashed) {
426
434
  try {
427
- exec("git stash pop", this.config.cwd);
428
- this.log(-1, "Restored stashed changes");
429
- }
430
- catch (e) {
431
- this.log(-1, `Stash pop failed: ${String(e.message || e).slice(0, 80)}`);
435
+ const stashList = exec("git stash list", this.config.cwd).trim();
436
+ if (stashList) {
437
+ exec("git stash pop", this.config.cwd);
438
+ this.log(-1, "Restored stashed changes");
439
+ }
432
440
  }
441
+ catch { /* stash already gone or empty */ }
433
442
  }
434
443
  }
435
444
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-overnight",
3
- "version": "0.5.0",
3
+ "version": "0.5.1",
4
4
  "description": "Fire off Claude agents, come back days later to shipped work. Maximizes every token in your plan.",
5
5
  "type": "module",
6
6
  "bin": {