claude-overnight 1.8.1 → 1.8.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1333,6 +1333,7 @@ async function main() {
1333
1333
  const waveMerge = (flex && runBranch) ? "yolo" : mergeStrategy;
1334
1334
  // Graceful drain
1335
1335
  let stopping = false;
1336
+ let steeringFailed = false;
1336
1337
  const gracefulStop = (signal) => {
1337
1338
  if (stopping) {
1338
1339
  currentSwarm?.cleanup();
@@ -1367,12 +1368,36 @@ async function main() {
1367
1368
  writeStatus(runDir, steer.statusUpdate);
1368
1369
  if (steer.goalUpdate)
1369
1370
  writeGoalUpdate(runDir, steer.goalUpdate);
1371
+ // Persist steering reasoning for debugging
1372
+ const steerDir0 = join(runDir, "steering");
1373
+ mkdirSync(steerDir0, { recursive: true });
1374
+ writeFileSync(join(steerDir0, `pre-wave-attempt-${steerAttempts}.json`), JSON.stringify({
1375
+ done: steer.done, waveKind: steer.waveKind, reasoning: steer.reasoning,
1376
+ taskCount: steer.tasks.length, statusUpdate: steer.statusUpdate, goalUpdate: steer.goalUpdate,
1377
+ }, null, 2), "utf-8");
1370
1378
  if (steer.done || steer.tasks.length === 0) {
1371
1379
  const hasVerification = waveHistory.some(w => w.kind.includes("verif"));
1372
1380
  if (!hasVerification && remaining >= 1) {
1373
- display.updateText(`Done blocked \u2014 verification required`);
1374
- lastWaveKind = "done-blocked";
1375
- continue;
1381
+ // Auto-compose verification instead of retrying steering
1382
+ display.updateText(`Done blocked — auto-composing verification wave`);
1383
+ currentTasks = [{
1384
+ id: "verify-0",
1385
+ prompt: `## Verification: Build, run, and test the application end-to-end
1386
+
1387
+ You are the final gatekeeper before this run is marked complete. The steerer believes the objective is done. Your job: prove it or disprove it.
1388
+
1389
+ 1. Run the build (npm run build, or whatever this project uses). Report ALL errors.
1390
+ 2. Start the dev server. If a port is taken, try another. If a dependency is missing, install it.
1391
+ 3. Navigate key flows as a real user would. Check that the main features work.
1392
+ 4. Write your findings to .claude-overnight/latest/verifications/final-verify.md
1393
+
1394
+ Be relentless. Do not give up if the first approach fails. Search the codebase for dev login routes, test tokens, seed users, env vars, CLI auth commands, or any bypass.`,
1395
+ noWorktree: true,
1396
+ model: plannerModel,
1397
+ }];
1398
+ lastWaveKind = "verification";
1399
+ overheadBudgetUsed += 1;
1400
+ break;
1376
1401
  }
1377
1402
  objectiveComplete = true;
1378
1403
  remaining = 0;
@@ -1493,15 +1518,40 @@ async function main() {
1493
1518
  writeStatus(runDir, steer.statusUpdate);
1494
1519
  if (steer.goalUpdate)
1495
1520
  writeGoalUpdate(runDir, steer.goalUpdate);
1521
+ // Persist steering reasoning for debugging
1522
+ const steerDir = join(runDir, "steering");
1523
+ mkdirSync(steerDir, { recursive: true });
1524
+ writeFileSync(join(steerDir, `wave-${waveNum}-attempt-${steerAttempts}.json`), JSON.stringify({
1525
+ done: steer.done, waveKind: steer.waveKind, reasoning: steer.reasoning,
1526
+ taskCount: steer.tasks.length, statusUpdate: steer.statusUpdate, goalUpdate: steer.goalUpdate,
1527
+ }, null, 2), "utf-8");
1496
1528
  const execWaves = waveHistory.filter(w => w.kind === "execute").length;
1497
1529
  if (execWaves > 0 && execWaves % 5 === 0)
1498
1530
  archiveMilestone(runDir, waveNum);
1499
1531
  if (steer.done || steer.tasks.length === 0) {
1500
1532
  const hasVerification = waveHistory.some(w => w.kind.includes("verif"));
1501
1533
  if (!hasVerification && remaining >= 1) {
1502
- display.updateText(`Done blocked \u2014 verification required`);
1503
- lastWaveKind = "done-blocked";
1504
- continue;
1534
+ // Auto-compose a verification wave instead of retrying steering
1535
+ display.updateText(`Done blocked — auto-composing verification wave`);
1536
+ currentTasks = [{
1537
+ id: "verify-0",
1538
+ prompt: `## Verification: Build, run, and test the application end-to-end
1539
+
1540
+ You are the final gatekeeper before this run is marked complete. The steerer believes the objective is done. Your job: prove it or disprove it.
1541
+
1542
+ 1. Run the build (npm run build, or whatever this project uses). Report ALL errors.
1543
+ 2. Start the dev server. If a port is taken, try another. If a dependency is missing, install it.
1544
+ 3. Navigate key flows as a real user would. Check that the main features work.
1545
+ 4. Write your findings to .claude-overnight/latest/verifications/final-verify.md
1546
+
1547
+ Be relentless. Do not give up if the first approach fails. Search the codebase for dev login routes, test tokens, seed users, env vars, CLI auth commands, or any bypass.`,
1548
+ noWorktree: true,
1549
+ model: plannerModel,
1550
+ }];
1551
+ lastWaveKind = "verification";
1552
+ overheadBudgetUsed += 1;
1553
+ steered = true;
1554
+ break;
1505
1555
  }
1506
1556
  objectiveComplete = true;
1507
1557
  remaining = 0;
@@ -1526,8 +1576,13 @@ async function main() {
1526
1576
  catch (err) {
1527
1577
  const steerCost = getTotalPlannerCost() - plannerCostBefore;
1528
1578
  accCost += steerCost;
1579
+ if (steerAttempts < 3) {
1580
+ display.updateText(`Steering failed (attempt ${steerAttempts}/3) — retrying...`);
1581
+ continue;
1582
+ }
1529
1583
  display.stop();
1530
- console.log(chalk.yellow(` Steering failed: ${err.message?.slice(0, 80)} \u2014 stopping\n`));
1584
+ console.log(chalk.yellow(` Steering failed after ${steerAttempts} attempts: ${err.message?.slice(0, 80)} stopping\n`));
1585
+ steeringFailed = true;
1531
1586
  break;
1532
1587
  }
1533
1588
  }
@@ -1538,7 +1593,8 @@ async function main() {
1538
1593
  display.stop();
1539
1594
  // Only truly "done" if steering explicitly completed the objective (or non-flex single wave with budget exhausted)
1540
1595
  const trulyDone = objectiveComplete || (!flex && remaining <= 0);
1541
- const finalPhase = trulyDone ? "done" : "capped";
1596
+ const wasCapped = lastCapped || lastAborted;
1597
+ const finalPhase = trulyDone ? "done" : steeringFailed ? "steering" : wasCapped ? "capped" : remaining <= 0 ? "capped" : "stopped";
1542
1598
  saveRunState(runDir, {
1543
1599
  id: `run-${new Date().toISOString().slice(0, 19)}`, objective: objective ?? "", budget: budget ?? tasks.length,
1544
1600
  remaining, workerModel, plannerModel, concurrency, permissionMode,
@@ -1581,14 +1637,26 @@ async function main() {
1581
1637
  if (trulyDone) {
1582
1638
  console.log(chalk.bold.green(` CLAUDE OVERNIGHT — COMPLETE`));
1583
1639
  }
1584
- else {
1640
+ else if (steeringFailed) {
1641
+ console.log(chalk.bold.yellow(` CLAUDE OVERNIGHT — STEERING FAILED`));
1642
+ }
1643
+ else if (remaining <= 0) {
1585
1644
  console.log(chalk.bold.yellow(` CLAUDE OVERNIGHT — BUDGET EXHAUSTED`));
1586
1645
  }
1646
+ else if (lastCapped) {
1647
+ console.log(chalk.bold.yellow(` CLAUDE OVERNIGHT — RATE LIMITED`));
1648
+ }
1649
+ else if (stopping || lastAborted) {
1650
+ console.log(chalk.bold.yellow(` CLAUDE OVERNIGHT — INTERRUPTED`));
1651
+ }
1652
+ else {
1653
+ console.log(chalk.bold.yellow(` CLAUDE OVERNIGHT — STOPPED`));
1654
+ }
1587
1655
  console.log(chalk.green(` ${bannerChar.repeat(Math.min(termW - 4, 60))}`));
1588
1656
  console.log("");
1589
1657
  // Stats grid
1590
1658
  const statRows = [
1591
- [chalk.bold("Waves"), String(waves), chalk.bold("Sessions"), `${accCompleted} done${accFailed > 0 ? ` / ${accFailed} failed` : ""}`],
1659
+ [chalk.bold("Waves"), String(waves), chalk.bold("Sessions"), `${accCompleted} done${accFailed > 0 ? ` / ${accFailed} failed` : ""}${remaining > 0 ? ` (${remaining} remaining)` : ""}`],
1592
1660
  [chalk.bold("Cost"), chalk.green(`$${accCost.toFixed(2)}`), chalk.bold("Elapsed"), elapsedStr],
1593
1661
  [chalk.bold("Merged"), `${totalMerged} branches`, chalk.bold("Conflicts"), totalConflicts > 0 ? chalk.red(String(totalConflicts)) : chalk.green("0")],
1594
1662
  [chalk.bold("Tokens"), `${fmtTokens(accIn)} in / ${fmtTokens(accOut)} out`, chalk.bold("Tool calls"), String(accTools)],
package/dist/planner.js CHANGED
@@ -15,6 +15,46 @@ Consistency is what makes complex things feel simple. One design system, rigid r
15
15
  `;
16
16
  const NUDGE_MS = 15 * 60 * 1000; // 15 min — close & restart with "continue"
17
17
  const HARD_TIMEOUT_MS = 30 * 60 * 1000; // 30 min — give up
18
+ const WALL_CLOCK_LIMIT_MS = 45 * 60 * 1000; // 45 min — absolute max per planner call
19
+ // ── JSON schemas for structured output ──
20
+ const TASKS_SCHEMA = {
21
+ type: "json_schema",
22
+ schema: {
23
+ type: "object",
24
+ properties: { tasks: { type: "array", items: { type: "object", properties: { prompt: { type: "string" } }, required: ["prompt"] } } },
25
+ required: ["tasks"],
26
+ },
27
+ };
28
+ const THEMES_SCHEMA = {
29
+ type: "json_schema",
30
+ schema: {
31
+ type: "object",
32
+ properties: { themes: { type: "array", items: { type: "string" } } },
33
+ required: ["themes"],
34
+ },
35
+ };
36
+ const STEER_SCHEMA = {
37
+ type: "json_schema",
38
+ schema: {
39
+ type: "object",
40
+ properties: {
41
+ done: { type: "boolean" },
42
+ waveKind: { type: "string" },
43
+ reasoning: { type: "string" },
44
+ statusUpdate: { type: "string" },
45
+ goalUpdate: { type: "string" },
46
+ tasks: {
47
+ type: "array",
48
+ items: {
49
+ type: "object",
50
+ properties: { prompt: { type: "string" }, model: { type: "string" }, noWorktree: { type: "boolean" } },
51
+ required: ["prompt"],
52
+ },
53
+ },
54
+ },
55
+ required: ["done", "tasks", "reasoning", "statusUpdate"],
56
+ },
57
+ };
18
58
  export function detectModelTier(model) {
19
59
  const m = model.toLowerCase();
20
60
  if (m === "default" || m.includes("opus"))
@@ -211,6 +251,7 @@ export function getPlannerRateLimitInfo() { return _plannerRateLimitInfo; }
211
251
  async function runPlannerQueryOnce(prompt, opts, onLog) {
212
252
  _plannerRateLimitInfo = { utilization: 0, status: "", isUsingOverage: false, windows: new Map(), costUsd: 0 };
213
253
  let resultText = "";
254
+ let structuredOutput;
214
255
  const startedAt = Date.now();
215
256
  const isResume = !!opts.resumeSessionId;
216
257
  const pq = query({
@@ -225,6 +266,7 @@ async function runPlannerQueryOnce(prompt, opts, onLog) {
225
266
  persistSession: true, // needed for interrupt+resume
226
267
  includePartialMessages: true,
227
268
  ...(isResume && { resume: opts.resumeSessionId }),
269
+ ...(opts.outputFormat && { outputFormat: opts.outputFormat }),
228
270
  },
229
271
  });
230
272
  // Progress ticker — fast updates with compact format
@@ -249,7 +291,14 @@ async function runPlannerQueryOnce(prompt, opts, onLog) {
249
291
  let timer;
250
292
  const watchdog = new Promise((_, reject) => {
251
293
  const check = () => {
294
+ const elapsed = Date.now() - startedAt;
252
295
  const silent = Date.now() - lastActivity;
296
+ // Wall-clock limit: kill if session has been running too long regardless of activity
297
+ if (elapsed >= WALL_CLOCK_LIMIT_MS) {
298
+ pq.interrupt().catch(() => pq.close());
299
+ reject(new Error(`Planner hit wall-clock limit (${Math.round(elapsed / 60000)}min) — likely rate limited`));
300
+ return;
301
+ }
253
302
  if (silent >= timeoutMs) {
254
303
  // Try interrupt (graceful), fall back to close (hard kill)
255
304
  pq.interrupt().catch(() => pq.close());
@@ -315,8 +364,10 @@ async function runPlannerQueryOnce(prompt, opts, onLog) {
315
364
  _plannerRateLimitInfo.costUsd += costUsd;
316
365
  _totalPlannerCostUsd += costUsd;
317
366
  }
318
- if (msg.subtype === "success")
367
+ if (msg.subtype === "success") {
368
+ structuredOutput = r.structured_output;
319
369
  resultText = r.result || "";
370
+ }
320
371
  else
321
372
  throw new Error(`Planner failed: ${r.result || msg.subtype}`);
322
373
  }
@@ -329,6 +380,10 @@ async function runPlannerQueryOnce(prompt, opts, onLog) {
329
380
  clearTimeout(timer);
330
381
  clearInterval(ticker);
331
382
  }
383
+ // Prefer SDK-validated structured output — guaranteed to match the schema
384
+ if (structuredOutput != null && typeof structuredOutput === "object") {
385
+ return JSON.stringify(structuredOutput);
386
+ }
332
387
  return resultText;
333
388
  }
334
389
  function postProcess(raw, budget, onLog) {
@@ -395,10 +450,10 @@ export async function planTasks(objective, cwd, plannerModel, workerModel, permi
395
450
  onLog("Analyzing codebase...");
396
451
  const prompt = plannerPrompt(objective, workerModel, budget, concurrency, flexNote);
397
452
  const fileInstruction = outFile ? `\n\nAFTER generating the JSON, also write it to ${outFile} using the Write tool.` : "";
398
- const resultText = await runPlannerQuery(prompt + fileInstruction, { cwd, model: plannerModel, permissionMode }, onLog);
453
+ const resultText = await runPlannerQuery(prompt + fileInstruction, { cwd, model: plannerModel, permissionMode, outputFormat: TASKS_SCHEMA }, onLog);
399
454
  const parsed = await extractTaskJson(resultText, async () => {
400
455
  onLog("Retrying...");
401
- return runPlannerQuery(`Your previous response was not valid JSON. Respond with ONLY a JSON object {"tasks":[{"prompt":"..."}]}.\n\n${prompt}`, { cwd, model: plannerModel, permissionMode }, onLog);
456
+ return runPlannerQuery(`Your previous response was not valid JSON. Respond with ONLY a JSON object {"tasks":[{"prompt":"..."}]}.\n\n${prompt}`, { cwd, model: plannerModel, permissionMode, outputFormat: TASKS_SCHEMA }, onLog);
402
457
  }, onLog, outFile);
403
458
  let tasks = (parsed.tasks || []).map((t, i) => ({
404
459
  id: String(i),
@@ -412,7 +467,7 @@ export async function planTasks(objective, cwd, plannerModel, workerModel, permi
412
467
  }
413
468
  // ── Thinking wave ──
414
469
  export async function identifyThemes(objective, count, model, permissionMode, onLog = () => { }) {
415
- const resultText = await runPlannerQuery(`Split this objective into exactly ${count} independent research angles for architects exploring a codebase. Each angle should cover a distinct aspect.\n\nObjective: ${objective}\n\nReturn ONLY a JSON object: {"themes": ["angle description", ...]}`, { cwd: process.cwd(), model, permissionMode }, onLog);
470
+ const resultText = await runPlannerQuery(`Split this objective into exactly ${count} independent research angles for architects exploring a codebase. Each angle should cover a distinct aspect.\n\nObjective: ${objective}\n\nReturn ONLY a JSON object: {"themes": ["angle description", ...]}`, { cwd: process.cwd(), model, permissionMode, outputFormat: THEMES_SCHEMA }, onLog);
416
471
  const parsed = attemptJsonParse(resultText);
417
472
  if (parsed?.themes && Array.isArray(parsed.themes))
418
473
  return parsed.themes.slice(0, count);
@@ -479,10 +534,10 @@ Requirements:
479
534
  Respond with ONLY a JSON object (no markdown fences):
480
535
  {"tasks": [{"prompt": "..."}]}${fileInstruction}`;
481
536
  onLog("Synthesizing...");
482
- const resultText = await runPlannerQuery(prompt, { cwd, model: plannerModel, permissionMode }, onLog);
537
+ const resultText = await runPlannerQuery(prompt, { cwd, model: plannerModel, permissionMode, outputFormat: TASKS_SCHEMA }, onLog);
483
538
  const parsed = await extractTaskJson(resultText, async () => {
484
539
  onLog("Retrying...");
485
- return runPlannerQuery(`Your previous response was not valid JSON. Respond with ONLY a JSON object {"tasks":[{"prompt":"..."}]}.\n\n${prompt}`, { cwd, model: plannerModel, permissionMode }, onLog);
540
+ return runPlannerQuery(`Your previous response was not valid JSON. Respond with ONLY a JSON object {"tasks":[{"prompt":"..."}]}.\n\n${prompt}`, { cwd, model: plannerModel, permissionMode, outputFormat: TASKS_SCHEMA }, onLog);
486
541
  }, onLog, outFile);
487
542
  let tasks = (parsed.tasks || []).map((t, i) => ({
488
543
  id: String(i),
@@ -519,10 +574,10 @@ ${scaleNote} ${concurrency} agents run in parallel. Update the plan accordingly.
519
574
 
520
575
  Respond with ONLY a JSON object (no markdown):
521
576
  {"tasks":[{"prompt":"..."}]}`;
522
- const resultText = await runPlannerQuery(prompt, { cwd, model: plannerModel, permissionMode }, onLog);
577
+ const resultText = await runPlannerQuery(prompt, { cwd, model: plannerModel, permissionMode, outputFormat: TASKS_SCHEMA }, onLog);
523
578
  const parsed = await extractTaskJson(resultText, async () => {
524
579
  onLog("Retrying...");
525
- return runPlannerQuery(`Your previous response was not valid JSON. Respond with ONLY a JSON object {"tasks":[{"prompt":"..."}]}.\n\n${prompt}`, { cwd, model: plannerModel, permissionMode }, onLog);
580
+ return runPlannerQuery(`Your previous response was not valid JSON. Respond with ONLY a JSON object {"tasks":[{"prompt":"..."}]}.\n\n${prompt}`, { cwd, model: plannerModel, permissionMode, outputFormat: TASKS_SCHEMA }, onLog);
526
581
  }, onLog);
527
582
  let tasks = (parsed.tasks || []).map((t, i) => ({
528
583
  id: String(i),
@@ -725,19 +780,22 @@ Set "noWorktree": true for verify/user-test tasks — they run in the real proje
725
780
 
726
781
  If done: {"done": true, "waveKind": "done", "reasoning": "...", "statusUpdate": "...", "tasks": []}`;
727
782
  onLog("Assessing...");
728
- const resultText = await runPlannerQuery(prompt, { cwd, model: plannerModel, permissionMode }, onLog);
783
+ const resultText = await runPlannerQuery(prompt, { cwd, model: plannerModel, permissionMode, outputFormat: STEER_SCHEMA }, onLog);
729
784
  const parsed = await (async () => {
730
785
  const first = attemptJsonParse(resultText);
731
786
  if (first)
732
787
  return first;
733
- onLog("Retrying...");
734
- const retryText = await runPlannerQuery(`Your previous response was not valid JSON. Respond with ONLY a JSON object {"done":false,"waveKind":"execute","reasoning":"...","statusUpdate":"...","tasks":[{"prompt":"..."}]}.\n\n${prompt}`, { cwd, model: plannerModel, permissionMode }, onLog);
788
+ // Log what failed so we can debug
789
+ onLog(`Steering parse failed (${resultText.length} chars). Asking model to fix...`);
790
+ // Send the broken response back so the model can fix its own output
791
+ const snippet = resultText.length > 2000 ? resultText.slice(0, 1000) + "\n...\n" + resultText.slice(-800) : resultText;
792
+ const retryText = await runPlannerQuery(`Your previous steering response could not be parsed as JSON. Here is what you returned:\n\n---\n${snippet}\n---\n\nExtract or rewrite the above as ONLY a valid JSON object with this schema: {"done":boolean,"waveKind":"execute"|"done","reasoning":"...","statusUpdate":"...","tasks":[{"prompt":"..."}]}\n\nRespond with ONLY the JSON, no markdown fences, no explanation.`, { cwd, model: plannerModel, permissionMode, outputFormat: STEER_SCHEMA }, onLog);
735
793
  const retryParsed = attemptJsonParse(retryText);
736
794
  if (retryParsed)
737
795
  return retryParsed;
738
796
  // Don't return done:true on parse failure — that permanently marks the run complete.
739
797
  // Throw so the caller's catch block handles it as a transient steering failure.
740
- throw new Error("Could not parse steering response after retry");
798
+ throw new Error(`Could not parse steering response after retry (${resultText.length} chars: ${resultText.slice(0, 120)}...)`);
741
799
  })();
742
800
  const isDone = parsed.done === true;
743
801
  const waveKind = parsed.waveKind || parsed.action || (isDone ? "done" : "execute");
package/dist/types.d.ts CHANGED
@@ -144,7 +144,7 @@ export interface RunState {
144
144
  accOut?: number;
145
145
  accTools?: number;
146
146
  branches: BranchRecord[];
147
- phase: "steering" | "capped" | "done";
147
+ phase: "steering" | "capped" | "done" | "stopped";
148
148
  startedAt: string;
149
149
  cwd: string;
150
150
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-overnight",
3
- "version": "1.8.1",
3
+ "version": "1.8.4",
4
4
  "description": "Run 10, 100, or 1000 Claude agents overnight. Parallel autonomous AI coding with thinking waves, iterative quality steering, crash recovery, and rate limit handling. Built on the Claude Agent SDK.",
5
5
  "type": "module",
6
6
  "bin": {