claude-overnight 1.25.42 → 1.25.43

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/planner-query.js CHANGED
@@ -619,6 +619,21 @@ function extractOutermostBraces(text) {
619
619
  return null;
620
620
  }
621
621
  export function attemptJsonParse(text) {
622
+ // Strip conversational prefaces/suffixes that weak-schema models sometimes
623
+ // wrap around the JSON body (e.g. "Here is the JSON: { ... } Let me know…").
624
+ const preface = /^\s*(?:Here (?:is|are)[^{]*|Let me[^{]*|I'?ll[^{]*|Sure[^{]*|Okay[^{]*)/i;
625
+ const suffix = /\n\n(?:Let me know|Hope this|Please let me)[\s\S]*$/i;
626
+ if (preface.test(text) || suffix.test(text)) {
627
+ const cleaned = text.replace(preface, "").replace(suffix, "").trim();
628
+ if (cleaned && cleaned !== text) {
629
+ try {
630
+ const obj = JSON.parse(cleaned);
631
+ if (typeof obj === "object" && obj !== null)
632
+ return obj;
633
+ }
634
+ catch { }
635
+ }
636
+ }
622
637
  try {
623
638
  const obj = JSON.parse(text);
624
639
  if (typeof obj === "object" && obj !== null)
package/dist/providers.js CHANGED
@@ -178,6 +178,11 @@ export function envFor(p) {
178
178
  base.ANTHROPIC_AUTH_TOKEN = key;
179
179
  }
180
180
  delete base.ANTHROPIC_API_KEY;
181
+ // Prevent CURSOR_API_KEY from leaking into non-proxy envs — would cause
182
+ // isCursorProxyEnv false-positive, silently rerouting through direct fetch
183
+ // which ignores outputFormat (no JSON schema enforcement).
184
+ delete base.CURSOR_API_KEY;
185
+ delete base.CURSOR_AUTH_TOKEN;
181
186
  return base;
182
187
  }
183
188
  /**
package/dist/run.js CHANGED
@@ -3,8 +3,8 @@ import { join } from "path";
3
3
  import { execSync } from "child_process";
4
4
  import chalk from "chalk";
5
5
  import { Swarm } from "./swarm.js";
6
- import { steerWave } from "./steering.js";
7
- import { getTotalPlannerCost, getPlannerRateLimitInfo, getPeakPlannerContext, runPlannerQuery, setPlannerEnvResolver } from "./planner-query.js";
6
+ import { steerWave, STEER_SCHEMA } from "./steering.js";
7
+ import { getTotalPlannerCost, getPlannerRateLimitInfo, getPeakPlannerContext, runPlannerQuery, setPlannerEnvResolver, attemptJsonParse } from "./planner-query.js";
8
8
  import { contextFillInfo } from "./render.js";
9
9
  import { getModelCapability } from "./models.js";
10
10
  import { buildEnvResolver, isCursorProxyProvider } from "./providers.js";
@@ -55,6 +55,8 @@ export async function executeRun(cfg) {
55
55
  let lastCapped = false, lastAborted = false, objectiveComplete = false;
56
56
  let lastEstimate;
57
57
  const branches = [];
58
+ let healFailStreak = 0; // consecutive waves where heal-0 agent changed 0 files
59
+ let zeroFileWaves = 0; // consecutive waves with 0 files across non-heal tasks
58
60
  if (cfg.resuming && cfg.resumeState) {
59
61
  const rs = cfg.resumeState;
60
62
  remaining = Math.max(1, rs.remaining);
@@ -295,8 +297,21 @@ export async function executeRun(cfg) {
295
297
  // Shared steering logic used by both resume-steering and in-loop steering
296
298
  const runSteering = async () => {
297
299
  let steered = false;
300
+ // ── B1: Skip steering when ≥2 unresolved merge-failed branches exist ──
301
+ const mergeFailedBranches = branches.filter(b => b.status === "merge-failed");
302
+ if (mergeFailedBranches.length >= 2) {
303
+ currentTasks = mergeFailedBranches.map((b, i) => ({
304
+ id: `branch-retry-${i}`,
305
+ prompt: `Your previous attempt at this task merge-failed against main. Redo it against the current state of main with minimal, focused edits. Original task:\n\n${b.taskPrompt}`,
306
+ model: workerModel,
307
+ postcondition: "pnpm run build",
308
+ }));
309
+ display.appendSteeringEvent(`Skipping steering — ${mergeFailedBranches.length} merge-failed branches form the wave`);
310
+ return true;
311
+ }
298
312
  let steerAttempts = 0;
299
- while (!steered && remaining > 0 && !stopping && steerAttempts < 3) {
313
+ const MAX_STEER_ATTEMPTS = 2; // B2: retry threshold 3 → 2
314
+ while (!steered && remaining > 0 && !stopping && steerAttempts < MAX_STEER_ATTEMPTS) {
300
315
  steerAttempts++;
301
316
  const plannerCostBefore = getTotalPlannerCost();
302
317
  try {
@@ -350,23 +365,52 @@ export async function executeRun(cfg) {
350
365
  }
351
366
  catch (err) {
352
367
  accCost += getTotalPlannerCost() - plannerCostBefore;
353
- if (steerAttempts < 3) {
354
- display.appendSteeringEvent(`Steering failed (attempt ${steerAttempts}/3) -- retrying...`);
368
+ const rawPreview = err?.message?.slice(0, 200) || "(no details)";
369
+ if (steerAttempts < MAX_STEER_ATTEMPTS) {
370
+ display.appendSteeringEvent(`Steering failed (attempt ${steerAttempts}/${MAX_STEER_ATTEMPTS}) -- retrying... ${rawPreview}`);
355
371
  continue;
356
372
  }
357
- display.appendSteeringEvent(`Steering failed ${steerAttempts}× -- falling back`);
358
- let fallbackStatus = "";
373
+ // ── B3: Decomposer fallback (replaces single-giant-fallback) ──
374
+ display.appendSteeringEvent(`Steering failed ${MAX_STEER_ATTEMPTS}× — decomposer fallback`);
375
+ // First: try merge-failed recycling even if only 1 unresolved branch exists
376
+ const stillFailed = branches.filter(b => b.status === "merge-failed");
377
+ if (stillFailed.length >= 1) {
378
+ currentTasks = stillFailed.map((b, i) => ({
379
+ id: `branch-retry-${i}`,
380
+ prompt: `Your previous attempt at this task merge-failed against main. Redo it against the current state of main with minimal, focused edits. Original task:\n\n${b.taskPrompt}`,
381
+ model: workerModel,
382
+ postcondition: "pnpm run build",
383
+ }));
384
+ display.appendSteeringEvent(`Decomposer: ${stillFailed.length} merge-failed branch(es) retried as swarm tasks`);
385
+ steered = true;
386
+ break;
387
+ }
388
+ // Second: minimal-prompt planner query
389
+ display.appendSteeringEvent("Decomposer: minimal planner query…");
359
390
  try {
360
- fallbackStatus = readFileSync(join(runDir, "status.md"), "utf-8");
391
+ let statusText = "";
392
+ try {
393
+ statusText = readFileSync(join(runDir, "status.md"), "utf-8");
394
+ }
395
+ catch { }
396
+ const minimalPrompt = `${objective ? `Objective: ${objective}` : ""}\n\nStatus:\n${statusText || "(none)"}\n\nReturn tasks: string[] — 3-6 specific follow-ups. JSON only. {"tasks":[{"prompt":"..."}]}`;
397
+ const minimalText = await runPlannerQuery(minimalPrompt, { cwd, model: plannerModel, permissionMode, outputFormat: STEER_SCHEMA, transcriptName: "decomposer-minimal", maxTurns: 40 }, () => { });
398
+ const parsed = attemptJsonParse(minimalText);
399
+ if (parsed?.tasks?.length > 0) {
400
+ currentTasks = parsed.tasks.map((t, i) => ({
401
+ id: `decompose-${i}`,
402
+ prompt: typeof t === "string" ? t : t.prompt,
403
+ model: workerModel,
404
+ }));
405
+ display.appendSteeringEvent(`Decomposer: ${currentTasks.length} tasks from minimal planner`);
406
+ steered = true;
407
+ break;
408
+ }
361
409
  }
362
410
  catch { }
363
- currentTasks = [{
364
- id: "fallback-0",
365
- prompt: `Steering couldn't decide the next step. Read the project, assess what's done vs. remaining, and do the most impactful work.\n\nObjective: ${objective}${fallbackStatus ? `\n\nStatus:\n${fallbackStatus}` : ""}`,
366
- type: "execute",
367
- }];
368
- steered = true;
369
- break;
411
+ // Finally: halt
412
+ display.appendSteeringEvent(`Decomposer: no tasks produced — halting`);
413
+ return false;
370
414
  }
371
415
  }
372
416
  return steered;
@@ -389,12 +433,26 @@ export async function executeRun(cfg) {
389
433
  // Health check before each wave: a broken build poisons every subsequent
390
434
  // agent context, so prepend a heal task when detected. Steering-planned
391
435
  // tasks still run, just after the build is green again.
436
+ // Skip if prior heal changed 0 files (heal unable to fix).
392
437
  {
393
- const healTask = checkProjectHealth(cwd);
394
- if (healTask && remaining > 0) {
395
- const withoutDup = currentTasks.filter(t => t.id !== "heal-0");
396
- currentTasks = [healTask, ...withoutDup];
397
- display.appendSteeringEvent(`Health check: build broken — queued heal task`);
438
+ const healTasks = healFailStreak > 0 ? [] : checkProjectHealth(cwd);
439
+ if (healTasks.length > 0 && remaining > 0) {
440
+ const healIds = healTasks.map(t => t.id);
441
+ const withoutDup = currentTasks.filter(t => !healIds.includes(t.id));
442
+ currentTasks = [...healTasks, ...withoutDup];
443
+ display.appendSteeringEvent(`Health check: build broken — queued ${healTasks.length} heal task(s)`);
444
+ }
445
+ else if (healTasks.length === 0 && healFailStreak > 0 && checkProjectHealth(cwd).length > 0) {
446
+ display.appendSteeringEvent(`Health check: build broken — heal skipped after ${healFailStreak} failed attempts, needs manual intervention`);
447
+ try {
448
+ const statusPath2 = join(runDir, "status.md");
449
+ const existing2 = existsSync(statusPath2) ? readFileSync(statusPath2, "utf-8") : "";
450
+ const marker = "## Heal blocked";
451
+ if (!existing2.includes(marker)) {
452
+ writeFileSync(statusPath2, `${existing2}${existing2 ? "\n\n" : ""}${marker}\nBuild has been broken for ${healFailStreak} waves, heal agents unable to fix — intervene manually.\n`, "utf-8");
453
+ }
454
+ }
455
+ catch { }
398
456
  }
399
457
  }
400
458
  if (currentTasks.length > remaining)
@@ -598,7 +656,7 @@ export async function executeRun(cfg) {
598
656
  liveConfig.remaining = remaining;
599
657
  lastCapped = swarm.cappedOut;
600
658
  lastAborted = swarm.aborted;
601
- recordBranches(swarm.agents, swarm.mergeResults, branches);
659
+ recordBranches(swarm.agents, swarm.mergeResults, branches, waveNum);
602
660
  saveWaveSession(runDir, waveNum, swarm.agents, swarm.totalCostUsd);
603
661
  // Tasks that never made it into the swarm (queue cleared on abort/cap)
604
662
  // are preserved as currentTasks so resume picks them up. Budget for these
@@ -623,6 +681,34 @@ export async function executeRun(cfg) {
623
681
  };
624
682
  }),
625
683
  });
684
+ // Track heal fail streak: if a heal-0 task existed this wave and changed 0 files, increment.
685
+ // If any non-heal execute task changed files, reset.
686
+ const lastWave = waveHistory[waveHistory.length - 1];
687
+ const healTask = lastWave?.tasks.find(t => t.type === "heal");
688
+ if (healTask && !healTask.filesChanged) {
689
+ healFailStreak++;
690
+ }
691
+ else if (lastWave?.tasks.some(t => (t.type !== "heal") && (t.filesChanged ?? 0) > 0)) {
692
+ healFailStreak = 0;
693
+ }
694
+ // C1: Circuit breaker — halt after 2 consecutive waves with 0 files across non-heal tasks
695
+ const nonHealFiles = lastWave?.tasks.filter(t => t.type !== "heal").reduce((sum, t) => sum + (t.filesChanged ?? 0), 0) ?? 0;
696
+ if (nonHealFiles === 0 && waveNum > 0) {
697
+ zeroFileWaves++;
698
+ if (zeroFileWaves >= 2) {
699
+ display.appendSteeringEvent(`Circuit breaker: 2 consecutive waves produced no merged changes — halting to prevent budget drain`);
700
+ display.stop();
701
+ saveRunState(runDir, buildRunState({ remaining, phase: "stopped", currentTasks: [] }));
702
+ display.stop();
703
+ restore();
704
+ console.log(chalk.red(`\n Circuit breaker: 2 consecutive waves produced no merged changes.`));
705
+ console.log(chalk.red(` Halting to prevent budget drain. Run preserved at ${runDir}.`));
706
+ process.exit(3);
707
+ }
708
+ }
709
+ else {
710
+ zeroFileWaves = 0;
711
+ }
626
712
  // Hook-blocked work: agents that touched files but nothing landed on the
627
713
  // branch (pre-commit hooks, gitignore, writes outside worktree). Surface
628
714
  // as a wave-level warning so steering sees it, not just a per-agent log.
@@ -670,6 +756,20 @@ export async function executeRun(cfg) {
670
756
  }
671
757
  if (next !== existing)
672
758
  writeFileSync(statusPath, next, "utf-8");
759
+ // GC ghost branches: delete merge-failed branches ≥2 waves old and mark discarded.
760
+ // Safe: their work never landed. The decomposer (Phase B) will re-attempt from saved taskPrompt.
761
+ const gcCandidates = branches.filter(b => b.status === "merge-failed" && b.firstFailedWave !== undefined && (waveNum - b.firstFailedWave) >= 2);
762
+ let gcCount = 0;
763
+ for (const b of gcCandidates) {
764
+ try {
765
+ execSync(`git branch -D "${b.branch}"`, { cwd, stdio: "ignore" });
766
+ }
767
+ catch { }
768
+ b.status = "discarded";
769
+ gcCount++;
770
+ }
771
+ if (gcCount > 0)
772
+ display.appendSteeringEvent(`GC: discarded ${gcCount} ghost branch(es) ≥2 waves old`);
673
773
  }
674
774
  catch { }
675
775
  // Fire-and-forget debrief after each wave.
@@ -1039,24 +1139,45 @@ async function promptBudgetExtension(ctx) {
1039
1139
  return suggested;
1040
1140
  return n;
1041
1141
  }
1142
+ /** Detect build errors and return one or more heal tasks. If errors span ≥2 files,
1143
+ * emit one task per file so they heal in parallel without merge conflicts. */
1042
1144
  function checkProjectHealth(cwd) {
1043
1145
  const cmd = detectHealthCommand(cwd);
1044
1146
  if (!cmd)
1045
- return undefined;
1147
+ return [];
1046
1148
  try {
1047
1149
  execSync(cmd, { cwd, encoding: "utf-8", stdio: "pipe", timeout: 60_000 });
1048
- return undefined;
1150
+ return [];
1049
1151
  }
1050
1152
  catch (err) {
1051
1153
  if (err.killed)
1052
- return undefined;
1154
+ return [];
1053
1155
  const output = ((err.stdout || "") + "\n" + (err.stderr || "")).trim();
1054
1156
  const trimmed = output.length > 4000 ? output.slice(0, 2000) + "\n…\n" + output.slice(-2000) : output;
1055
- return {
1056
- id: "heal-0",
1057
- prompt: `Fix the broken build. \`${cmd}\` fails after merging parallel work:\n\`\`\`\n${trimmed}\n\`\`\`\nFix every error. Run \`${cmd}\` when done to verify.`,
1058
- type: "heal",
1059
- };
1157
+ // B4: Split heal by file — extract distinct source file paths from errors
1158
+ const fileRe = /\/src\/[\w./-]+\.(ts|tsx|js|jsx)/g;
1159
+ const files = new Set();
1160
+ for (const m of trimmed.matchAll(fileRe))
1161
+ files.add(m[0]);
1162
+ if (files.size >= 2) {
1163
+ // One task per file — each agent gets only that file's error context
1164
+ const fileErrors = new Map();
1165
+ for (const f of files) {
1166
+ // Extract lines mentioning this file
1167
+ const lines = trimmed.split("\n").filter(l => l.includes(f));
1168
+ fileErrors.set(f, lines.slice(0, 30).join("\n"));
1169
+ }
1170
+ return Array.from(fileErrors.entries()).map(([file, errs], i) => ({
1171
+ id: `heal-${i}`,
1172
+ prompt: `Fix the broken build errors in \`${file}\`. \`${cmd}\` fails:\n\`\`\`\n${errs}\n\`\`\`\nFix every error in this file. Run \`${cmd}\` when done to verify.`,
1173
+ type: "heal",
1174
+ }));
1175
+ }
1176
+ return [{
1177
+ id: "heal-0",
1178
+ prompt: `Fix the broken build. \`${cmd}\` fails after merging parallel work:\n\`\`\`\n${trimmed}\n\`\`\`\nFix every error. Run \`${cmd}\` when done to verify.`,
1179
+ type: "heal",
1180
+ }];
1060
1181
  }
1061
1182
  }
1062
1183
  function detectHealthCommand(cwd) {
package/dist/state.d.ts CHANGED
@@ -72,6 +72,6 @@ export declare function recordBranches(agents: {
72
72
  }[], mergeResults: {
73
73
  branch: string;
74
74
  ok: boolean;
75
- }[], branches: BranchRecord[]): void;
75
+ }[], branches: BranchRecord[], currentWave?: number): void;
76
76
  export declare function autoMergeBranches(cwd: string, branches: BranchRecord[], onLog: (msg: string) => void): void;
77
77
  export declare function archiveMilestone(baseDir: string, waveNum: number): void;
package/dist/state.js CHANGED
@@ -461,7 +461,7 @@ export function loadWaveHistory(runDir) {
461
461
  }
462
462
  }
463
463
  // ── Branch management ──
464
- export function recordBranches(agents, mergeResults, branches) {
464
+ export function recordBranches(agents, mergeResults, branches, currentWave) {
465
465
  for (const a of agents) {
466
466
  if (a.branch) {
467
467
  branches.push({
@@ -475,8 +475,12 @@ export function recordBranches(agents, mergeResults, branches) {
475
475
  }
476
476
  for (const mr of mergeResults) {
477
477
  const br = branches.find(b => b.branch === mr.branch);
478
- if (br)
478
+ if (br) {
479
479
  br.status = mr.ok ? "merged" : "merge-failed";
480
+ if (!mr.ok && !br.firstFailedWave && currentWave !== undefined) {
481
+ br.firstFailedWave = currentWave;
482
+ }
483
+ }
480
484
  }
481
485
  }
482
486
  export function autoMergeBranches(cwd, branches, onLog) {
package/dist/steering.d.ts CHANGED
@@ -1,3 +1,52 @@
1
1
  import type { PermMode, SteerResult, RunMemory, WaveSummary } from "./types.js";
2
2
  import { type PlannerLog } from "./planner-query.js";
3
+ export declare const STEER_SCHEMA: {
4
+ type: "json_schema";
5
+ schema: {
6
+ type: string;
7
+ properties: {
8
+ done: {
9
+ type: string;
10
+ };
11
+ reasoning: {
12
+ type: string;
13
+ };
14
+ statusUpdate: {
15
+ type: string;
16
+ };
17
+ goalUpdate: {
18
+ type: string;
19
+ };
20
+ estimatedSessionsRemaining: {
21
+ type: string;
22
+ };
23
+ tasks: {
24
+ type: string;
25
+ items: {
26
+ type: string;
27
+ properties: {
28
+ prompt: {
29
+ type: string;
30
+ };
31
+ model: {
32
+ type: string;
33
+ };
34
+ noWorktree: {
35
+ type: string;
36
+ };
37
+ type: {
38
+ type: string;
39
+ enum: string[];
40
+ };
41
+ postcondition: {
42
+ type: string;
43
+ };
44
+ };
45
+ required: string[];
46
+ };
47
+ };
48
+ };
49
+ required: string[];
50
+ };
51
+ };
3
52
  export declare function steerWave(objective: string, history: WaveSummary[], remainingBudget: number, cwd: string, plannerModel: string, workerModel: string, fastModel: string | undefined, permissionMode: PermMode, concurrency: number, onLog: PlannerLog, runMemory?: RunMemory, transcriptName?: string): Promise<SteerResult>;
package/dist/steering.js CHANGED
@@ -2,7 +2,10 @@ import { runPlannerQuery, attemptJsonParse, postProcess } from "./planner-query.
2
2
  import { contextConstraintNote } from "./models.js";
3
3
  import { DESIGN_THINKING } from "./planner.js";
4
4
  import { createTurn, beginTurn, endTurn } from "./turns.js";
5
- const STEER_SCHEMA = {
5
+ import { writeFileSync, mkdirSync } from "fs";
6
+ import { join } from "path";
7
+ import { getTranscriptRunDir } from "./transcripts.js";
8
+ export const STEER_SCHEMA = {
6
9
  type: "json_schema",
7
10
  schema: {
8
11
  type: "object",
@@ -24,10 +27,11 @@ const STEER_SCHEMA = {
24
27
  required: ["done", "tasks", "reasoning", "statusUpdate", "estimatedSessionsRemaining"],
25
28
  },
26
29
  };
27
- export async function steerWave(objective, history, remainingBudget, cwd, plannerModel, workerModel, fastModel, permissionMode, concurrency, onLog, runMemory, transcriptName = "steer") {
28
- const constraint = contextConstraintNote(workerModel);
29
- const recentWaves = history.slice(-3);
30
- const recentText = recentWaves.length > 0 ? recentWaves.map(w => {
30
+ const PROMPT_BUDGET = 6000;
31
+ /** Build a compact wave summary; keepLast controls how many recent waves to include. */
32
+ function buildRecentText(history, keepLast) {
33
+ const recentWaves = history.slice(-keepLast);
34
+ return recentWaves.length > 0 ? recentWaves.map(w => {
31
35
  const lines = w.tasks.map(t => {
32
36
  const isExecute = !t.type || t.type === "execute";
33
37
  const files = t.filesChanged ? ` (${t.filesChanged} files)` : isExecute ? " (0 files)" : " (read-only)";
@@ -39,16 +43,25 @@ export async function steerWave(objective, history, remainingBudget, cwd, planne
39
43
  const warn = totalExecute > 0 && zeroExecute > totalExecute / 2 ? `\n ⚠ ${zeroExecute}/${totalExecute} execute tasks changed 0 files -- tasks may be mis-scoped or blocked` : "";
40
44
  return `Wave ${w.wave + 1}:\n${lines}${warn}`;
41
45
  }).join("\n\n") : "(first wave)";
46
+ }
47
+ export async function steerWave(objective, history, remainingBudget, cwd, plannerModel, workerModel, fastModel, permissionMode, concurrency, onLog, runMemory, transcriptName = "steer") {
48
+ const constraint = contextConstraintNote(workerModel);
42
49
  const cap = (s, max) => s.length > max ? s.slice(0, max) + "\n...(truncated)" : s;
43
50
  const statusBlock = runMemory?.status ? `\nCurrent project status:\n${runMemory.status}\n` : "";
44
- const milestoneBlock = runMemory?.milestones ? `\nMilestone snapshots:\n${cap(runMemory.milestones, 4000)}\n` : "";
45
- const designBlock = runMemory?.designs ? `\nArchitectural research:\n${cap(runMemory.designs, 4000)}\n` : "";
46
- const reflectionBlock = runMemory?.reflections ? `\nLatest quality reports:\n${cap(runMemory.reflections, 3000)}\n` : "";
47
- const verificationBlock = runMemory?.verifications ? `\nVerification results (from actually running the app):\n${cap(runMemory.verifications, 3000)}\n` : "";
51
+ const milestoneBlock = runMemory?.milestones ? `\nMilestone snapshots:\n${cap(runMemory.milestones, 2000)}\n` : "";
52
+ const designBlock = runMemory?.designs ? `\nArchitectural research:\n${cap(runMemory.designs, 1500)}\n` : "";
53
+ const reflectionBlock = runMemory?.reflections ? `\nLatest quality reports:\n${cap(runMemory.reflections, 1000)}\n` : "";
54
+ const verificationBlock = runMemory?.verifications ? `\nVerification results (from actually running the app):\n${cap(runMemory.verifications, 1000)}\n` : "";
48
55
  const goalBlock = runMemory?.goal ? `\nNorth star -- what "amazing" means:\n${runMemory.goal}\n` : "";
49
- const prevRunBlock = runMemory?.previousRuns ? `\nKnowledge from previous runs:\n${cap(runMemory.previousRuns, 3000)}\n` : "";
56
+ const prevRunBlock = runMemory?.previousRuns ? `\nKnowledge from previous runs:\n${cap(runMemory.previousRuns, 800)}\n` : "";
50
57
  const guidanceBlock = runMemory?.userGuidance ? `\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\nUSER DIRECTIVES -- highest priority\nThese come directly from the user running this session. They override prior assumptions about status, goal, and next steps. Incorporate them into the wave you compose below. If they conflict with earlier decisions, the user wins. Reflect the new direction in statusUpdate so future waves remember.\n\n${cap(runMemory.userGuidance, 4000)}\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n` : "";
51
- const prompt = `You are the quality director for an autonomous multi-wave agent system. Your job is to push the work toward "amazing," not just "done."
58
+ // Collapse archetype menu after wave 3 to save ~2 KB
59
+ const archetypesShort = `Archetypes: execute | explore | critique | synthesize | verify | user-test | polish | simplify`;
60
+ const archetypeBlock = history.length >= 3
61
+ ? archetypesShort
62
+ : null;
63
+ let recentText = buildRecentText(history, 3);
64
+ let prompt = `You are the quality director for an autonomous multi-wave agent system. Your job is to push the work toward "amazing," not just "done."
52
65
  ${guidanceBlock}
53
66
  Objective: ${objective}
54
67
  ${goalBlock}${statusBlock}${milestoneBlock}${prevRunBlock}
@@ -66,7 +79,7 @@ If verification found issues, those are the priority. Fix what's broken before b
66
79
 
67
80
  ## Compose the next wave
68
81
 
69
- You have full creative freedom. Design the wave that will have the highest impact right now. Here are archetypes to draw from -- mix, adapt, or invent your own:
82
+ You have full creative freedom. Design the wave that will have the highest impact right now.${archetypeBlock ? `\n\nUse these archetypes as shorthand — mix, adapt, or invent your own:\n\n${archetypeBlock}` : ` Here are archetypes to draw from -- mix, adapt, or invent your own:
70
83
 
71
84
  **Execute** -- Agents implement concrete changes in parallel. Each touches different files. The bread and butter.
72
85
  Example: 5 agents each owning a different feature or fix
@@ -90,52 +103,86 @@ You have full creative freedom. Design the wave that will have the highest impac
90
103
  Example: 2 agents, one on happy paths, one on error/edge states
91
104
 
92
105
  **Simplify** -- Invoke the 'simplify' skill. It reviews changed code and spawns parallel sub-agents for thorough review.
93
- Example: 1 agent per wave with task type "review", let the skill handle the rest
94
-
95
- You can combine these. A wave can have 3 execute agents + 1 verification agent. Or 2 divergent explorers. Whatever the situation calls for.
106
+ Example: 1 agent per wave with task type "review", let the skill handle the rest`}
96
107
 
97
- For non-execute tasks (critique, verify, user-test, synthesize), tell agents to write their output to files in the run directory so findings persist for future waves. Use paths like: .claude-overnight/latest/reflections/wave-N-{topic}.md or .claude-overnight/latest/verifications/wave-N-{topic}.md.
108
+ For non-execute tasks (critique, verify, user-test, synthesize), tell agents to write their output to files in the run directory so findings persist for future waves. Use paths like: .claude-overnight/latest/reflections/wave-n-{topic}.md or .claude-overnight/latest/verifications/wave-n-{topic}.md.
98
109
 
99
110
  IMPORTANT: You cannot declare "done" unless at least one verification has confirmed the app works. If you're considering done but haven't verified, compose a verification task first.
100
111
 
101
112
  Respond with ONLY a JSON object (no markdown fences):
102
- {
103
- "done": false,
104
- "reasoning": "your assessment and why you chose this wave composition",
105
- "goalUpdate": "optional -- refine what 'amazing' means as you learn more",
106
- "statusUpdate": "REQUIRED -- concise project status: what's built, what works, what's rough, quality level, key gaps. This replaces the previous status.",
107
- "estimatedSessionsRemaining": 15,
108
- "tasks": [
109
- {"prompt": "task instruction...", "model": "worker", "postcondition": "test -f src/new-file.ts"},
110
- {"prompt": "quick icon fix, verified by next wave's workers...", "model": "fast"},
111
- {"prompt": "verify the app end-to-end...", "model": "worker", "noWorktree": true}
112
- ]
113
- }
113
+ {"done":boolean,"reasoning":"...","statusUpdate":"REQUIRED","estimatedSessionsRemaining":N,"tasks":[{"prompt":"...","model":"worker|fast","noWorktree":true/false,"postcondition":"..."}]}
114
114
 
115
115
  "estimatedSessionsRemaining" is REQUIRED. Your best honest estimate of how many MORE agent sessions (beyond the wave you just composed above) are needed to reach 'amazing' -- include follow-up fixes, polish, verification, and anything else you'd want before shipping. Be realistic, not optimistic. Use 0 only if truly done.
116
116
 
117
- The "model" field on each task — you have **two kinds of workers**, both first-class. Pick the right one per task:
117
+ The "model" field on each task — two kinds of workers. Pick the right one:
118
118
 
119
- **Fast worker — "fast" (${fastModel ?? "not set"})** is the default workhorse for well-scoped, mechanical tasks. It's a real worker, same tools, same environment just a cheaper, faster model. The next wave's workers (fast or main) will catch and fix any issues. Route here by default when any of these apply:
120
- - Single-file edits, refactors, renames
121
- - Surgical multi-line changes with a clear spec (add a param, wrap a call, tweak a prompt line)
122
- - Read/research: scan files, summarize findings
123
- - Build checks, postcondition verification
124
- - E2E test runs with concrete steps
125
- - Simple critiques, polish tweaks
126
- - Running existing scripts/tests and capturing output
127
- - Docs / markdown updates
128
- - Stdlib-only utility scripts with a crisp spec
119
+ **Fast worker — "fast" (${fastModel ?? "not set"})** for well-scoped, mechanical tasks: single-file edits, refactors, renames, read/research, build checks, simple critiques, docs updates.
129
120
 
130
- **Main worker — "worker" (${workerModel})** is for tasks that genuinely need deeper reasoning: multi-file features, complex logic, architectural changes, ambiguous specs, anything where a mis-step costs more than a wave to recover from.
121
+ **Main worker — "worker" (${workerModel})** for tasks that need deeper reasoning: multi-file features, complex logic, architectural changes, ambiguous specs.
131
122
 
132
- When in doubt, pick "fast". Both are workers; the wave loop iterates. Over-using "worker" is a real cost — aim to route the clear majority of well-scoped tasks to the fast worker whenever a fast worker is configured.
123
+ When in doubt, pick "fast".
133
124
 
134
- Set "noWorktree": true for verify/user-test tasks -- they need the real project directory with env files, dependencies, and local config.
125
+ Set "noWorktree": true for verify/user-test tasks.
135
126
 
136
- OPTIONAL "postcondition": a single shell one-liner that exits 0 when the task is truly done. The framework runs it after merge; if it fails, the agent's "no-op" claim is rejected and the task is retried with the failure output as context. Use it whenever the task has a concrete, machine-checkable outcome. Examples: \`test -f src/tracking/watchlist-poller.ts && grep -q "runWatchlistPoll" src/tracking/watchlist-poller.ts\`, \`grep -q "watchlistPollerTask" src/scraper/scheduler.ts\`, \`pnpm run build\`, \`diff -q src/public/index.html frontend/dist/index.html\`. Keep it cheap (sub-second, no network). Omit for exploratory/research tasks where there is no crisp check.
127
+ OPTIONAL "postcondition": a single shell one-liner that exits 0 when the task is truly done. Keep it cheap. Omit for exploratory tasks.
137
128
 
138
- If done: {"done": true, "reasoning": "...", "statusUpdate": "...", "estimatedSessionsRemaining": 0, "tasks": []}`;
129
+ If done: {"done":true,"reasoning":"...","statusUpdate":"...","estimatedSessionsRemaining":0,"tasks":[]}`;
130
+ // ── Hard 6 KB budget: trim non-critical blocks if over limit ──
131
+ let trimmed = 0;
132
+ if (prompt.length > PROMPT_BUDGET) {
133
+ // 1. Keep last 2 waves instead of 3
134
+ recentText = buildRecentText(history, 2);
135
+ prompt = prompt.replace(`Recent waves:\n${buildRecentText(history, 3)}`, `Recent waves:\n${recentText}`);
136
+ trimmed++;
137
+ }
138
+ if (prompt.length > PROMPT_BUDGET && runMemory?.milestones) {
139
+ const old = `\nMilestone snapshots:\n${cap(runMemory.milestones, 2000)}\n`;
140
+ const neu = `\nMilestone snapshots:\n${cap(runMemory.milestones, 1000)}\n`;
141
+ if (old !== neu) {
142
+ prompt = prompt.replace(old, neu);
143
+ trimmed++;
144
+ }
145
+ }
146
+ if (prompt.length > PROMPT_BUDGET && runMemory?.designs) {
147
+ const old = `\nArchitectural research:\n${cap(runMemory.designs, 1500)}\n`;
148
+ const neu = `\nArchitectural research:\n${cap(runMemory.designs, 1000)}\n`;
149
+ if (old !== neu) {
150
+ prompt = prompt.replace(old, neu);
151
+ trimmed++;
152
+ }
153
+ }
154
+ if (prompt.length > PROMPT_BUDGET && runMemory?.reflections) {
155
+ const old = `\nLatest quality reports:\n${cap(runMemory.reflections, 1000)}\n`;
156
+ const neu = `\nLatest quality reports:\n${cap(runMemory.reflections, 500)}\n`;
157
+ if (old !== neu) {
158
+ prompt = prompt.replace(old, neu);
159
+ trimmed++;
160
+ }
161
+ }
162
+ if (prompt.length > PROMPT_BUDGET && runMemory?.verifications) {
163
+ const old = `\nVerification results (from actually running the app):\n${cap(runMemory.verifications, 1000)}\n`;
164
+ const neu = `\nVerification results (from actually running the app):\n${cap(runMemory.verifications, 500)}\n`;
165
+ if (old !== neu) {
166
+ prompt = prompt.replace(old, neu);
167
+ trimmed++;
168
+ }
169
+ }
170
+ if (prompt.length > PROMPT_BUDGET && runMemory?.previousRuns) {
171
+ const old = `\nKnowledge from previous runs:\n${cap(runMemory.previousRuns, 800)}\n`;
172
+ const neu = `\nKnowledge from previous runs:\n${cap(runMemory.previousRuns, 400)}\n`;
173
+ if (old !== neu) {
174
+ prompt = prompt.replace(old, neu);
175
+ trimmed++;
176
+ }
177
+ }
178
+ if (trimmed > 0) {
179
+ onLog(`Steering prompt trimmed ${trimmed} blocks (${prompt.length}/${PROMPT_BUDGET} chars)`, "event");
180
+ }
181
+ // ── Non-Claude planner JSON hardening ──
182
+ if (!/^claude/i.test(plannerModel)) {
183
+ const directive = `OUTPUT: single JSON object. No prose. No markdown fences.`;
184
+ prompt = `${directive}\n\n${prompt}\n\n${directive}`;
185
+ }
139
186
  onLog("Assessing...", "status");
140
187
  onLog(`Reading codebase -- wave ${history.length + 1}`, "event");
141
188
  const turn = createTurn("steer", `Steer wave ${history.length + 1}`, `steer-${history.length}`, plannerModel);
@@ -146,11 +193,34 @@ If done: {"done": true, "reasoning": "...", "statusUpdate": "...", "estimatedSes
146
193
  if (first)
147
194
  return first;
148
195
  onLog(`Steering parse failed (${resultText.length} chars). Asking model to fix...`, "event");
196
+ // C2: persist raw output on parse failure
197
+ const steerDir = getTranscriptRunDir() ? join(getTranscriptRunDir(), "steering") : undefined;
198
+ if (steerDir) {
199
+ try {
200
+ mkdirSync(steerDir, { recursive: true });
201
+ }
202
+ catch { }
203
+ // Extract wave info from transcriptName (e.g. "steer-wave-32-attempt-1")
204
+ const waveMatch = transcriptName.match(/wave-(\d+)-attempt-(\d+)/);
205
+ if (waveMatch) {
206
+ writeFileSync(join(steerDir, `wave-${waveMatch[1]}-attempt-${waveMatch[2]}-raw.txt`), resultText, "utf-8");
207
+ }
208
+ }
149
209
  const snippet = resultText.length > 2000 ? resultText.slice(0, 1000) + "\n...\n" + resultText.slice(-800) : resultText;
150
210
  const retryText = await runPlannerQuery(`Your previous steering response could not be parsed as JSON. Here is what you returned:\n\n---\n${snippet}\n---\n\nExtract or rewrite the above as ONLY a valid JSON object with this schema: {"done":boolean,"reasoning":"...","statusUpdate":"...","tasks":[{"prompt":"..."}]}\n\nRespond with ONLY the JSON, no markdown fences, no explanation.`, { cwd, model: plannerModel, permissionMode, outputFormat: STEER_SCHEMA, transcriptName: `${transcriptName}-retry`, turnId: turn.id }, onLog);
151
211
  const retryParsed = attemptJsonParse(retryText);
152
212
  if (retryParsed)
153
213
  return retryParsed;
214
+ // C2: persist retry raw output
215
+ if (steerDir) {
216
+ try {
217
+ const waveMatch2 = transcriptName.match(/wave-(\d+)-attempt-(\d+)/);
218
+ if (waveMatch2) {
219
+ writeFileSync(join(steerDir, `wave-${waveMatch2[1]}-attempt-${waveMatch2[2]}-retry-raw.txt`), retryText, "utf-8");
220
+ }
221
+ }
222
+ catch { }
223
+ }
154
224
  throw new Error(`Could not parse steering response after retry (${resultText.length} chars: ${resultText.slice(0, 120)}...)`);
155
225
  })();
156
226
  const isDone = parsed.done === true;
@@ -1,5 +1,5 @@
1
1
  export declare function setTranscriptRunDir(dir: string | undefined): void;
2
2
  export declare function getTranscriptRunDir(): string | undefined;
3
3
  export declare function transcriptPath(name: string): string | undefined;
4
- /** Append a single event; silent on error (disk full, permission, etc.). */
4
+ /** Append a single event; log to stderr once per name on failure (C5). */
5
5
  export declare function writeTranscriptEvent(name: string, event: Record<string, unknown>): void;
@@ -25,7 +25,9 @@ export function getTranscriptRunDir() {
25
25
  export function transcriptPath(name) {
26
26
  return _runDir ? join(_runDir, "transcripts", `${name}.ndjson`) : undefined;
27
27
  }
28
- /** Append a single event; silent on error (disk full, permission, etc.). */
28
+ /** Names that have already errored; guards against repeated stderr spam. */
29
+ const _seenErrors = new Set();
30
+ /** Append a single event; log to stderr once per name on failure (C5). */
29
31
  export function writeTranscriptEvent(name, event) {
30
32
  const path = transcriptPath(name);
31
33
  if (!path)
@@ -34,5 +36,11 @@ export function writeTranscriptEvent(name, event) {
34
36
  mkdirSync(dirname(path), { recursive: true });
35
37
  appendFileSync(path, JSON.stringify({ t: Date.now(), ...event }) + "\n", "utf-8");
36
38
  }
37
- catch { }
39
+ catch (err) {
40
+ if (!_seenErrors.has(name)) {
41
+ _seenErrors.add(name);
42
+ const msg = err instanceof Error ? err.message : String(err);
43
+ process.stderr.write(`[transcript] writeTranscriptEvent("${name}") failed: ${msg}\n`);
44
+ }
45
+ }
38
46
  }
package/dist/types.d.ts CHANGED
@@ -156,9 +156,10 @@ export type MergeStrategy = "yolo" | "branch";
156
156
  export interface BranchRecord {
157
157
  branch: string;
158
158
  taskPrompt: string;
159
- status: "merged" | "unmerged" | "failed" | "merge-failed";
159
+ status: "merged" | "unmerged" | "failed" | "merge-failed" | "discarded";
160
160
  filesChanged: number;
161
161
  costUsd: number;
162
+ firstFailedWave?: number;
162
163
  }
163
164
  /** Per-window rate limit snapshot (matches SDK rateLimitType). */
164
165
  export interface RateLimitWindow {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-overnight",
3
- "version": "1.25.42",
3
+ "version": "1.25.43",
4
4
  "description": "Parallel Claude agents in git worktrees with a usage cap that reserves headroom for your interactive Claude Code. Crash-safe resume. Provider-agnostic model catalog (Anthropic, Cursor, OpenAI, Gemini, DeepSeek, Llama, Qwen) with capability-based task scoping.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-overnight",
3
- "version": "1.25.42",
3
+ "version": "1.25.43",
4
4
  "description": "Claude Code skill for understanding, installing, and inspecting claude-overnight runs -- parallel Claude agents in git worktrees with thinking waves, multi-wave steering, and crash-safe resume. Supports Cursor API Proxy, Qwen, OpenRouter.",
5
5
  "author": {
6
6
  "name": "Francesco Fornace"