claude-overnight 1.25.42 → 1.25.43
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/planner-query.js +15 -0
- package/dist/providers.js +5 -0
- package/dist/run.js +150 -29
- package/dist/state.d.ts +1 -1
- package/dist/state.js +6 -2
- package/dist/steering.d.ts +49 -0
- package/dist/steering.js +114 -44
- package/dist/transcripts.d.ts +1 -1
- package/dist/transcripts.js +10 -2
- package/dist/types.d.ts +2 -1
- package/package.json +1 -1
- package/plugins/claude-overnight/.claude-plugin/plugin.json +1 -1
package/dist/planner-query.js
CHANGED
|
@@ -619,6 +619,21 @@ function extractOutermostBraces(text) {
|
|
|
619
619
|
return null;
|
|
620
620
|
}
|
|
621
621
|
export function attemptJsonParse(text) {
|
|
622
|
+
// Strip conversational prefaces/suffixes that weak-schema models sometimes
|
|
623
|
+
// wrap around the JSON body (e.g. "Here is the JSON: { ... } Let me know…").
|
|
624
|
+
const preface = /^\s*(?:Here (?:is|are)[^{]*|Let me[^{]*|I'?ll[^{]*|Sure[^{]*|Okay[^{]*)/i;
|
|
625
|
+
const suffix = /\n\n(?:Let me know|Hope this|Please let me)[\s\S]*$/i;
|
|
626
|
+
if (preface.test(text) || suffix.test(text)) {
|
|
627
|
+
const cleaned = text.replace(preface, "").replace(suffix, "").trim();
|
|
628
|
+
if (cleaned && cleaned !== text) {
|
|
629
|
+
try {
|
|
630
|
+
const obj = JSON.parse(cleaned);
|
|
631
|
+
if (typeof obj === "object" && obj !== null)
|
|
632
|
+
return obj;
|
|
633
|
+
}
|
|
634
|
+
catch { }
|
|
635
|
+
}
|
|
636
|
+
}
|
|
622
637
|
try {
|
|
623
638
|
const obj = JSON.parse(text);
|
|
624
639
|
if (typeof obj === "object" && obj !== null)
|
package/dist/providers.js
CHANGED
|
@@ -178,6 +178,11 @@ export function envFor(p) {
|
|
|
178
178
|
base.ANTHROPIC_AUTH_TOKEN = key;
|
|
179
179
|
}
|
|
180
180
|
delete base.ANTHROPIC_API_KEY;
|
|
181
|
+
// Prevent CURSOR_API_KEY from leaking into non-proxy envs — would cause
|
|
182
|
+
// isCursorProxyEnv false-positive, silently rerouting through direct fetch
|
|
183
|
+
// which ignores outputFormat (no JSON schema enforcement).
|
|
184
|
+
delete base.CURSOR_API_KEY;
|
|
185
|
+
delete base.CURSOR_AUTH_TOKEN;
|
|
181
186
|
return base;
|
|
182
187
|
}
|
|
183
188
|
/**
|
package/dist/run.js
CHANGED
|
@@ -3,8 +3,8 @@ import { join } from "path";
|
|
|
3
3
|
import { execSync } from "child_process";
|
|
4
4
|
import chalk from "chalk";
|
|
5
5
|
import { Swarm } from "./swarm.js";
|
|
6
|
-
import { steerWave } from "./steering.js";
|
|
7
|
-
import { getTotalPlannerCost, getPlannerRateLimitInfo, getPeakPlannerContext, runPlannerQuery, setPlannerEnvResolver } from "./planner-query.js";
|
|
6
|
+
import { steerWave, STEER_SCHEMA } from "./steering.js";
|
|
7
|
+
import { getTotalPlannerCost, getPlannerRateLimitInfo, getPeakPlannerContext, runPlannerQuery, setPlannerEnvResolver, attemptJsonParse } from "./planner-query.js";
|
|
8
8
|
import { contextFillInfo } from "./render.js";
|
|
9
9
|
import { getModelCapability } from "./models.js";
|
|
10
10
|
import { buildEnvResolver, isCursorProxyProvider } from "./providers.js";
|
|
@@ -55,6 +55,8 @@ export async function executeRun(cfg) {
|
|
|
55
55
|
let lastCapped = false, lastAborted = false, objectiveComplete = false;
|
|
56
56
|
let lastEstimate;
|
|
57
57
|
const branches = [];
|
|
58
|
+
let healFailStreak = 0; // consecutive waves where heal-0 agent changed 0 files
|
|
59
|
+
let zeroFileWaves = 0; // consecutive waves with 0 files across non-heal tasks
|
|
58
60
|
if (cfg.resuming && cfg.resumeState) {
|
|
59
61
|
const rs = cfg.resumeState;
|
|
60
62
|
remaining = Math.max(1, rs.remaining);
|
|
@@ -295,8 +297,21 @@ export async function executeRun(cfg) {
|
|
|
295
297
|
// Shared steering logic used by both resume-steering and in-loop steering
|
|
296
298
|
const runSteering = async () => {
|
|
297
299
|
let steered = false;
|
|
300
|
+
// ── B1: Skip steering when ≥2 unresolved merge-failed branches exist ──
|
|
301
|
+
const mergeFailedBranches = branches.filter(b => b.status === "merge-failed");
|
|
302
|
+
if (mergeFailedBranches.length >= 2) {
|
|
303
|
+
currentTasks = mergeFailedBranches.map((b, i) => ({
|
|
304
|
+
id: `branch-retry-${i}`,
|
|
305
|
+
prompt: `Your previous attempt at this task merge-failed against main. Redo it against the current state of main with minimal, focused edits. Original task:\n\n${b.taskPrompt}`,
|
|
306
|
+
model: workerModel,
|
|
307
|
+
postcondition: "pnpm run build",
|
|
308
|
+
}));
|
|
309
|
+
display.appendSteeringEvent(`Skipping steering — ${mergeFailedBranches.length} merge-failed branches form the wave`);
|
|
310
|
+
return true;
|
|
311
|
+
}
|
|
298
312
|
let steerAttempts = 0;
|
|
299
|
-
|
|
313
|
+
const MAX_STEER_ATTEMPTS = 2; // B2: retry threshold 3 → 2
|
|
314
|
+
while (!steered && remaining > 0 && !stopping && steerAttempts < MAX_STEER_ATTEMPTS) {
|
|
300
315
|
steerAttempts++;
|
|
301
316
|
const plannerCostBefore = getTotalPlannerCost();
|
|
302
317
|
try {
|
|
@@ -350,23 +365,52 @@ export async function executeRun(cfg) {
|
|
|
350
365
|
}
|
|
351
366
|
catch (err) {
|
|
352
367
|
accCost += getTotalPlannerCost() - plannerCostBefore;
|
|
353
|
-
|
|
354
|
-
|
|
368
|
+
const rawPreview = err?.message?.slice(0, 200) || "(no details)";
|
|
369
|
+
if (steerAttempts < MAX_STEER_ATTEMPTS) {
|
|
370
|
+
display.appendSteeringEvent(`Steering failed (attempt ${steerAttempts}/${MAX_STEER_ATTEMPTS}) -- retrying... ${rawPreview}`);
|
|
355
371
|
continue;
|
|
356
372
|
}
|
|
357
|
-
|
|
358
|
-
|
|
373
|
+
// ── B3: Decomposer fallback (replaces single-giant-fallback) ──
|
|
374
|
+
display.appendSteeringEvent(`Steering failed ${MAX_STEER_ATTEMPTS}× — decomposer fallback`);
|
|
375
|
+
// First: try merge-failed recycling even if only 1 unresolved branch exists
|
|
376
|
+
const stillFailed = branches.filter(b => b.status === "merge-failed");
|
|
377
|
+
if (stillFailed.length >= 1) {
|
|
378
|
+
currentTasks = stillFailed.map((b, i) => ({
|
|
379
|
+
id: `branch-retry-${i}`,
|
|
380
|
+
prompt: `Your previous attempt at this task merge-failed against main. Redo it against the current state of main with minimal, focused edits. Original task:\n\n${b.taskPrompt}`,
|
|
381
|
+
model: workerModel,
|
|
382
|
+
postcondition: "pnpm run build",
|
|
383
|
+
}));
|
|
384
|
+
display.appendSteeringEvent(`Decomposer: ${stillFailed.length} merge-failed branch(es) retried as swarm tasks`);
|
|
385
|
+
steered = true;
|
|
386
|
+
break;
|
|
387
|
+
}
|
|
388
|
+
// Second: minimal-prompt planner query
|
|
389
|
+
display.appendSteeringEvent("Decomposer: minimal planner query…");
|
|
359
390
|
try {
|
|
360
|
-
|
|
391
|
+
let statusText = "";
|
|
392
|
+
try {
|
|
393
|
+
statusText = readFileSync(join(runDir, "status.md"), "utf-8");
|
|
394
|
+
}
|
|
395
|
+
catch { }
|
|
396
|
+
const minimalPrompt = `${objective ? `Objective: ${objective}` : ""}\n\nStatus:\n${statusText || "(none)"}\n\nReturn tasks: string[] — 3-6 specific follow-ups. JSON only. {"tasks":[{"prompt":"..."}]}`;
|
|
397
|
+
const minimalText = await runPlannerQuery(minimalPrompt, { cwd, model: plannerModel, permissionMode, outputFormat: STEER_SCHEMA, transcriptName: "decomposer-minimal", maxTurns: 40 }, () => { });
|
|
398
|
+
const parsed = attemptJsonParse(minimalText);
|
|
399
|
+
if (parsed?.tasks?.length > 0) {
|
|
400
|
+
currentTasks = parsed.tasks.map((t, i) => ({
|
|
401
|
+
id: `decompose-${i}`,
|
|
402
|
+
prompt: typeof t === "string" ? t : t.prompt,
|
|
403
|
+
model: workerModel,
|
|
404
|
+
}));
|
|
405
|
+
display.appendSteeringEvent(`Decomposer: ${currentTasks.length} tasks from minimal planner`);
|
|
406
|
+
steered = true;
|
|
407
|
+
break;
|
|
408
|
+
}
|
|
361
409
|
}
|
|
362
410
|
catch { }
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
type: "execute",
|
|
367
|
-
}];
|
|
368
|
-
steered = true;
|
|
369
|
-
break;
|
|
411
|
+
// Finally: halt
|
|
412
|
+
display.appendSteeringEvent(`Decomposer: no tasks produced — halting`);
|
|
413
|
+
return false;
|
|
370
414
|
}
|
|
371
415
|
}
|
|
372
416
|
return steered;
|
|
@@ -389,12 +433,26 @@ export async function executeRun(cfg) {
|
|
|
389
433
|
// Health check before each wave: a broken build poisons every subsequent
|
|
390
434
|
// agent context, so prepend a heal task when detected. Steering-planned
|
|
391
435
|
// tasks still run, just after the build is green again.
|
|
436
|
+
// Skip if prior heal changed 0 files (heal unable to fix).
|
|
392
437
|
{
|
|
393
|
-
const
|
|
394
|
-
if (
|
|
395
|
-
const
|
|
396
|
-
|
|
397
|
-
|
|
438
|
+
const healTasks = healFailStreak > 0 ? [] : checkProjectHealth(cwd);
|
|
439
|
+
if (healTasks.length > 0 && remaining > 0) {
|
|
440
|
+
const healIds = healTasks.map(t => t.id);
|
|
441
|
+
const withoutDup = currentTasks.filter(t => !healIds.includes(t.id));
|
|
442
|
+
currentTasks = [...healTasks, ...withoutDup];
|
|
443
|
+
display.appendSteeringEvent(`Health check: build broken — queued ${healTasks.length} heal task(s)`);
|
|
444
|
+
}
|
|
445
|
+
else if (healTasks.length === 0 && healFailStreak > 0 && checkProjectHealth(cwd).length > 0) {
|
|
446
|
+
display.appendSteeringEvent(`Health check: build broken — heal skipped after ${healFailStreak} failed attempts, needs manual intervention`);
|
|
447
|
+
try {
|
|
448
|
+
const statusPath2 = join(runDir, "status.md");
|
|
449
|
+
const existing2 = existsSync(statusPath2) ? readFileSync(statusPath2, "utf-8") : "";
|
|
450
|
+
const marker = "## Heal blocked";
|
|
451
|
+
if (!existing2.includes(marker)) {
|
|
452
|
+
writeFileSync(statusPath2, `${existing2}${existing2 ? "\n\n" : ""}${marker}\nBuild has been broken for ${healFailStreak} waves, heal agents unable to fix — intervene manually.\n`, "utf-8");
|
|
453
|
+
}
|
|
454
|
+
}
|
|
455
|
+
catch { }
|
|
398
456
|
}
|
|
399
457
|
}
|
|
400
458
|
if (currentTasks.length > remaining)
|
|
@@ -598,7 +656,7 @@ export async function executeRun(cfg) {
|
|
|
598
656
|
liveConfig.remaining = remaining;
|
|
599
657
|
lastCapped = swarm.cappedOut;
|
|
600
658
|
lastAborted = swarm.aborted;
|
|
601
|
-
recordBranches(swarm.agents, swarm.mergeResults, branches);
|
|
659
|
+
recordBranches(swarm.agents, swarm.mergeResults, branches, waveNum);
|
|
602
660
|
saveWaveSession(runDir, waveNum, swarm.agents, swarm.totalCostUsd);
|
|
603
661
|
// Tasks that never made it into the swarm (queue cleared on abort/cap)
|
|
604
662
|
// are preserved as currentTasks so resume picks them up. Budget for these
|
|
@@ -623,6 +681,34 @@ export async function executeRun(cfg) {
|
|
|
623
681
|
};
|
|
624
682
|
}),
|
|
625
683
|
});
|
|
684
|
+
// Track heal fail streak: if a heal-0 task existed this wave and changed 0 files, increment.
|
|
685
|
+
// If any non-heal execute task changed files, reset.
|
|
686
|
+
const lastWave = waveHistory[waveHistory.length - 1];
|
|
687
|
+
const healTask = lastWave?.tasks.find(t => t.type === "heal");
|
|
688
|
+
if (healTask && !healTask.filesChanged) {
|
|
689
|
+
healFailStreak++;
|
|
690
|
+
}
|
|
691
|
+
else if (lastWave?.tasks.some(t => (t.type !== "heal") && (t.filesChanged ?? 0) > 0)) {
|
|
692
|
+
healFailStreak = 0;
|
|
693
|
+
}
|
|
694
|
+
// C1: Circuit breaker — halt after 2 consecutive waves with 0 files across non-heal tasks
|
|
695
|
+
const nonHealFiles = lastWave?.tasks.filter(t => t.type !== "heal").reduce((sum, t) => sum + (t.filesChanged ?? 0), 0) ?? 0;
|
|
696
|
+
if (nonHealFiles === 0 && waveNum > 0) {
|
|
697
|
+
zeroFileWaves++;
|
|
698
|
+
if (zeroFileWaves >= 2) {
|
|
699
|
+
display.appendSteeringEvent(`Circuit breaker: 2 consecutive waves produced no merged changes — halting to prevent budget drain`);
|
|
700
|
+
display.stop();
|
|
701
|
+
saveRunState(runDir, buildRunState({ remaining, phase: "stopped", currentTasks: [] }));
|
|
702
|
+
display.stop();
|
|
703
|
+
restore();
|
|
704
|
+
console.log(chalk.red(`\n Circuit breaker: 2 consecutive waves produced no merged changes.`));
|
|
705
|
+
console.log(chalk.red(` Halting to prevent budget drain. Run preserved at ${runDir}.`));
|
|
706
|
+
process.exit(3);
|
|
707
|
+
}
|
|
708
|
+
}
|
|
709
|
+
else {
|
|
710
|
+
zeroFileWaves = 0;
|
|
711
|
+
}
|
|
626
712
|
// Hook-blocked work: agents that touched files but nothing landed on the
|
|
627
713
|
// branch (pre-commit hooks, gitignore, writes outside worktree). Surface
|
|
628
714
|
// as a wave-level warning so steering sees it, not just a per-agent log.
|
|
@@ -670,6 +756,20 @@ export async function executeRun(cfg) {
|
|
|
670
756
|
}
|
|
671
757
|
if (next !== existing)
|
|
672
758
|
writeFileSync(statusPath, next, "utf-8");
|
|
759
|
+
// GC ghost branches: delete merge-failed branches ≥2 waves old and mark discarded.
|
|
760
|
+
// Safe: their work never landed. The decomposer (Phase B) will re-attempt from saved taskPrompt.
|
|
761
|
+
const gcCandidates = branches.filter(b => b.status === "merge-failed" && b.firstFailedWave !== undefined && (waveNum - b.firstFailedWave) >= 2);
|
|
762
|
+
let gcCount = 0;
|
|
763
|
+
for (const b of gcCandidates) {
|
|
764
|
+
try {
|
|
765
|
+
execSync(`git branch -D "${b.branch}"`, { cwd, stdio: "ignore" });
|
|
766
|
+
}
|
|
767
|
+
catch { }
|
|
768
|
+
b.status = "discarded";
|
|
769
|
+
gcCount++;
|
|
770
|
+
}
|
|
771
|
+
if (gcCount > 0)
|
|
772
|
+
display.appendSteeringEvent(`GC: discarded ${gcCount} ghost branch(es) ≥2 waves old`);
|
|
673
773
|
}
|
|
674
774
|
catch { }
|
|
675
775
|
// Fire-and-forget debrief after each wave.
|
|
@@ -1039,24 +1139,45 @@ async function promptBudgetExtension(ctx) {
|
|
|
1039
1139
|
return suggested;
|
|
1040
1140
|
return n;
|
|
1041
1141
|
}
|
|
1142
|
+
/**
 * Run the project's health command and turn a failing build into heal tasks.
 *
 * When the failure output mentions two or more distinct source files, one task
 * is emitted per file (each carrying only the output lines that mention that
 * file) so the heals can run in parallel without merge conflicts. Otherwise a
 * single catch-all `heal-0` task carrying the (truncated) output is returned.
 *
 * @param {string} cwd - Directory the health command is detected for and run in.
 * @returns {{ id: string, prompt: string, type: "heal" }[]} Heal tasks; empty when
 *   no health command is detected, the command succeeds, or it timed out.
 */
function checkProjectHealth(cwd) {
    const cmd = detectHealthCommand(cwd);
    if (!cmd)
        return [];
    try {
        execSync(cmd, { cwd, encoding: "utf-8", stdio: "pipe", timeout: 60_000 });
        return [];
    }
    catch (err) {
        // A timed-out command says nothing about build health. execSync reports a
        // timeout as code "ETIMEDOUT"; `killed` is kept for backward compatibility.
        if (err.killed || err.code === "ETIMEDOUT")
            return [];
        const output = `${err.stdout || ""}\n${err.stderr || ""}`.trim();
        // Keep head and tail of very long output so the prompt stays bounded.
        const trimmed = output.length > 4000 ? `${output.slice(0, 2000)}\n…\n${output.slice(-2000)}` : output;
        // B4: Split heal by file — group output lines by the source path they mention.
        // The trailing \b keeps the full extension: without it "a.tsx" was captured
        // as "a.ts" and "data.json" as "data.js", naming files that do not exist.
        const fileRe = /\/src\/[\w./-]+\.(?:tsx?|jsx?)\b/g;
        const linesByFile = new Map();
        for (const line of trimmed.split("\n")) {
            // De-duplicate per line so a line naming the same file twice counts once.
            const mentioned = new Set(Array.from(line.matchAll(fileRe), (m) => m[0]));
            for (const file of mentioned) {
                if (!linesByFile.has(file))
                    linesByFile.set(file, []);
                linesByFile.get(file).push(line);
            }
        }
        if (linesByFile.size >= 2) {
            // One task per file — each agent gets only that file's error context
            // (capped at 30 lines), in order of first appearance in the output.
            return Array.from(linesByFile.entries()).map(([file, lines], i) => ({
                id: `heal-${i}`,
                prompt: `Fix the broken build errors in \`${file}\`. \`${cmd}\` fails:\n\`\`\`\n${lines.slice(0, 30).join("\n")}\n\`\`\`\nFix every error in this file. Run \`${cmd}\` when done to verify.`,
                type: "heal",
            }));
        }
        return [{
                id: "heal-0",
                prompt: `Fix the broken build. \`${cmd}\` fails after merging parallel work:\n\`\`\`\n${trimmed}\n\`\`\`\nFix every error. Run \`${cmd}\` when done to verify.`,
                type: "heal",
            }];
    }
}
|
|
1062
1183
|
function detectHealthCommand(cwd) {
|
package/dist/state.d.ts
CHANGED
|
@@ -72,6 +72,6 @@ export declare function recordBranches(agents: {
|
|
|
72
72
|
}[], mergeResults: {
|
|
73
73
|
branch: string;
|
|
74
74
|
ok: boolean;
|
|
75
|
-
}[], branches: BranchRecord[]): void;
|
|
75
|
+
}[], branches: BranchRecord[], currentWave?: number): void;
|
|
76
76
|
export declare function autoMergeBranches(cwd: string, branches: BranchRecord[], onLog: (msg: string) => void): void;
|
|
77
77
|
export declare function archiveMilestone(baseDir: string, waveNum: number): void;
|
package/dist/state.js
CHANGED
|
@@ -461,7 +461,7 @@ export function loadWaveHistory(runDir) {
|
|
|
461
461
|
}
|
|
462
462
|
}
|
|
463
463
|
// ── Branch management ──
|
|
464
|
-
export function recordBranches(agents, mergeResults, branches) {
|
|
464
|
+
export function recordBranches(agents, mergeResults, branches, currentWave) {
|
|
465
465
|
for (const a of agents) {
|
|
466
466
|
if (a.branch) {
|
|
467
467
|
branches.push({
|
|
@@ -475,8 +475,12 @@ export function recordBranches(agents, mergeResults, branches) {
|
|
|
475
475
|
}
|
|
476
476
|
for (const mr of mergeResults) {
|
|
477
477
|
const br = branches.find(b => b.branch === mr.branch);
|
|
478
|
-
if (br)
|
|
478
|
+
if (br) {
|
|
479
479
|
br.status = mr.ok ? "merged" : "merge-failed";
|
|
480
|
+
if (!mr.ok && !br.firstFailedWave && currentWave !== undefined) {
|
|
481
|
+
br.firstFailedWave = currentWave;
|
|
482
|
+
}
|
|
483
|
+
}
|
|
480
484
|
}
|
|
481
485
|
}
|
|
482
486
|
export function autoMergeBranches(cwd, branches, onLog) {
|
package/dist/steering.d.ts
CHANGED
|
@@ -1,3 +1,52 @@
|
|
|
1
1
|
import type { PermMode, SteerResult, RunMemory, WaveSummary } from "./types.js";
|
|
2
2
|
import { type PlannerLog } from "./planner-query.js";
|
|
3
|
+
/**
 * Structured-output format descriptor for steering responses: a `json_schema`
 * wrapper around an object schema with the steering fields and a `tasks` array.
 */
export declare const STEER_SCHEMA: {
    type: "json_schema";
    schema: {
        type: string;
        properties: {
            done: { type: string; };
            reasoning: { type: string; };
            statusUpdate: { type: string; };
            goalUpdate: { type: string; };
            estimatedSessionsRemaining: { type: string; };
            /** Array schema; each item describes one task for the next wave. */
            tasks: {
                type: string;
                items: {
                    type: string;
                    properties: {
                        prompt: { type: string; };
                        model: { type: string; };
                        noWorktree: { type: string; };
                        type: { type: string; enum: string[]; };
                        postcondition: { type: string; };
                    };
                    required: string[];
                };
            };
        };
        required: string[];
    };
};
|
|
3
52
|
export declare function steerWave(objective: string, history: WaveSummary[], remainingBudget: number, cwd: string, plannerModel: string, workerModel: string, fastModel: string | undefined, permissionMode: PermMode, concurrency: number, onLog: PlannerLog, runMemory?: RunMemory, transcriptName?: string): Promise<SteerResult>;
|
package/dist/steering.js
CHANGED
|
@@ -2,7 +2,10 @@ import { runPlannerQuery, attemptJsonParse, postProcess } from "./planner-query.
|
|
|
2
2
|
import { contextConstraintNote } from "./models.js";
|
|
3
3
|
import { DESIGN_THINKING } from "./planner.js";
|
|
4
4
|
import { createTurn, beginTurn, endTurn } from "./turns.js";
|
|
5
|
-
|
|
5
|
+
import { writeFileSync, mkdirSync } from "fs";
|
|
6
|
+
import { join } from "path";
|
|
7
|
+
import { getTranscriptRunDir } from "./transcripts.js";
|
|
8
|
+
export const STEER_SCHEMA = {
|
|
6
9
|
type: "json_schema",
|
|
7
10
|
schema: {
|
|
8
11
|
type: "object",
|
|
@@ -24,10 +27,11 @@ const STEER_SCHEMA = {
|
|
|
24
27
|
required: ["done", "tasks", "reasoning", "statusUpdate", "estimatedSessionsRemaining"],
|
|
25
28
|
},
|
|
26
29
|
};
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
const
|
|
30
|
+
const PROMPT_BUDGET = 6000;
|
|
31
|
+
/** Build a compact wave summary; keepLast controls how many recent waves to include. */
|
|
32
|
+
function buildRecentText(history, keepLast) {
|
|
33
|
+
const recentWaves = history.slice(-keepLast);
|
|
34
|
+
return recentWaves.length > 0 ? recentWaves.map(w => {
|
|
31
35
|
const lines = w.tasks.map(t => {
|
|
32
36
|
const isExecute = !t.type || t.type === "execute";
|
|
33
37
|
const files = t.filesChanged ? ` (${t.filesChanged} files)` : isExecute ? " (0 files)" : " (read-only)";
|
|
@@ -39,16 +43,25 @@ export async function steerWave(objective, history, remainingBudget, cwd, planne
|
|
|
39
43
|
const warn = totalExecute > 0 && zeroExecute > totalExecute / 2 ? `\n ⚠ ${zeroExecute}/${totalExecute} execute tasks changed 0 files -- tasks may be mis-scoped or blocked` : "";
|
|
40
44
|
return `Wave ${w.wave + 1}:\n${lines}${warn}`;
|
|
41
45
|
}).join("\n\n") : "(first wave)";
|
|
46
|
+
}
|
|
47
|
+
export async function steerWave(objective, history, remainingBudget, cwd, plannerModel, workerModel, fastModel, permissionMode, concurrency, onLog, runMemory, transcriptName = "steer") {
|
|
48
|
+
const constraint = contextConstraintNote(workerModel);
|
|
42
49
|
const cap = (s, max) => s.length > max ? s.slice(0, max) + "\n...(truncated)" : s;
|
|
43
50
|
const statusBlock = runMemory?.status ? `\nCurrent project status:\n${runMemory.status}\n` : "";
|
|
44
|
-
const milestoneBlock = runMemory?.milestones ? `\nMilestone snapshots:\n${cap(runMemory.milestones,
|
|
45
|
-
const designBlock = runMemory?.designs ? `\nArchitectural research:\n${cap(runMemory.designs,
|
|
46
|
-
const reflectionBlock = runMemory?.reflections ? `\nLatest quality reports:\n${cap(runMemory.reflections,
|
|
47
|
-
const verificationBlock = runMemory?.verifications ? `\nVerification results (from actually running the app):\n${cap(runMemory.verifications,
|
|
51
|
+
const milestoneBlock = runMemory?.milestones ? `\nMilestone snapshots:\n${cap(runMemory.milestones, 2000)}\n` : "";
|
|
52
|
+
const designBlock = runMemory?.designs ? `\nArchitectural research:\n${cap(runMemory.designs, 1500)}\n` : "";
|
|
53
|
+
const reflectionBlock = runMemory?.reflections ? `\nLatest quality reports:\n${cap(runMemory.reflections, 1000)}\n` : "";
|
|
54
|
+
const verificationBlock = runMemory?.verifications ? `\nVerification results (from actually running the app):\n${cap(runMemory.verifications, 1000)}\n` : "";
|
|
48
55
|
const goalBlock = runMemory?.goal ? `\nNorth star -- what "amazing" means:\n${runMemory.goal}\n` : "";
|
|
49
|
-
const prevRunBlock = runMemory?.previousRuns ? `\nKnowledge from previous runs:\n${cap(runMemory.previousRuns,
|
|
56
|
+
const prevRunBlock = runMemory?.previousRuns ? `\nKnowledge from previous runs:\n${cap(runMemory.previousRuns, 800)}\n` : "";
|
|
50
57
|
const guidanceBlock = runMemory?.userGuidance ? `\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\nUSER DIRECTIVES -- highest priority\nThese come directly from the user running this session. They override prior assumptions about status, goal, and next steps. Incorporate them into the wave you compose below. If they conflict with earlier decisions, the user wins. Reflect the new direction in statusUpdate so future waves remember.\n\n${cap(runMemory.userGuidance, 4000)}\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n` : "";
|
|
51
|
-
|
|
58
|
+
// Collapse archetype menu after wave 3 to save ~2 KB
|
|
59
|
+
const archetypesShort = `Archetypes: execute | explore | critique | synthesize | verify | user-test | polish | simplify`;
|
|
60
|
+
const archetypeBlock = history.length >= 3
|
|
61
|
+
? archetypesShort
|
|
62
|
+
: null;
|
|
63
|
+
let recentText = buildRecentText(history, 3);
|
|
64
|
+
let prompt = `You are the quality director for an autonomous multi-wave agent system. Your job is to push the work toward "amazing," not just "done."
|
|
52
65
|
${guidanceBlock}
|
|
53
66
|
Objective: ${objective}
|
|
54
67
|
${goalBlock}${statusBlock}${milestoneBlock}${prevRunBlock}
|
|
@@ -66,7 +79,7 @@ If verification found issues, those are the priority. Fix what's broken before b
|
|
|
66
79
|
|
|
67
80
|
## Compose the next wave
|
|
68
81
|
|
|
69
|
-
You have full creative freedom. Design the wave that will have the highest impact right now
|
|
82
|
+
You have full creative freedom. Design the wave that will have the highest impact right now.${archetypeBlock ? `\n\nUse these archetypes as shorthand — mix, adapt, or invent your own:\n\n${archetypeBlock}` : ` Here are archetypes to draw from -- mix, adapt, or invent your own:
|
|
70
83
|
|
|
71
84
|
**Execute** -- Agents implement concrete changes in parallel. Each touches different files. The bread and butter.
|
|
72
85
|
Example: 5 agents each owning a different feature or fix
|
|
@@ -90,52 +103,86 @@ You have full creative freedom. Design the wave that will have the highest impac
|
|
|
90
103
|
Example: 2 agents, one on happy paths, one on error/edge states
|
|
91
104
|
|
|
92
105
|
**Simplify** -- Invoke the 'simplify' skill. It reviews changed code and spawns parallel sub-agents for thorough review.
|
|
93
|
-
Example: 1 agent per wave with task type "review", let the skill handle the rest
|
|
94
|
-
|
|
95
|
-
You can combine these. A wave can have 3 execute agents + 1 verification agent. Or 2 divergent explorers. Whatever the situation calls for.
|
|
106
|
+
Example: 1 agent per wave with task type "review", let the skill handle the rest`}
|
|
96
107
|
|
|
97
|
-
For non-execute tasks (critique, verify, user-test, synthesize), tell agents to write their output to files in the run directory so findings persist for future waves. Use paths like: .claude-overnight/latest/reflections/wave-
|
|
108
|
+
For non-execute tasks (critique, verify, user-test, synthesize), tell agents to write their output to files in the run directory so findings persist for future waves. Use paths like: .claude-overnight/latest/reflections/wave-n-{topic}.md or .claude-overnight/latest/verifications/wave-n-{topic}.md.
|
|
98
109
|
|
|
99
110
|
IMPORTANT: You cannot declare "done" unless at least one verification has confirmed the app works. If you're considering done but haven't verified, compose a verification task first.
|
|
100
111
|
|
|
101
112
|
Respond with ONLY a JSON object (no markdown fences):
|
|
102
|
-
{
|
|
103
|
-
"done": false,
|
|
104
|
-
"reasoning": "your assessment and why you chose this wave composition",
|
|
105
|
-
"goalUpdate": "optional -- refine what 'amazing' means as you learn more",
|
|
106
|
-
"statusUpdate": "REQUIRED -- concise project status: what's built, what works, what's rough, quality level, key gaps. This replaces the previous status.",
|
|
107
|
-
"estimatedSessionsRemaining": 15,
|
|
108
|
-
"tasks": [
|
|
109
|
-
{"prompt": "task instruction...", "model": "worker", "postcondition": "test -f src/new-file.ts"},
|
|
110
|
-
{"prompt": "quick icon fix, verified by next wave's workers...", "model": "fast"},
|
|
111
|
-
{"prompt": "verify the app end-to-end...", "model": "worker", "noWorktree": true}
|
|
112
|
-
]
|
|
113
|
-
}
|
|
113
|
+
{"done":boolean,"reasoning":"...","statusUpdate":"REQUIRED","estimatedSessionsRemaining":N,"tasks":[{"prompt":"...","model":"worker|fast","noWorktree":true/false,"postcondition":"..."}]}
|
|
114
114
|
|
|
115
115
|
"estimatedSessionsRemaining" is REQUIRED. Your best honest estimate of how many MORE agent sessions (beyond the wave you just composed above) are needed to reach 'amazing' -- include follow-up fixes, polish, verification, and anything else you'd want before shipping. Be realistic, not optimistic. Use 0 only if truly done.
|
|
116
116
|
|
|
117
|
-
The "model" field on each task —
|
|
117
|
+
The "model" field on each task — two kinds of workers. Pick the right one:
|
|
118
118
|
|
|
119
|
-
**Fast worker — "fast" (${fastModel ?? "not set"})**
|
|
120
|
-
- Single-file edits, refactors, renames
|
|
121
|
-
- Surgical multi-line changes with a clear spec (add a param, wrap a call, tweak a prompt line)
|
|
122
|
-
- Read/research: scan files, summarize findings
|
|
123
|
-
- Build checks, postcondition verification
|
|
124
|
-
- E2E test runs with concrete steps
|
|
125
|
-
- Simple critiques, polish tweaks
|
|
126
|
-
- Running existing scripts/tests and capturing output
|
|
127
|
-
- Docs / markdown updates
|
|
128
|
-
- Stdlib-only utility scripts with a crisp spec
|
|
119
|
+
**Fast worker — "fast" (${fastModel ?? "not set"})** for well-scoped, mechanical tasks: single-file edits, refactors, renames, read/research, build checks, simple critiques, docs updates.
|
|
129
120
|
|
|
130
|
-
**Main worker — "worker" (${workerModel})**
|
|
121
|
+
**Main worker — "worker" (${workerModel})** for tasks that need deeper reasoning: multi-file features, complex logic, architectural changes, ambiguous specs.
|
|
131
122
|
|
|
132
|
-
When in doubt, pick "fast".
|
|
123
|
+
When in doubt, pick "fast".
|
|
133
124
|
|
|
134
|
-
Set "noWorktree": true for verify/user-test tasks
|
|
125
|
+
Set "noWorktree": true for verify/user-test tasks.
|
|
135
126
|
|
|
136
|
-
OPTIONAL "postcondition": a single shell one-liner that exits 0 when the task is truly done.
|
|
127
|
+
OPTIONAL "postcondition": a single shell one-liner that exits 0 when the task is truly done. Keep it cheap. Omit for exploratory tasks.
|
|
137
128
|
|
|
138
|
-
If done: {"done":
|
|
129
|
+
If done: {"done":true,"reasoning":"...","statusUpdate":"...","estimatedSessionsRemaining":0,"tasks":[]}`;
|
|
130
|
+
// ── Hard 6 KB budget: trim non-critical blocks if over limit ──
|
|
131
|
+
let trimmed = 0;
|
|
132
|
+
if (prompt.length > PROMPT_BUDGET) {
|
|
133
|
+
// 1. Keep last 2 waves instead of 3
|
|
134
|
+
recentText = buildRecentText(history, 2);
|
|
135
|
+
prompt = prompt.replace(`Recent waves:\n${buildRecentText(history, 3)}`, `Recent waves:\n${recentText}`);
|
|
136
|
+
trimmed++;
|
|
137
|
+
}
|
|
138
|
+
if (prompt.length > PROMPT_BUDGET && runMemory?.milestones) {
|
|
139
|
+
const old = `\nMilestone snapshots:\n${cap(runMemory.milestones, 2000)}\n`;
|
|
140
|
+
const neu = `\nMilestone snapshots:\n${cap(runMemory.milestones, 1000)}\n`;
|
|
141
|
+
if (old !== neu) {
|
|
142
|
+
prompt = prompt.replace(old, neu);
|
|
143
|
+
trimmed++;
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
if (prompt.length > PROMPT_BUDGET && runMemory?.designs) {
|
|
147
|
+
const old = `\nArchitectural research:\n${cap(runMemory.designs, 1500)}\n`;
|
|
148
|
+
const neu = `\nArchitectural research:\n${cap(runMemory.designs, 1000)}\n`;
|
|
149
|
+
if (old !== neu) {
|
|
150
|
+
prompt = prompt.replace(old, neu);
|
|
151
|
+
trimmed++;
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
if (prompt.length > PROMPT_BUDGET && runMemory?.reflections) {
|
|
155
|
+
const old = `\nLatest quality reports:\n${cap(runMemory.reflections, 1000)}\n`;
|
|
156
|
+
const neu = `\nLatest quality reports:\n${cap(runMemory.reflections, 500)}\n`;
|
|
157
|
+
if (old !== neu) {
|
|
158
|
+
prompt = prompt.replace(old, neu);
|
|
159
|
+
trimmed++;
|
|
160
|
+
}
|
|
161
|
+
}
|
|
162
|
+
if (prompt.length > PROMPT_BUDGET && runMemory?.verifications) {
|
|
163
|
+
const old = `\nVerification results (from actually running the app):\n${cap(runMemory.verifications, 1000)}\n`;
|
|
164
|
+
const neu = `\nVerification results (from actually running the app):\n${cap(runMemory.verifications, 500)}\n`;
|
|
165
|
+
if (old !== neu) {
|
|
166
|
+
prompt = prompt.replace(old, neu);
|
|
167
|
+
trimmed++;
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
if (prompt.length > PROMPT_BUDGET && runMemory?.previousRuns) {
|
|
171
|
+
const old = `\nKnowledge from previous runs:\n${cap(runMemory.previousRuns, 800)}\n`;
|
|
172
|
+
const neu = `\nKnowledge from previous runs:\n${cap(runMemory.previousRuns, 400)}\n`;
|
|
173
|
+
if (old !== neu) {
|
|
174
|
+
prompt = prompt.replace(old, neu);
|
|
175
|
+
trimmed++;
|
|
176
|
+
}
|
|
177
|
+
}
|
|
178
|
+
if (trimmed > 0) {
|
|
179
|
+
onLog(`Steering prompt trimmed ${trimmed} blocks (${prompt.length}/${PROMPT_BUDGET} chars)`, "event");
|
|
180
|
+
}
|
|
181
|
+
// ── Non-Claude planner JSON hardening ──
|
|
182
|
+
if (!/^claude/i.test(plannerModel)) {
|
|
183
|
+
const directive = `OUTPUT: single JSON object. No prose. No markdown fences.`;
|
|
184
|
+
prompt = `${directive}\n\n${prompt}\n\n${directive}`;
|
|
185
|
+
}
|
|
139
186
|
onLog("Assessing...", "status");
|
|
140
187
|
onLog(`Reading codebase -- wave ${history.length + 1}`, "event");
|
|
141
188
|
const turn = createTurn("steer", `Steer wave ${history.length + 1}`, `steer-${history.length}`, plannerModel);
|
|
@@ -146,11 +193,34 @@ If done: {"done": true, "reasoning": "...", "statusUpdate": "...", "estimatedSes
|
|
|
146
193
|
if (first)
|
|
147
194
|
return first;
|
|
148
195
|
onLog(`Steering parse failed (${resultText.length} chars). Asking model to fix...`, "event");
|
|
196
|
+
// C2: persist raw output on parse failure
|
|
197
|
+
const steerDir = getTranscriptRunDir() ? join(getTranscriptRunDir(), "steering") : undefined;
|
|
198
|
+
if (steerDir) {
|
|
199
|
+
try {
|
|
200
|
+
mkdirSync(steerDir, { recursive: true });
|
|
201
|
+
}
|
|
202
|
+
catch { }
|
|
203
|
+
// Extract wave info from transcriptName (e.g. "steer-wave-32-attempt-1")
|
|
204
|
+
const waveMatch = transcriptName.match(/wave-(\d+)-attempt-(\d+)/);
|
|
205
|
+
if (waveMatch) {
|
|
206
|
+
writeFileSync(join(steerDir, `wave-${waveMatch[1]}-attempt-${waveMatch[2]}-raw.txt`), resultText, "utf-8");
|
|
207
|
+
}
|
|
208
|
+
}
|
|
149
209
|
const snippet = resultText.length > 2000 ? resultText.slice(0, 1000) + "\n...\n" + resultText.slice(-800) : resultText;
|
|
150
210
|
const retryText = await runPlannerQuery(`Your previous steering response could not be parsed as JSON. Here is what you returned:\n\n---\n${snippet}\n---\n\nExtract or rewrite the above as ONLY a valid JSON object with this schema: {"done":boolean,"reasoning":"...","statusUpdate":"...","tasks":[{"prompt":"..."}]}\n\nRespond with ONLY the JSON, no markdown fences, no explanation.`, { cwd, model: plannerModel, permissionMode, outputFormat: STEER_SCHEMA, transcriptName: `${transcriptName}-retry`, turnId: turn.id }, onLog);
|
|
151
211
|
const retryParsed = attemptJsonParse(retryText);
|
|
152
212
|
if (retryParsed)
|
|
153
213
|
return retryParsed;
|
|
214
|
+
// C2: persist retry raw output
|
|
215
|
+
if (steerDir) {
|
|
216
|
+
try {
|
|
217
|
+
const waveMatch2 = transcriptName.match(/wave-(\d+)-attempt-(\d+)/);
|
|
218
|
+
if (waveMatch2) {
|
|
219
|
+
writeFileSync(join(steerDir, `wave-${waveMatch2[1]}-attempt-${waveMatch2[2]}-retry-raw.txt`), retryText, "utf-8");
|
|
220
|
+
}
|
|
221
|
+
}
|
|
222
|
+
catch { }
|
|
223
|
+
}
|
|
154
224
|
throw new Error(`Could not parse steering response after retry (${resultText.length} chars: ${resultText.slice(0, 120)}...)`);
|
|
155
225
|
})();
|
|
156
226
|
const isDone = parsed.done === true;
|
package/dist/transcripts.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
export declare function setTranscriptRunDir(dir: string | undefined): void;
|
|
2
2
|
export declare function getTranscriptRunDir(): string | undefined;
|
|
3
3
|
export declare function transcriptPath(name: string): string | undefined;
|
|
4
|
-
/** Append a single event;
|
|
4
|
+
/** Append a single event; log to stderr once per name on failure (C5). */
|
|
5
5
|
export declare function writeTranscriptEvent(name: string, event: Record<string, unknown>): void;
|
package/dist/transcripts.js
CHANGED
|
@@ -25,7 +25,9 @@ export function getTranscriptRunDir() {
|
|
|
25
25
|
export function transcriptPath(name) {
|
|
26
26
|
return _runDir ? join(_runDir, "transcripts", `${name}.ndjson`) : undefined;
|
|
27
27
|
}
|
|
28
|
-
/**
|
|
28
|
+
/** Names that already errored — guard against repeated stderr spam. */
|
|
29
|
+
const _seenErrors = new Set();
|
|
30
|
+
/** Append a single event; log to stderr once per name on failure (C5). */
|
|
29
31
|
export function writeTranscriptEvent(name, event) {
|
|
30
32
|
const path = transcriptPath(name);
|
|
31
33
|
if (!path)
|
|
@@ -34,5 +36,11 @@ export function writeTranscriptEvent(name, event) {
|
|
|
34
36
|
mkdirSync(dirname(path), { recursive: true });
|
|
35
37
|
appendFileSync(path, JSON.stringify({ t: Date.now(), ...event }) + "\n", "utf-8");
|
|
36
38
|
}
|
|
37
|
-
catch {
|
|
39
|
+
catch (err) {
|
|
40
|
+
if (!_seenErrors.has(name)) {
|
|
41
|
+
_seenErrors.add(name);
|
|
42
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
43
|
+
process.stderr.write(`[transcript] writeTranscriptEvent("${name}") failed: ${msg}\n`);
|
|
44
|
+
}
|
|
45
|
+
}
|
|
38
46
|
}
|
package/dist/types.d.ts
CHANGED
|
@@ -156,9 +156,10 @@ export type MergeStrategy = "yolo" | "branch";
|
|
|
156
156
|
export interface BranchRecord {
|
|
157
157
|
branch: string;
|
|
158
158
|
taskPrompt: string;
|
|
159
|
-
status: "merged" | "unmerged" | "failed" | "merge-failed";
|
|
159
|
+
status: "merged" | "unmerged" | "failed" | "merge-failed" | "discarded";
|
|
160
160
|
filesChanged: number;
|
|
161
161
|
costUsd: number;
|
|
162
|
+
firstFailedWave?: number;
|
|
162
163
|
}
|
|
163
164
|
/** Per-window rate limit snapshot (matches SDK rateLimitType). */
|
|
164
165
|
export interface RateLimitWindow {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-overnight",
|
|
3
|
-
"version": "1.25.42",
|
|
3
|
+
"version": "1.25.43",
|
|
4
4
|
"description": "Parallel Claude agents in git worktrees with a usage cap that reserves headroom for your interactive Claude Code. Crash-safe resume. Provider-agnostic model catalog (Anthropic, Cursor, OpenAI, Gemini, DeepSeek, Llama, Qwen) with capability-based task scoping.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-overnight",
|
|
3
|
-
"version": "1.25.42",
|
|
3
|
+
"version": "1.25.43",
|
|
4
4
|
"description": "Claude Code skill for understanding, installing, and inspecting claude-overnight runs -- parallel Claude agents in git worktrees with thinking waves, multi-wave steering, and crash-safe resume. Supports Cursor API Proxy, Qwen, OpenRouter.",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "Francesco Fornace"
|