claude-overnight 1.4.0 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -0
- package/dist/index.js +131 -82
- package/dist/planner.d.ts +3 -3
- package/dist/planner.js +85 -77
- package/dist/swarm.d.ts +3 -0
- package/dist/swarm.js +2 -0
- package/dist/types.d.ts +3 -3
- package/dist/ui.js +28 -9
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -49,6 +49,7 @@ claude-overnight
|
|
|
49
49
|
◆ Thinking: 5 agents exploring... ← architects analyze your codebase
|
|
50
50
|
◆ Orchestrating plan... ← synthesizes 50 concrete tasks
|
|
51
51
|
◆ Wave 1 · 50 tasks · $4.20 spent ← fully autonomous from here
|
|
52
|
+
↑ 1.2M in ↓ 340K out $4.20 / $4.24 total
|
|
52
53
|
◆ Assessing... how close to amazing?
|
|
53
54
|
◆ Wave 2 · 30 tasks · $18.50 spent ← improvements from assessment
|
|
54
55
|
◆ Reflection: 2 agents reviewing ← deep quality audit
|
|
@@ -223,6 +224,8 @@ Changes take effect between waves — active agents finish their current task.
|
|
|
223
224
|
|
|
224
225
|
The usage bar cycles through all rate limit windows (5h, 7d, etc.) every 3 seconds, showing utilization per window. Usage info is shown during all phases — thinking, orchestration, steering, and execution.
|
|
225
226
|
|
|
227
|
+
When using extra usage with a budget, a dedicated progress bar shows spend vs limit with color-coded fill (magenta → yellow → red).
|
|
228
|
+
|
|
226
229
|
## Rate limits
|
|
227
230
|
|
|
228
231
|
Built for unattended runs lasting hours or days.
|
package/dist/index.js
CHANGED
|
@@ -7,7 +7,7 @@ import { createInterface } from "readline";
|
|
|
7
7
|
import chalk from "chalk";
|
|
8
8
|
import { query } from "@anthropic-ai/claude-agent-sdk";
|
|
9
9
|
import { Swarm } from "./swarm.js";
|
|
10
|
-
import { planTasks, refinePlan, detectModelTier, steerWave, identifyThemes, buildThinkingTasks,
|
|
10
|
+
import { planTasks, refinePlan, detectModelTier, steerWave, identifyThemes, buildThinkingTasks, orchestrate } from "./planner.js";
|
|
11
11
|
import { startRenderLoop, renderSummary } from "./ui.js";
|
|
12
12
|
// ── CLI flag parsing ──
|
|
13
13
|
function parseCliFlags(argv) {
|
|
@@ -295,6 +295,7 @@ function readRunMemory(runDir, previousRuns) {
|
|
|
295
295
|
return {
|
|
296
296
|
designs: readMdDir(join(runDir, "designs")),
|
|
297
297
|
reflections: readMdDir(join(runDir, "reflections")),
|
|
298
|
+
verifications: readMdDir(join(runDir, "verifications")),
|
|
298
299
|
milestones: readMdDir(join(runDir, "milestones")),
|
|
299
300
|
status,
|
|
300
301
|
goal,
|
|
@@ -385,6 +386,7 @@ function createRunDir(rootDir) {
|
|
|
385
386
|
const runDir = join(rootDir, "runs", ts);
|
|
386
387
|
mkdirSync(join(runDir, "designs"), { recursive: true });
|
|
387
388
|
mkdirSync(join(runDir, "reflections"), { recursive: true });
|
|
389
|
+
mkdirSync(join(runDir, "verifications"), { recursive: true });
|
|
388
390
|
mkdirSync(join(runDir, "milestones"), { recursive: true });
|
|
389
391
|
mkdirSync(join(runDir, "sessions"), { recursive: true });
|
|
390
392
|
return runDir;
|
|
@@ -408,6 +410,35 @@ function saveWaveSession(baseDir, waveNum, kind, swarm) {
|
|
|
408
410
|
totalCost: swarm.totalCostUsd,
|
|
409
411
|
}, null, 2), "utf-8");
|
|
410
412
|
}
|
|
413
|
+
/**
 * Rebuild waveHistory from saved session files on resume.
 *
 * Reads every `wave-*.json` file in `<runDir>/sessions`, orders them by the
 * number embedded in the file name, and converts each one into a wave summary
 * of the form `{ wave, kind, tasks: [{ prompt, status, filesChanged, error }] }`.
 *
 * Restoring history is best-effort: a missing sessions directory yields an
 * empty history, and a session file that cannot be read or parsed is skipped
 * on its own so one truncated file (e.g. from a crash mid-write) does not
 * throw away every other wave.
 *
 * @param {string} runDir - Run directory that contains the `sessions` folder.
 * @returns {Array<{wave: *, kind: *, tasks: Array<{prompt: *, status: *, filesChanged: *, error: *}>}>}
 *   Wave summaries in ascending wave-number order; `[]` when nothing can be restored.
 */
function loadWaveHistory(runDir) {
    const dir = join(runDir, "sessions");
    let names;
    try {
        names = readdirSync(dir);
    }
    catch {
        // No sessions directory (or it is unreadable): nothing to restore.
        return [];
    }
    const prefix = "wave-";
    const suffix = ".json";
    const entries = names
        .filter((name) => name.startsWith(prefix) && name.endsWith(suffix))
        .map((name) => ({
            name,
            // Explicit radix; parseInt stops at the first non-digit, so "3-foo" still yields 3.
            num: Number.parseInt(name.slice(prefix.length, name.length - suffix.length), 10),
        }));
    entries.sort((a, b) => {
        const aNumeric = !Number.isNaN(a.num);
        const bNumeric = !Number.isNaN(b.num);
        if (aNumeric && bNumeric)
            return a.num - b.num;
        // A comparator must never return NaN: put non-numeric names last, ordered by name.
        if (aNumeric !== bNumeric)
            return aNumeric ? -1 : 1;
        return a.name < b.name ? -1 : a.name > b.name ? 1 : 0;
    });
    const history = [];
    for (const { name } of entries) {
        try {
            const data = JSON.parse(readFileSync(join(dir, name), "utf-8"));
            const agents = Array.isArray(data.agents) ? data.agents : [];
            history.push({
                wave: data.wave,
                kind: data.kind,
                tasks: agents.map((a) => ({
                    prompt: a.prompt,
                    status: a.status,
                    filesChanged: a.filesChanged,
                    error: a.error,
                })),
            });
        }
        catch {
            // Unreadable or corrupt session file: skip it and keep the rest of the history.
        }
    }
    return history;
}
|
|
411
442
|
function recordBranches(swarm, branches) {
|
|
412
443
|
for (const a of swarm.agents) {
|
|
413
444
|
if (a.branch) {
|
|
@@ -669,6 +700,11 @@ async function main() {
|
|
|
669
700
|
if (unmerged > 0) {
|
|
670
701
|
console.log("");
|
|
671
702
|
autoMergeBranches(cwd, prev.branches, (msg) => console.log(chalk.dim(` ${msg}`)));
|
|
703
|
+
// Persist merged branch statuses immediately so they survive a crash before next saveRunState
|
|
704
|
+
try {
|
|
705
|
+
saveRunState(incomplete.dir, prev);
|
|
706
|
+
}
|
|
707
|
+
catch { }
|
|
672
708
|
}
|
|
673
709
|
}
|
|
674
710
|
}
|
|
@@ -681,7 +717,18 @@ async function main() {
|
|
|
681
717
|
let usageCap;
|
|
682
718
|
let allowExtraUsage = false;
|
|
683
719
|
let extraUsageBudget;
|
|
684
|
-
if (
|
|
720
|
+
if (resuming) {
|
|
721
|
+
// Skip interactive flow entirely — all config is restored from saved state later
|
|
722
|
+
workerModel = resumeState.workerModel;
|
|
723
|
+
plannerModel = resumeState.plannerModel;
|
|
724
|
+
budget = resumeState.budget;
|
|
725
|
+
concurrency = resumeState.concurrency;
|
|
726
|
+
objective = resumeState.objective;
|
|
727
|
+
usageCap = resumeState.usageCap;
|
|
728
|
+
allowExtraUsage = resumeState.allowExtraUsage ?? false;
|
|
729
|
+
extraUsageBudget = resumeState.extraUsageBudget;
|
|
730
|
+
}
|
|
731
|
+
else if (!nonInteractive) {
|
|
685
732
|
// ① Objective
|
|
686
733
|
while (true) {
|
|
687
734
|
objective = await ask(`\n ${chalk.cyan("①")} ${chalk.bold("What should the agents do?")}\n ${chalk.cyan(">")} `);
|
|
@@ -817,11 +864,11 @@ async function main() {
|
|
|
817
864
|
}
|
|
818
865
|
}
|
|
819
866
|
validateConcurrency(concurrency);
|
|
820
|
-
|
|
821
|
-
|
|
867
|
+
let permissionMode = resuming ? resumeState.permissionMode : (fileCfg?.permissionMode ?? "auto");
|
|
868
|
+
let useWorktrees = resuming ? resumeState.useWorktrees : (fileCfg?.useWorktrees ?? isGitRepo(cwd));
|
|
822
869
|
if (useWorktrees)
|
|
823
870
|
validateGitRepo(cwd);
|
|
824
|
-
|
|
871
|
+
let mergeStrategy = resuming ? resumeState.mergeStrategy : (fileCfg?.mergeStrategy ?? "yolo");
|
|
825
872
|
if (nonInteractive) {
|
|
826
873
|
const capStr = usageCap != null ? ` cap=${Math.round(usageCap * 100)}%` : "";
|
|
827
874
|
const extraStr = allowExtraUsage ? (extraUsageBudget ? ` extra=$${extraUsageBudget}` : " extra=∞") : " extra=off";
|
|
@@ -838,7 +885,7 @@ async function main() {
|
|
|
838
885
|
const runDir = resuming && resumeRunDir ? resumeRunDir : (orphanedDir ?? createRunDir(rootDir));
|
|
839
886
|
const previousKnowledge = readPreviousRunKnowledge(rootDir);
|
|
840
887
|
// ── Plan phase (interactive: review loop, non-interactive: auto-plan or skip) ──
|
|
841
|
-
const needsPlan = tasks.length === 0;
|
|
888
|
+
const needsPlan = tasks.length === 0 && !resuming;
|
|
842
889
|
const designDir = join(runDir, "designs");
|
|
843
890
|
if (needsPlan) {
|
|
844
891
|
if (noTTY) {
|
|
@@ -1052,7 +1099,7 @@ async function main() {
|
|
|
1052
1099
|
process.exit(1);
|
|
1053
1100
|
}
|
|
1054
1101
|
}
|
|
1055
|
-
if (tasks.length === 0) {
|
|
1102
|
+
if (tasks.length === 0 && !resuming) {
|
|
1056
1103
|
console.error("No tasks provided.");
|
|
1057
1104
|
process.exit(1);
|
|
1058
1105
|
}
|
|
@@ -1064,7 +1111,7 @@ async function main() {
|
|
|
1064
1111
|
// ── Run (wave loop) ──
|
|
1065
1112
|
process.stdout.write("\x1B[?25l");
|
|
1066
1113
|
const restore = () => process.stdout.write("\x1B[?25h\n");
|
|
1067
|
-
const runStartedAt = Date.now();
|
|
1114
|
+
const runStartedAt = resuming && resumeState?.startedAt ? new Date(resumeState.startedAt).getTime() : Date.now();
|
|
1068
1115
|
// Wave-loop state — either fresh or resumed
|
|
1069
1116
|
mkdirSync(join(runDir, "reflections"), { recursive: true });
|
|
1070
1117
|
mkdirSync(join(runDir, "milestones"), { recursive: true });
|
|
@@ -1079,7 +1126,7 @@ async function main() {
|
|
|
1079
1126
|
let accIn = 0, accOut = 0;
|
|
1080
1127
|
let lastCapped = false, lastAborted = false, objectiveComplete = false;
|
|
1081
1128
|
let lastWaveKind;
|
|
1082
|
-
let
|
|
1129
|
+
let overheadBudgetUsed;
|
|
1083
1130
|
const branches = [];
|
|
1084
1131
|
if (resuming && resumeState) {
|
|
1085
1132
|
// Restore ALL config from saved state
|
|
@@ -1091,7 +1138,7 @@ async function main() {
|
|
|
1091
1138
|
accFailed = resumeState.accFailed;
|
|
1092
1139
|
accTools = 0;
|
|
1093
1140
|
lastWaveKind = resumeState.lastWaveKind;
|
|
1094
|
-
|
|
1141
|
+
overheadBudgetUsed = resumeState.overheadBudgetUsed ?? (resumeState.reflectionBudgetUsed ?? 0) + (resumeState.verificationBudgetUsed ?? 0);
|
|
1095
1142
|
branches.push(...resumeState.branches);
|
|
1096
1143
|
objective = resumeState.objective;
|
|
1097
1144
|
workerModel = resumeState.workerModel;
|
|
@@ -1102,7 +1149,12 @@ async function main() {
|
|
|
1102
1149
|
usageCap = resumeState.usageCap;
|
|
1103
1150
|
allowExtraUsage = resumeState.allowExtraUsage ?? false;
|
|
1104
1151
|
extraUsageBudget = resumeState.extraUsageBudget;
|
|
1105
|
-
|
|
1152
|
+
permissionMode = resumeState.permissionMode;
|
|
1153
|
+
useWorktrees = resumeState.useWorktrees;
|
|
1154
|
+
mergeStrategy = resumeState.mergeStrategy;
|
|
1155
|
+
// Restore wave history from saved session files so steerer has full context
|
|
1156
|
+
waveHistory.push(...loadWaveHistory(runDir));
|
|
1157
|
+
console.log(chalk.green(`\n ✓ Resumed`) + chalk.dim(` · wave ${waveNum + 1} · ${remaining} remaining · $${accCost.toFixed(2)} spent · ${waveHistory.length} prior waves\n`));
|
|
1106
1158
|
}
|
|
1107
1159
|
else {
|
|
1108
1160
|
// Fresh run
|
|
@@ -1121,15 +1173,15 @@ async function main() {
|
|
|
1121
1173
|
accIn = thinkingIn;
|
|
1122
1174
|
accOut = thinkingOut;
|
|
1123
1175
|
lastWaveKind = "execute";
|
|
1124
|
-
|
|
1176
|
+
overheadBudgetUsed = 0;
|
|
1125
1177
|
}
|
|
1126
1178
|
liveConfig.remaining = remaining;
|
|
1127
1179
|
liveConfig.usageCap = usageCap;
|
|
1128
|
-
const
|
|
1180
|
+
const maxOverheadBudget = Math.max(4, Math.ceil((budget ?? 10) * 0.15));
|
|
1129
1181
|
// For flex + branch strategy: create one target branch, waves merge via yolo into it
|
|
1130
1182
|
let runBranch;
|
|
1131
1183
|
let originalRef;
|
|
1132
|
-
if (flex && mergeStrategy === "branch" && useWorktrees) {
|
|
1184
|
+
if (flex && mergeStrategy === "branch" && useWorktrees && !resuming) {
|
|
1133
1185
|
try {
|
|
1134
1186
|
originalRef = execSync("git rev-parse --abbrev-ref HEAD", { cwd, encoding: "utf-8", stdio: "pipe" }).trim();
|
|
1135
1187
|
if (originalRef === "HEAD")
|
|
@@ -1158,6 +1210,38 @@ async function main() {
|
|
|
1158
1210
|
process.on("SIGTERM", () => gracefulStop("SIGTERM"));
|
|
1159
1211
|
process.on("uncaughtException", (err) => { currentSwarm?.abort(); currentSwarm?.cleanup(); restore(); console.error(chalk.red(`\n Uncaught: ${err.message}`)); process.exit(1); });
|
|
1160
1212
|
process.on("unhandledRejection", (reason) => { currentSwarm?.abort(); currentSwarm?.cleanup(); restore(); console.error(chalk.red(`\n Unhandled: ${reason instanceof Error ? reason.message : reason}`)); process.exit(1); });
|
|
1213
|
+
// When resuming a flex run with no queued tasks, steer immediately to get the next wave
|
|
1214
|
+
if (resuming && flex && currentTasks.length === 0 && remaining > 0) {
|
|
1215
|
+
console.log(chalk.cyan(`\n ◆ Assessing...\n`));
|
|
1216
|
+
process.stdout.write("\x1B[?25l");
|
|
1217
|
+
try {
|
|
1218
|
+
const memory = readRunMemory(runDir, previousKnowledge || undefined);
|
|
1219
|
+
const steer = await steerWave(objective, waveHistory, remaining, cwd, plannerModel, workerModel, permissionMode, concurrency, makeProgressLog(), memory);
|
|
1220
|
+
process.stdout.write(`\x1B[2K\r`);
|
|
1221
|
+
process.stdout.write("\x1B[?25h");
|
|
1222
|
+
if (steer.statusUpdate)
|
|
1223
|
+
writeStatus(runDir, steer.statusUpdate);
|
|
1224
|
+
if (steer.goalUpdate)
|
|
1225
|
+
writeGoalUpdate(runDir, steer.goalUpdate);
|
|
1226
|
+
if (!steer.done && steer.tasks.length > 0) {
|
|
1227
|
+
console.log(chalk.dim(` ${steer.reasoning}\n`));
|
|
1228
|
+
currentTasks = steer.tasks.map(t => ({
|
|
1229
|
+
...t,
|
|
1230
|
+
model: t.model === "planner" ? plannerModel : t.model === "worker" ? workerModel : t.model,
|
|
1231
|
+
}));
|
|
1232
|
+
lastWaveKind = steer.waveKind;
|
|
1233
|
+
}
|
|
1234
|
+
else if (steer.done) {
|
|
1235
|
+
console.log(chalk.green(` \u2713 ${steer.reasoning}\n`));
|
|
1236
|
+
objectiveComplete = true;
|
|
1237
|
+
remaining = 0;
|
|
1238
|
+
}
|
|
1239
|
+
}
|
|
1240
|
+
catch (err) {
|
|
1241
|
+
process.stdout.write("\x1B[?25h");
|
|
1242
|
+
console.log(chalk.yellow(` Steering failed: ${err.message?.slice(0, 80)} \u2014 stopping\n`));
|
|
1243
|
+
}
|
|
1244
|
+
}
|
|
1161
1245
|
while (remaining > 0 && currentTasks.length > 0 && !stopping) {
|
|
1162
1246
|
if (currentTasks.length > remaining)
|
|
1163
1247
|
currentTasks = currentTasks.slice(0, remaining);
|
|
@@ -1168,6 +1252,7 @@ async function main() {
|
|
|
1168
1252
|
const swarm = new Swarm({
|
|
1169
1253
|
tasks: currentTasks, concurrency, cwd, model: workerModel, permissionMode, allowedTools,
|
|
1170
1254
|
useWorktrees, mergeStrategy: waveMerge, agentTimeoutMs, usageCap, allowExtraUsage, extraUsageBudget,
|
|
1255
|
+
baseCostUsd: accCost,
|
|
1171
1256
|
});
|
|
1172
1257
|
currentSwarm = swarm;
|
|
1173
1258
|
const stopRender = startRenderLoop(swarm, liveConfig);
|
|
@@ -1209,8 +1294,8 @@ async function main() {
|
|
|
1209
1294
|
saveRunState(runDir, {
|
|
1210
1295
|
id: `run-${new Date().toISOString().slice(0, 19)}`, objective: objective, budget: budget ?? tasks.length,
|
|
1211
1296
|
remaining, workerModel, plannerModel, concurrency, permissionMode,
|
|
1212
|
-
usageCap, allowExtraUsage, extraUsageBudget, flex, useWorktrees, mergeStrategy, waveNum, currentTasks,
|
|
1213
|
-
lastWaveKind,
|
|
1297
|
+
usageCap, allowExtraUsage, extraUsageBudget, flex, useWorktrees, mergeStrategy, waveNum, currentTasks: [],
|
|
1298
|
+
lastWaveKind, overheadBudgetUsed, accCost, accCompleted, accFailed,
|
|
1214
1299
|
branches, phase: "steering", startedAt: new Date(runStartedAt).toISOString(), cwd,
|
|
1215
1300
|
});
|
|
1216
1301
|
waveHistory.push({
|
|
@@ -1225,11 +1310,10 @@ async function main() {
|
|
|
1225
1310
|
});
|
|
1226
1311
|
if (!flex || remaining <= 0 || swarm.aborted || swarm.cappedOut)
|
|
1227
1312
|
break;
|
|
1228
|
-
// ── Steer: assess
|
|
1229
|
-
|
|
1230
|
-
let steerDone = false;
|
|
1313
|
+
// ── Steer: assess and compose the next wave ──
|
|
1314
|
+
let steered = false;
|
|
1231
1315
|
let steerAttempts = 0;
|
|
1232
|
-
while (!
|
|
1316
|
+
while (!steered && remaining > 0 && !stopping && steerAttempts < 3) {
|
|
1233
1317
|
steerAttempts++;
|
|
1234
1318
|
console.log(chalk.cyan(`\n ◆ Assessing...\n`));
|
|
1235
1319
|
process.stdout.write("\x1B[?25l");
|
|
@@ -1238,86 +1322,51 @@ async function main() {
|
|
|
1238
1322
|
const steer = await steerWave(objective, waveHistory, remaining, cwd, plannerModel, workerModel, permissionMode, concurrency, makeProgressLog(), memory);
|
|
1239
1323
|
process.stdout.write(`\x1B[2K\r`);
|
|
1240
1324
|
process.stdout.write("\x1B[?25h");
|
|
1241
|
-
// Persist context layers
|
|
1242
1325
|
if (steer.statusUpdate)
|
|
1243
1326
|
writeStatus(runDir, steer.statusUpdate);
|
|
1244
1327
|
if (steer.goalUpdate) {
|
|
1245
1328
|
writeGoalUpdate(runDir, steer.goalUpdate);
|
|
1246
1329
|
console.log(chalk.dim(` Goal refined: ${steer.goalUpdate.slice(0, 100)}\n`));
|
|
1247
1330
|
}
|
|
1248
|
-
// Archive milestone every ~5 execution waves
|
|
1249
1331
|
const execWaves = waveHistory.filter(w => w.kind === "execute").length;
|
|
1250
1332
|
if (execWaves > 0 && execWaves % 5 === 0)
|
|
1251
1333
|
archiveMilestone(runDir, waveNum);
|
|
1252
|
-
if (steer.done || steer.
|
|
1253
|
-
|
|
1254
|
-
|
|
1255
|
-
objectiveComplete = true;
|
|
1256
|
-
remaining = 0; // exit outer loop too
|
|
1257
|
-
break;
|
|
1258
|
-
}
|
|
1259
|
-
if (steer.action === "reflect") {
|
|
1260
|
-
// Safety: no consecutive reflections, budget cap
|
|
1261
|
-
const canReflect = lastWaveKind !== "reflect" && reflectionBudgetUsed + 2 <= maxReflectionBudget;
|
|
1262
|
-
if (!canReflect) {
|
|
1334
|
+
if (steer.done || steer.tasks.length === 0) {
|
|
1335
|
+
const hasVerification = waveHistory.some(w => w.kind.includes("verif"));
|
|
1336
|
+
if (!hasVerification && remaining >= 1) {
|
|
1263
1337
|
console.log(chalk.dim(` ${steer.reasoning}`));
|
|
1264
|
-
console.log(chalk.yellow(`
|
|
1265
|
-
lastWaveKind = "
|
|
1266
|
-
continue; // re-steer
|
|
1338
|
+
console.log(chalk.yellow(` Done blocked — verification required before completion\n`));
|
|
1339
|
+
lastWaveKind = "done-blocked";
|
|
1340
|
+
continue; // re-steer — steerer will see the hint
|
|
1267
1341
|
}
|
|
1268
|
-
// Run reflection wave
|
|
1269
|
-
console.log(chalk.dim(` ${steer.reasoning}`));
|
|
1270
|
-
console.log(chalk.cyan(`\n ◆ Reflection: 2 agents reviewing...\n`));
|
|
1271
|
-
const reflectionDir = join(runDir, "reflections");
|
|
1272
|
-
waveNum++;
|
|
1273
|
-
const reflTasks = buildReflectionTasks(objective, memory.goal, reflectionDir, waveNum, plannerModel);
|
|
1274
|
-
const reflSwarm = new Swarm({
|
|
1275
|
-
tasks: reflTasks, concurrency: 2, cwd,
|
|
1276
|
-
model: plannerModel, permissionMode,
|
|
1277
|
-
useWorktrees: false, mergeStrategy: "yolo",
|
|
1278
|
-
agentTimeoutMs, usageCap, allowExtraUsage, extraUsageBudget,
|
|
1279
|
-
});
|
|
1280
|
-
currentSwarm = reflSwarm;
|
|
1281
|
-
const stopReflRender = startRenderLoop(reflSwarm, liveConfig);
|
|
1282
|
-
try {
|
|
1283
|
-
await reflSwarm.run();
|
|
1284
|
-
}
|
|
1285
|
-
finally {
|
|
1286
|
-
stopReflRender();
|
|
1287
|
-
}
|
|
1288
|
-
console.log(renderSummary(reflSwarm));
|
|
1289
|
-
accCost += reflSwarm.totalCostUsd;
|
|
1290
|
-
accIn += reflSwarm.totalInputTokens;
|
|
1291
|
-
accOut += reflSwarm.totalOutputTokens;
|
|
1292
|
-
accCompleted += reflSwarm.completed;
|
|
1293
|
-
accFailed += reflSwarm.failed;
|
|
1294
|
-
accTools += reflSwarm.agents.reduce((sum, a) => sum + a.toolCalls, 0);
|
|
1295
|
-
remaining -= reflSwarm.completed + reflSwarm.failed;
|
|
1296
|
-
reflectionBudgetUsed += reflSwarm.completed + reflSwarm.failed;
|
|
1297
|
-
waveHistory.push({
|
|
1298
|
-
wave: waveNum,
|
|
1299
|
-
kind: "reflect",
|
|
1300
|
-
tasks: reflSwarm.agents.map(a => ({ prompt: a.task.prompt, status: a.status, filesChanged: a.filesChanged, error: a.error })),
|
|
1301
|
-
});
|
|
1302
|
-
lastWaveKind = "reflect";
|
|
1303
|
-
continue; // re-steer with reflection artifacts
|
|
1304
|
-
}
|
|
1305
|
-
// action === "execute"
|
|
1306
|
-
if (steer.tasks.length === 0) {
|
|
1307
1342
|
console.log(chalk.green(` \u2713 ${steer.reasoning}\n`));
|
|
1308
1343
|
objectiveComplete = true;
|
|
1309
1344
|
remaining = 0;
|
|
1310
1345
|
break;
|
|
1311
1346
|
}
|
|
1347
|
+
const isOverhead = steer.waveKind !== "execute";
|
|
1348
|
+
if (isOverhead && overheadBudgetUsed + steer.tasks.length > maxOverheadBudget) {
|
|
1349
|
+
console.log(chalk.dim(` ${steer.reasoning}`));
|
|
1350
|
+
console.log(chalk.yellow(` Overhead budget exhausted (${overheadBudgetUsed}/${maxOverheadBudget}) — re-assessing\n`));
|
|
1351
|
+
lastWaveKind = "overhead-capped";
|
|
1352
|
+
continue; // re-steer
|
|
1353
|
+
}
|
|
1312
1354
|
console.log(chalk.dim(` ${steer.reasoning}\n`));
|
|
1313
|
-
|
|
1314
|
-
|
|
1315
|
-
|
|
1355
|
+
// Resolve model aliases: "planner" → plannerModel, "worker" → workerModel
|
|
1356
|
+
currentTasks = steer.tasks.map(t => ({
|
|
1357
|
+
...t,
|
|
1358
|
+
model: t.model === "planner" ? plannerModel : t.model === "worker" ? workerModel
|
|
1359
|
+
: isOverhead && !t.model ? plannerModel : t.model,
|
|
1360
|
+
}));
|
|
1361
|
+
lastWaveKind = steer.waveKind;
|
|
1362
|
+
if (isOverhead)
|
|
1363
|
+
overheadBudgetUsed += currentTasks.length;
|
|
1364
|
+
steered = true;
|
|
1316
1365
|
}
|
|
1317
1366
|
catch (err) {
|
|
1318
1367
|
process.stdout.write("\x1B[?25h");
|
|
1319
1368
|
console.log(chalk.yellow(` Steering failed: ${err.message?.slice(0, 80)} \u2014 stopping\n`));
|
|
1320
|
-
remaining
|
|
1369
|
+
// Don't zero out remaining — preserve unspent budget so resume works
|
|
1321
1370
|
break;
|
|
1322
1371
|
}
|
|
1323
1372
|
}
|
|
@@ -1330,7 +1379,7 @@ async function main() {
|
|
|
1330
1379
|
id: `run-${new Date().toISOString().slice(0, 19)}`, objective: objective ?? "", budget: budget ?? tasks.length,
|
|
1331
1380
|
remaining, workerModel, plannerModel, concurrency, permissionMode,
|
|
1332
1381
|
usageCap, allowExtraUsage, extraUsageBudget, flex, useWorktrees, mergeStrategy, waveNum, currentTasks: [],
|
|
1333
|
-
lastWaveKind,
|
|
1382
|
+
lastWaveKind, overheadBudgetUsed, accCost, accCompleted, accFailed,
|
|
1334
1383
|
branches, phase: finalPhase, startedAt: new Date(runStartedAt).toISOString(), cwd,
|
|
1335
1384
|
});
|
|
1336
1385
|
if (trulyDone) {
|
|
@@ -1364,8 +1413,8 @@ async function main() {
|
|
|
1364
1413
|
boxLines.push(`${elapsedStr} · ${fmtTokens(accIn)} in / ${fmtTokens(accOut)} out · ${accTools} tools`);
|
|
1365
1414
|
if (totalMerged > 0 || totalConflicts > 0)
|
|
1366
1415
|
boxLines.push(`${totalMerged} merged${totalConflicts > 0 ? ` · ${totalConflicts} conflicts` : ""}`);
|
|
1367
|
-
if (
|
|
1368
|
-
boxLines.push(`${
|
|
1416
|
+
if (overheadBudgetUsed > 0)
|
|
1417
|
+
boxLines.push(`${overheadBudgetUsed} overhead agents (review/verify/explore)`);
|
|
1369
1418
|
if (lastCapped)
|
|
1370
1419
|
boxLines.push(chalk.yellow(`Capped at ${usageCap != null ? Math.round(usageCap * 100) : 100}%`));
|
|
1371
1420
|
const boxW = Math.max(...boxLines.map(l => l.replace(/\x1B\[[0-9;]*m/g, "").length)) + 4;
|
package/dist/planner.d.ts
CHANGED
|
@@ -10,7 +10,7 @@ export interface PlannerRateLimitInfo {
|
|
|
10
10
|
}
|
|
11
11
|
export interface WaveSummary {
|
|
12
12
|
wave: number;
|
|
13
|
-
kind:
|
|
13
|
+
kind: string;
|
|
14
14
|
tasks: {
|
|
15
15
|
prompt: string;
|
|
16
16
|
status: string;
|
|
@@ -20,15 +20,16 @@ export interface WaveSummary {
|
|
|
20
20
|
}
|
|
21
21
|
export interface SteerResult {
|
|
22
22
|
done: boolean;
|
|
23
|
-
action: "execute" | "reflect" | "done";
|
|
24
23
|
tasks: Task[];
|
|
25
24
|
reasoning: string;
|
|
25
|
+
waveKind: string;
|
|
26
26
|
goalUpdate?: string;
|
|
27
27
|
statusUpdate?: string;
|
|
28
28
|
}
|
|
29
29
|
export interface RunMemory {
|
|
30
30
|
designs: string;
|
|
31
31
|
reflections: string;
|
|
32
|
+
verifications: string;
|
|
32
33
|
milestones: string;
|
|
33
34
|
status: string;
|
|
34
35
|
goal: string;
|
|
@@ -40,7 +41,6 @@ export declare function getPlannerRateLimitInfo(): PlannerRateLimitInfo;
|
|
|
40
41
|
export declare function planTasks(objective: string, cwd: string, plannerModel: string, workerModel: string, permissionMode: PermMode, budget: number | undefined, concurrency: number, onLog: (text: string) => void, flexNote?: string, outFile?: string): Promise<Task[]>;
|
|
41
42
|
export declare function identifyThemes(objective: string, count: number, model: string, permissionMode: PermMode, onLog?: (text: string) => void): Promise<string[]>;
|
|
42
43
|
export declare function buildThinkingTasks(objective: string, themes: string[], designDir: string, plannerModel: string, previousKnowledge?: string): Task[];
|
|
43
|
-
export declare function buildReflectionTasks(objective: string, goal: string, reflectionDir: string, waveNum: number, plannerModel: string): Task[];
|
|
44
44
|
export declare function orchestrate(objective: string, designDocs: string, cwd: string, plannerModel: string, workerModel: string, permissionMode: PermMode, budget: number, concurrency: number, onLog: (text: string) => void, flexNote?: string, outFile?: string): Promise<Task[]>;
|
|
45
45
|
export declare function refinePlan(objective: string, previousTasks: Task[], feedback: string, cwd: string, plannerModel: string, workerModel: string, permissionMode: PermMode, budget: number | undefined, concurrency: number, onLog: (text: string) => void): Promise<Task[]>;
|
|
46
46
|
export declare function steerWave(objective: string, history: WaveSummary[], remainingBudget: number, cwd: string, plannerModel: string, workerModel: string, permissionMode: PermMode, concurrency: number, onLog: (text: string) => void, runMemory?: RunMemory): Promise<SteerResult>;
|
package/dist/planner.js
CHANGED
|
@@ -1,6 +1,18 @@
|
|
|
1
1
|
import { query } from "@anthropic-ai/claude-agent-sdk";
|
|
2
2
|
import { readFileSync } from "fs";
|
|
3
3
|
import { NudgeError } from "./types.js";
|
|
4
|
+
// The core framing for all planning. Not a checklist — a way of thinking.
|
|
5
|
+
const DESIGN_THINKING = `
|
|
6
|
+
HOW TO THINK ABOUT EVERY TASK:
|
|
7
|
+
|
|
8
|
+
Start from the user's job. What is someone hiring this product to do? "I need to send money abroad cheaply" — not "I need a currency conversion API." Every decision — what to build, how fast it responds, what happens on error — flows from the job.
|
|
9
|
+
|
|
10
|
+
The experience IS the product. A 200ms server response is not a "performance metric" — it's the difference between an app that feels alive and one that feels broken. A loading state is not "polish" — it's the user knowing the app heard them. An error message is not "error handling" — it's the app being honest. There is no line between backend and UX. The server, the API, the database query, the render — they're all one experience the user either trusts or doesn't.
|
|
11
|
+
|
|
12
|
+
Build the core, verify it works, learn, iterate. Don't plan 20 features and build them all. Build the ONE thing that matters most, run it, see if it actually works from a user's chair. What you learn from seeing it run will change what you build next. Each wave should make what exists better before adding what doesn't exist yet.
|
|
13
|
+
|
|
14
|
+
Consistency is what makes complex things feel simple. One design system, rigid rules, no exceptions. This is how Revolut ships a super-app with 30+ features that doesn't feel like chaos.
|
|
15
|
+
`;
|
|
4
16
|
const NUDGE_MS = 15 * 60 * 1000; // 15 min — close & restart with "continue"
|
|
5
17
|
const HARD_TIMEOUT_MS = 30 * 60 * 1000; // 30 min — give up
|
|
6
18
|
export function detectModelTier(model) {
|
|
@@ -412,17 +424,20 @@ export function buildThinkingTasks(objective, themes, designDir, plannerModel, p
|
|
|
412
424
|
OVERALL OBJECTIVE: ${objective}
|
|
413
425
|
${prevBlock}
|
|
414
426
|
YOUR FOCUS: ${theme}
|
|
415
|
-
|
|
427
|
+
${DESIGN_THINKING}
|
|
416
428
|
Explore the codebase thoroughly using Read, Glob, and Grep. Then write a design document to ${designDir}/focus-${i}.md with these sections:
|
|
417
429
|
|
|
418
430
|
## Findings
|
|
419
431
|
Key files, patterns, and architecture you discovered. Cite specific file paths and function names.
|
|
420
432
|
|
|
433
|
+
## The Job
|
|
434
|
+
What is someone hiring this product to do? Not the feature — the outcome. Frame everything below through this lens.
|
|
435
|
+
|
|
421
436
|
## Proposed Work Items
|
|
422
437
|
For each item:
|
|
423
438
|
- **What**: What to build or change
|
|
424
439
|
- **Where**: Specific file paths
|
|
425
|
-
- **Why**:
|
|
440
|
+
- **Why**: How this serves the job — including how fast it needs to respond and what happens when it fails
|
|
426
441
|
- **Risk**: Conflicts or complications
|
|
427
442
|
|
|
428
443
|
## Key Files
|
|
@@ -432,44 +447,6 @@ Be thorough — your findings drive the execution plan.`,
|
|
|
432
447
|
model: plannerModel,
|
|
433
448
|
}));
|
|
434
449
|
}
|
|
435
|
-
export function buildReflectionTasks(objective, goal, reflectionDir, waveNum, plannerModel) {
|
|
436
|
-
const goalBlock = goal ? `\nEVOLVED GOAL:\n${goal}\n` : "";
|
|
437
|
-
return [
|
|
438
|
-
{
|
|
439
|
-
id: "review-0",
|
|
440
|
-
prompt: `You are a senior code reviewer performing a deep quality audit.
|
|
441
|
-
|
|
442
|
-
OBJECTIVE: ${objective}
|
|
443
|
-
${goalBlock}
|
|
444
|
-
Read the codebase thoroughly. Assess:
|
|
445
|
-
- **Correctness**: Bugs, missing error handling, broken flows?
|
|
446
|
-
- **Architecture**: Clean design? Unnecessary or missing abstractions?
|
|
447
|
-
- **Code quality**: Readability, naming, duplication, dead code?
|
|
448
|
-
- **Completeness**: What's missing vs. the objective? Half-done work?
|
|
449
|
-
- **Polish**: Edge cases, error messages, loading states?
|
|
450
|
-
|
|
451
|
-
Write findings to ${reflectionDir}/wave-${waveNum}-quality.md.
|
|
452
|
-
End with a ## Verdict: is this closer to "good enough" or "amazing"? What would make the biggest difference?`,
|
|
453
|
-
model: plannerModel,
|
|
454
|
-
},
|
|
455
|
-
{
|
|
456
|
-
id: "review-1",
|
|
457
|
-
prompt: `You are a UX and integration reviewer.
|
|
458
|
-
|
|
459
|
-
OBJECTIVE: ${objective}
|
|
460
|
-
${goalBlock}
|
|
461
|
-
Read the codebase. Assess:
|
|
462
|
-
- **UX coherence**: Do user-facing flows make sense end-to-end? Consistent experience?
|
|
463
|
-
- **Integration**: Do pieces fit together? Seams, inconsistencies, broken contracts?
|
|
464
|
-
- **Testing**: Meaningful coverage? Testing the right things?
|
|
465
|
-
- **Gaps**: Unhandled use cases? What would surprise a user?
|
|
466
|
-
|
|
467
|
-
Write findings to ${reflectionDir}/wave-${waveNum}-ux.md.
|
|
468
|
-
End with ## Priorities: rank the top 3 things that would most improve the result.`,
|
|
469
|
-
model: plannerModel,
|
|
470
|
-
},
|
|
471
|
-
];
|
|
472
|
-
}
|
|
473
450
|
export async function orchestrate(objective, designDocs, cwd, plannerModel, workerModel, permissionMode, budget, concurrency, onLog, flexNote, outFile) {
|
|
474
451
|
const capability = modelCapabilityBlock(workerModel);
|
|
475
452
|
const flexLine = flexNote ? `\n\n${flexNote}` : "";
|
|
@@ -483,7 +460,7 @@ Your architects explored the codebase and found:
|
|
|
483
460
|
${designDocs}
|
|
484
461
|
|
|
485
462
|
AGENT CAPABILITY: ${capability}
|
|
486
|
-
|
|
463
|
+
${DESIGN_THINKING}
|
|
487
464
|
Create exactly ~${budget} concrete execution tasks based on these findings.
|
|
488
465
|
|
|
489
466
|
Requirements:
|
|
@@ -492,7 +469,8 @@ Requirements:
|
|
|
492
469
|
- ${concurrency} agents run in parallel — tasks must touch DIFFERENT files
|
|
493
470
|
- Trust the research — don't tell agents to re-explore what's documented
|
|
494
471
|
- Reference specific files and patterns from the findings
|
|
495
|
-
-
|
|
472
|
+
- Build the core user job first, then expand. Each task should produce something complete and usable — not scaffolding for later
|
|
473
|
+
- There is no separate "polish" phase. Loading states, error handling, sub-200ms responses, and edge cases are part of every task${flexLine}
|
|
496
474
|
|
|
497
475
|
Respond with ONLY a JSON object (no markdown fences):
|
|
498
476
|
{"tasks": [{"prompt": "..."}]}${fileInstruction}`;
|
|
@@ -655,24 +633,25 @@ async function extractTaskJson(raw, retry, onLog, outFile) {
|
|
|
655
633
|
// ── Wave steering ──
|
|
656
634
|
export async function steerWave(objective, history, remainingBudget, cwd, plannerModel, workerModel, permissionMode, concurrency, onLog, runMemory) {
|
|
657
635
|
const capability = modelCapabilityBlock(workerModel);
|
|
658
|
-
// Three-layer context: status (current), milestones (strategic), recent waves (tactical)
|
|
659
636
|
const recentWaves = history.slice(-3);
|
|
660
637
|
const recentText = recentWaves.length > 0 ? recentWaves.map(w => {
|
|
661
|
-
const tag = w.kind === "reflect" ? " (reflection)" : w.kind === "think" ? " (thinking)" : "";
|
|
662
638
|
const lines = w.tasks.map(t => {
|
|
663
639
|
const files = t.filesChanged ? ` (${t.filesChanged} files)` : "";
|
|
664
640
|
const err = t.error ? ` — ${t.error}` : "";
|
|
665
641
|
return ` - [${t.status}] ${t.prompt.slice(0, 120)}${files}${err}`;
|
|
666
642
|
}).join("\n");
|
|
667
|
-
return `Wave ${w.wave + 1}${
|
|
643
|
+
return `Wave ${w.wave + 1} (${w.kind}):\n${lines}`;
|
|
668
644
|
}).join("\n\n") : "(first wave)";
|
|
669
|
-
const
|
|
670
|
-
const
|
|
645
|
+
const lastKind = history.length > 0 ? history[history.length - 1].kind : "";
|
|
646
|
+
const repeatHint = lastKind && lastKind !== "execute"
|
|
647
|
+
? `\nThe previous wave was "${lastKind}". Don't repeat the same wave kind unless you have a strong reason.\n`
|
|
648
|
+
: "";
|
|
671
649
|
const cap = (s, max) => s.length > max ? s.slice(0, max) + "\n...(truncated)" : s;
|
|
672
650
|
const statusBlock = runMemory?.status ? `\nCurrent project status:\n${runMemory.status}\n` : "";
|
|
673
651
|
const milestoneBlock = runMemory?.milestones ? `\nMilestone snapshots:\n${cap(runMemory.milestones, 4000)}\n` : "";
|
|
674
652
|
const designBlock = runMemory?.designs ? `\nArchitectural research:\n${cap(runMemory.designs, 4000)}\n` : "";
|
|
675
653
|
const reflectionBlock = runMemory?.reflections ? `\nLatest quality reports:\n${cap(runMemory.reflections, 3000)}\n` : "";
|
|
654
|
+
const verificationBlock = runMemory?.verifications ? `\nVerification results (from actually running the app):\n${cap(runMemory.verifications, 3000)}\n` : "";
|
|
676
655
|
const goalBlock = runMemory?.goal ? `\nNorth star — what "amazing" means:\n${runMemory.goal}\n` : "";
|
|
677
656
|
const prevRunBlock = runMemory?.previousRuns ? `\nKnowledge from previous runs:\n${cap(runMemory.previousRuns, 3000)}\n` : "";
|
|
678
657
|
const prompt = `You are the quality director for an autonomous multi-wave agent system. Your job is to push the work toward "amazing," not just "done."
|
|
@@ -681,38 +660,63 @@ Objective: ${objective}
|
|
|
681
660
|
${goalBlock}${statusBlock}${milestoneBlock}${prevRunBlock}
|
|
682
661
|
Recent waves:
|
|
683
662
|
${recentText}
|
|
684
|
-
${designBlock}${reflectionBlock}
|
|
663
|
+
${designBlock}${reflectionBlock}${verificationBlock}
|
|
685
664
|
Remaining budget: ${remainingBudget} agent sessions. ${concurrency} agents run in parallel — tasks must touch DIFFERENT files.
|
|
686
665
|
${capability}
|
|
666
|
+
${DESIGN_THINKING}
|
|
687
667
|
Total waves completed: ${history.length}
|
|
688
668
|
|
|
689
|
-
Read the codebase. Assess
|
|
669
|
+
Read the codebase. Assess from the user's chair: does this product do the job someone would hire it for? Does it feel fast, honest, and trustworthy? Not "is the code clean" — "would I use this?"
|
|
670
|
+
|
|
671
|
+
If verification found issues, those are the priority. Fix what's broken before building what's missing. Iterate on what exists before expanding scope.
|
|
672
|
+
|
|
673
|
+
## Compose the next wave
|
|
674
|
+
|
|
675
|
+
You have full creative freedom. Design the wave that will have the highest impact right now. Here are archetypes to draw from — mix, adapt, or invent your own:
|
|
690
676
|
|
|
691
|
-
|
|
677
|
+
**Execute** — Agents implement concrete changes in parallel. Each touches different files. The bread and butter.
|
|
678
|
+
Example: 5 agents each owning a different feature or fix
|
|
692
679
|
|
|
693
|
-
**
|
|
694
|
-
|
|
695
|
-
- You're unsure about quality and need expert eyes
|
|
696
|
-
- A subsystem just "completed" and deserves verification
|
|
680
|
+
**Explore** — Multiple agents independently tackle the same problem from different angles. Each writes a design/approach to a separate file. Use when you need creative alternatives before committing.
|
|
681
|
+
Example: 3 agents each design a different navigation approach, writing to designs/nav-{approach}.md
|
|
697
682
|
|
|
698
|
-
**
|
|
699
|
-
|
|
700
|
-
- There are clear gaps, bugs, or improvements to make
|
|
683
|
+
**Critique** — Agents review what exists as skeptical experts. They read the codebase and write findings to files. Use after substantial new code ships.
|
|
684
|
+
Example: 1 code quality reviewer, 1 UX reviewer examining flows end-to-end
|
|
701
685
|
|
|
702
|
-
**
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
686
|
+
**Synthesize** — An agent reads multiple alternatives or review findings and makes a decision. Writes the chosen approach or prioritized fix list.
|
|
687
|
+
Example: 1 agent reads 3 design docs and writes the implementation plan
|
|
688
|
+
|
|
689
|
+
**Verify** — Agents actually RUN the application: build it, start it, navigate it, click things, try edge cases. They report what works and what's broken. Not code reading — real testing.
|
|
690
|
+
Example: 1 agent does end-to-end QA, writing a report with reproduction steps
|
|
691
|
+
|
|
692
|
+
**User-test** — Agents emulate specific user personas interacting with the product. "First-time user who just downloaded this." "Power user trying to do X fast." They test from that perspective and report friction.
|
|
693
|
+
Example: 2 agents, one new user, one power user, each writing a report
|
|
694
|
+
|
|
695
|
+
**Polish** — Agents focus purely on feel: loading states, error messages, micro-interactions, empty states, responsiveness. Not features — the texture that makes users trust the product.
|
|
696
|
+
Example: 2 agents, one on happy paths, one on error/edge states
|
|
697
|
+
|
|
698
|
+
You can combine these. A wave can have 3 execute agents + 1 verification agent. Or 2 divergent explorers. Whatever the situation calls for.
|
|
699
|
+
|
|
700
|
+
For non-execute tasks (critique, verify, user-test, synthesize), tell agents to write their output to files in the run directory so findings persist for future waves. Use paths like: .claude-overnight/latest/reflections/wave-N-{topic}.md or .claude-overnight/latest/verifications/wave-N-{topic}.md.
|
|
701
|
+
|
|
702
|
+
IMPORTANT: You cannot declare "done" unless at least one verification wave has confirmed the app works. If you're considering done but haven't verified, compose a verification wave first.
|
|
703
|
+
${repeatHint}
|
|
707
704
|
Respond with ONLY a JSON object (no markdown fences):
|
|
708
705
|
{
|
|
709
|
-
"
|
|
710
|
-
"
|
|
711
|
-
"reasoning": "your assessment and why you chose this
|
|
706
|
+
"done": false,
|
|
707
|
+
"waveKind": "execute",
|
|
708
|
+
"reasoning": "your assessment and why you chose this wave composition",
|
|
712
709
|
"goalUpdate": "optional — refine what 'amazing' means as you learn more",
|
|
713
|
-
"statusUpdate": "REQUIRED —
|
|
714
|
-
"tasks": [
|
|
715
|
-
}
|
|
710
|
+
"statusUpdate": "REQUIRED — concise project status: what's built, what works, what's rough, quality level, key gaps. This replaces the previous status.",
|
|
711
|
+
"tasks": [
|
|
712
|
+
{"prompt": "task instruction...", "model": "worker"},
|
|
713
|
+
{"prompt": "review task...", "model": "planner"}
|
|
714
|
+
]
|
|
715
|
+
}
|
|
716
|
+
|
|
717
|
+
The "model" field on each task: use "worker" (${workerModel}) for implementation tasks, "planner" (${plannerModel}) for review/analysis/verification tasks. Default is "worker".
|
|
718
|
+
|
|
719
|
+
If done: {"done": true, "waveKind": "done", "reasoning": "...", "statusUpdate": "...", "tasks": []}`;
|
|
716
720
|
onLog("Assessing...");
|
|
717
721
|
const resultText = await runPlannerQuery(prompt, { cwd, model: plannerModel, permissionMode }, onLog);
|
|
718
722
|
const parsed = await (async () => {
|
|
@@ -720,21 +724,25 @@ Respond with ONLY a JSON object (no markdown fences):
|
|
|
720
724
|
if (first)
|
|
721
725
|
return first;
|
|
722
726
|
onLog("Retrying...");
|
|
723
|
-
const retryText = await runPlannerQuery(`Your previous response was not valid JSON. Respond with ONLY a JSON object {"
|
|
724
|
-
|
|
727
|
+
const retryText = await runPlannerQuery(`Your previous response was not valid JSON. Respond with ONLY a JSON object {"done":false,"waveKind":"execute","reasoning":"...","statusUpdate":"...","tasks":[{"prompt":"..."}]}.\n\n${prompt}`, { cwd, model: plannerModel, permissionMode }, onLog);
|
|
728
|
+
const retryParsed = attemptJsonParse(retryText);
|
|
729
|
+
if (retryParsed)
|
|
730
|
+
return retryParsed;
|
|
731
|
+
// Don't return done:true on parse failure — that permanently marks the run complete.
|
|
732
|
+
// Throw so the caller's catch block handles it as a transient steering failure.
|
|
733
|
+
throw new Error("Could not parse steering response after retry");
|
|
725
734
|
})();
|
|
726
|
-
const
|
|
735
|
+
const isDone = parsed.done === true;
|
|
736
|
+
const waveKind = parsed.waveKind || parsed.action || (isDone ? "done" : "execute");
|
|
727
737
|
const statusUpdate = parsed.statusUpdate || undefined;
|
|
728
|
-
if (
|
|
729
|
-
return { done: true,
|
|
730
|
-
}
|
|
731
|
-
if (action === "reflect") {
|
|
732
|
-
return { done: false, action: "reflect", tasks: [], reasoning: parsed.reasoning || "Quality audit needed", goalUpdate: parsed.goalUpdate, statusUpdate };
|
|
738
|
+
if (isDone) {
|
|
739
|
+
return { done: true, tasks: [], reasoning: parsed.reasoning || "Objective complete", waveKind: "done", goalUpdate: parsed.goalUpdate, statusUpdate };
|
|
733
740
|
}
|
|
734
741
|
let tasks = (parsed.tasks || []).map((t, i) => ({
|
|
735
742
|
id: String(i),
|
|
736
743
|
prompt: typeof t === "string" ? t : t.prompt,
|
|
744
|
+
...(t.model && { model: t.model }),
|
|
737
745
|
}));
|
|
738
746
|
tasks = postProcess(tasks, remainingBudget, onLog);
|
|
739
|
-
return { done: tasks.length === 0,
|
|
747
|
+
return { done: tasks.length === 0, tasks, reasoning: parsed.reasoning || "", waveKind: tasks.length === 0 ? "done" : waveKind, goalUpdate: parsed.goalUpdate, statusUpdate };
|
|
740
748
|
}
|
package/dist/swarm.d.ts
CHANGED
|
@@ -16,6 +16,8 @@ export interface SwarmConfig {
|
|
|
16
16
|
allowExtraUsage?: boolean;
|
|
17
17
|
/** Max $ to spend on extra usage before stopping. Only applies when allowExtraUsage is true. */
|
|
18
18
|
extraUsageBudget?: number;
|
|
19
|
+
/** Cost from previous waves — lets the UI show an accurate running total. */
|
|
20
|
+
baseCostUsd?: number;
|
|
19
21
|
}
|
|
20
22
|
export interface MergeResult {
|
|
21
23
|
branch: string;
|
|
@@ -64,6 +66,7 @@ export declare class Swarm {
|
|
|
64
66
|
usageCap: number | undefined;
|
|
65
67
|
readonly allowExtraUsage: boolean;
|
|
66
68
|
readonly extraUsageBudget: number | undefined;
|
|
69
|
+
readonly baseCostUsd: number;
|
|
67
70
|
constructor(config: SwarmConfig);
|
|
68
71
|
get active(): number;
|
|
69
72
|
get pending(): number;
|
package/dist/swarm.js
CHANGED
|
@@ -50,6 +50,7 @@ export class Swarm {
|
|
|
50
50
|
usageCap; // mutable — can be changed live
|
|
51
51
|
allowExtraUsage;
|
|
52
52
|
extraUsageBudget;
|
|
53
|
+
baseCostUsd;
|
|
53
54
|
constructor(config) {
|
|
54
55
|
if (!config.tasks.length) {
|
|
55
56
|
throw new Error("SwarmConfig: tasks array must not be empty");
|
|
@@ -73,6 +74,7 @@ export class Swarm {
|
|
|
73
74
|
this.usageCap = config.usageCap;
|
|
74
75
|
this.allowExtraUsage = config.allowExtraUsage ?? false;
|
|
75
76
|
this.extraUsageBudget = config.extraUsageBudget;
|
|
77
|
+
this.baseCostUsd = config.baseCostUsd ?? 0;
|
|
76
78
|
this.queue = [...config.tasks];
|
|
77
79
|
this.total = config.tasks.length;
|
|
78
80
|
}
|
package/dist/types.d.ts
CHANGED
|
@@ -133,13 +133,13 @@ export interface RunState {
|
|
|
133
133
|
mergeStrategy: MergeStrategy;
|
|
134
134
|
waveNum: number;
|
|
135
135
|
currentTasks: Task[];
|
|
136
|
-
lastWaveKind:
|
|
137
|
-
|
|
136
|
+
lastWaveKind: string;
|
|
137
|
+
overheadBudgetUsed: number;
|
|
138
138
|
accCost: number;
|
|
139
139
|
accCompleted: number;
|
|
140
140
|
accFailed: number;
|
|
141
141
|
branches: BranchRecord[];
|
|
142
|
-
phase: "executing" | "steering" | "reflecting" | "capped" | "done";
|
|
142
|
+
phase: "executing" | "steering" | "reflecting" | "verifying" | "capped" | "done";
|
|
143
143
|
startedAt: string;
|
|
144
144
|
cwd: string;
|
|
145
145
|
}
|
package/dist/ui.js
CHANGED
|
@@ -39,14 +39,19 @@ export function renderFrame(swarm, showHotkeys = false) {
|
|
|
39
39
|
chalk.gray(`${swarm.pending} queued`) +
|
|
40
40
|
" " +
|
|
41
41
|
chalk.gray(`\u23F1 ${fmtDur(Date.now() - swarm.startedAt)}`));
|
|
42
|
-
// Stats line
|
|
42
|
+
// Stats line — show wave cost + overall if there's a base
|
|
43
43
|
const tokIn = fmtTokens(swarm.totalInputTokens);
|
|
44
44
|
const tokOut = fmtTokens(swarm.totalOutputTokens);
|
|
45
|
-
const
|
|
46
|
-
|
|
47
|
-
|
|
45
|
+
const waveCost = swarm.totalCostUsd;
|
|
46
|
+
const totalCost = swarm.baseCostUsd + waveCost;
|
|
47
|
+
let costStr = "";
|
|
48
|
+
if (totalCost > 0) {
|
|
49
|
+
costStr = swarm.baseCostUsd > 0
|
|
50
|
+
? chalk.yellow(`$${waveCost.toFixed(3)}`) + chalk.dim(` / $${totalCost.toFixed(2)} total`)
|
|
51
|
+
: chalk.yellow(`$${waveCost.toFixed(3)}`);
|
|
52
|
+
}
|
|
48
53
|
out.push(chalk.gray(` \u2191 ${tokIn} in \u2193 ${tokOut} out`) +
|
|
49
|
-
(
|
|
54
|
+
(costStr ? ` ${costStr}` : ""));
|
|
50
55
|
// ── Usage bar(s) — cycle through windows every 3s ──
|
|
51
56
|
const windows = Array.from(swarm.rateLimitWindows.values());
|
|
52
57
|
const rlPct = swarm.rateLimitUtilization;
|
|
@@ -82,10 +87,7 @@ export function renderFrame(swarm, showHotkeys = false) {
|
|
|
82
87
|
label = chalk.red(`Waiting for reset ${mm > 0 ? `${mm}m ${ss}s` : `${ss}s`}`);
|
|
83
88
|
}
|
|
84
89
|
if (swarm.isUsingOverage && !swarm.cappedOut) {
|
|
85
|
-
|
|
86
|
-
? ` $${swarm.overageCostUsd.toFixed(2)}/$${swarm.extraUsageBudget}`
|
|
87
|
-
: "";
|
|
88
|
-
label += chalk.red(` [EXTRA USAGE${budgetInfo}]`);
|
|
90
|
+
label += chalk.red(" [EXTRA USAGE]");
|
|
89
91
|
}
|
|
90
92
|
const prefix = windowLabel ? chalk.dim(windowLabel.padEnd(6)) : chalk.dim("Usage ");
|
|
91
93
|
out.push(` ${prefix}${barStr} ${label}`);
|
|
@@ -104,6 +106,23 @@ export function renderFrame(swarm, showHotkeys = false) {
|
|
|
104
106
|
renderBar(rlPct);
|
|
105
107
|
}
|
|
106
108
|
}
|
|
109
|
+
// ── Extra usage budget bar ──
|
|
110
|
+
if (swarm.isUsingOverage && swarm.extraUsageBudget != null && swarm.extraUsageBudget > 0) {
|
|
111
|
+
const barW = Math.min(30, w - 40);
|
|
112
|
+
const pct = Math.min(1, swarm.overageCostUsd / swarm.extraUsageBudget);
|
|
113
|
+
const filled = Math.round(pct * barW);
|
|
114
|
+
let barStr = "";
|
|
115
|
+
for (let i = 0; i < barW; i++) {
|
|
116
|
+
if (i < filled)
|
|
117
|
+
barStr += pct > 0.9 ? chalk.red("\u2588") : pct > 0.75 ? chalk.yellow("\u2588") : chalk.magenta("\u2588");
|
|
118
|
+
else
|
|
119
|
+
barStr += chalk.gray("\u2591");
|
|
120
|
+
}
|
|
121
|
+
const label = swarm.cappedOut
|
|
122
|
+
? chalk.red(`$${swarm.overageCostUsd.toFixed(2)}/$${swarm.extraUsageBudget} — budget hit`)
|
|
123
|
+
: `$${swarm.overageCostUsd.toFixed(2)}/$${swarm.extraUsageBudget}`;
|
|
124
|
+
out.push(` ${chalk.dim("Extra ")}${barStr} ${label}`);
|
|
125
|
+
}
|
|
107
126
|
out.push("");
|
|
108
127
|
// ── Agent table ──
|
|
109
128
|
const running = swarm.agents.filter((a) => a.status === "running");
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-overnight",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.6.0",
|
|
4
4
|
"description": "Run 10, 100, or 1000 Claude agents overnight. Parallel autonomous AI coding with thinking waves, iterative quality steering, crash recovery, and rate limit handling. Built on the Claude Agent SDK.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|