claude-overnight 1.25.33 → 1.25.37
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_version.d.ts +1 -1
- package/dist/_version.js +1 -1
- package/dist/index.js +4 -2
- package/dist/interactive-panel.d.ts +9 -0
- package/dist/interactive-panel.js +18 -3
- package/dist/planner-query.js +2 -2
- package/dist/providers.js +91 -10
- package/dist/run.js +113 -15
- package/dist/steering.js +5 -2
- package/dist/swarm.js +5 -2
- package/dist/types.d.ts +2 -0
- package/dist/ui.d.ts +4 -2
- package/dist/ui.js +8 -2
- package/package.json +2 -2
- package/plugins/claude-overnight/.claude-plugin/plugin.json +1 -1
package/dist/_version.d.ts
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
export declare const VERSION = "1.25.33";
|
|
1
|
+
export declare const VERSION = "1.25.37";
|
package/dist/_version.js
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
// Auto-generated by build — do not edit manually.
|
|
2
|
-
export const VERSION = "1.25.33";
|
|
2
|
+
export const VERSION = "1.25.37";
|
package/dist/index.js
CHANGED
|
@@ -839,8 +839,10 @@ async function main() {
|
|
|
839
839
|
statusLineActive = false;
|
|
840
840
|
}
|
|
841
841
|
};
|
|
842
|
-
/** Cursor agent cold start + thinking-variant model latency can exceed 20s
|
|
843
|
-
|
|
842
|
+
/** Cursor agent cold start + thinking-variant model latency can exceed 20s, and the cursor
|
|
843
|
+
* preflight now also runs a write-capability probe (see probeCursorWriteCapability) that
|
|
844
|
+
* asks cursor to Bash a marker file — so the total budget must cover auth ping + write turn. */
|
|
845
|
+
const preflightMs = (p) => isCursorProxyProvider(p) ? 90_000 : 20_000;
|
|
844
846
|
const results = await Promise.all(pending.map(async ([role, p]) => {
|
|
845
847
|
statuses.set(role, "connecting…");
|
|
846
848
|
renderStatus();
|
package/dist/interactive-panel.d.ts
CHANGED
|
@@ -1,4 +1,9 @@
|
|
|
1
1
|
export type PanelMode = "debrief" | "ask" | "custom" | "none";
|
|
2
|
+
export interface DebriefEntry {
|
|
3
|
+
label: string;
|
|
4
|
+
text: string;
|
|
5
|
+
time: number;
|
|
6
|
+
}
|
|
2
7
|
/** Mutable state of the interactive panel. */
|
|
3
8
|
export interface PanelState {
|
|
4
9
|
mode: PanelMode;
|
|
@@ -11,12 +16,16 @@ export interface PanelState {
|
|
|
11
16
|
export declare class InteractivePanel {
|
|
12
17
|
state: PanelState;
|
|
13
18
|
private _bodyLines;
|
|
19
|
+
/** Accumulated debrief entries — each wave/phase appends one. */
|
|
20
|
+
private _debriefHistory;
|
|
14
21
|
set(params: {
|
|
15
22
|
mode: PanelMode;
|
|
16
23
|
header: string;
|
|
17
24
|
preview: string;
|
|
18
25
|
body: string;
|
|
19
26
|
}): void;
|
|
27
|
+
/** Append a debrief entry to the running history. Only meaningful in debrief mode. */
|
|
28
|
+
appendHistory(label: string, text: string): void;
|
|
20
29
|
/** Close the panel entirely (set mode to "none"). */
|
|
21
30
|
close(): void;
|
|
22
31
|
collapse(): void;
|
package/dist/interactive-panel.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
const
|
|
2
|
-
const
|
|
1
|
+
const BLACK_BG = "\x1B[48;5;232m";
|
|
2
|
+
const SUBTLE_FG = "\x1B[38;5;108m";
|
|
3
3
|
const BRIGHT_WHITE_FG = "\x1B[38;5;231m";
|
|
4
4
|
const SOFT_GREEN_FG = "\x1B[38;5;114m";
|
|
5
5
|
const RESET = "\x1B[0m";
|
|
@@ -15,7 +15,7 @@ function truncate(s, max) {
|
|
|
15
15
|
}
|
|
16
16
|
/** Wrap a plain (ANSI-free) line in the dark-green bg, padded to width. */
|
|
17
17
|
function bgLine(text, width) {
|
|
18
|
-
return `${
|
|
18
|
+
return `${BLACK_BG}${SUBTLE_FG}${padTo(text, width)}${RESET}`;
|
|
19
19
|
}
|
|
20
20
|
export class InteractivePanel {
|
|
21
21
|
state = {
|
|
@@ -27,6 +27,8 @@ export class InteractivePanel {
|
|
|
27
27
|
body: "",
|
|
28
28
|
};
|
|
29
29
|
_bodyLines = [];
|
|
30
|
+
/** Accumulated debrief entries — each wave/phase appends one. */
|
|
31
|
+
_debriefHistory = [];
|
|
30
32
|
set(params) {
|
|
31
33
|
this.state.mode = params.mode;
|
|
32
34
|
this.state.header = params.header;
|
|
@@ -34,6 +36,19 @@ export class InteractivePanel {
|
|
|
34
36
|
this.state.body = params.body;
|
|
35
37
|
this._bodyLines = params.body.split("\n").filter(l => l.length > 0);
|
|
36
38
|
this.state.scrollOffset = 0;
|
|
39
|
+
// Clear history when mode changes away from debrief
|
|
40
|
+
if (params.mode !== "debrief")
|
|
41
|
+
this._debriefHistory = [];
|
|
42
|
+
}
|
|
43
|
+
/** Append a debrief entry to the running history. Only meaningful in debrief mode. */
|
|
44
|
+
appendHistory(label, text) {
|
|
45
|
+
if (this.state.mode !== "debrief")
|
|
46
|
+
return;
|
|
47
|
+
this._debriefHistory.push({ label, text, time: Date.now() });
|
|
48
|
+
// Rebuild body from full history so expanded view shows everything
|
|
49
|
+
const historyBody = this._debriefHistory.map(e => ` ${e.label}\n ${e.text}`).join("\n\n");
|
|
50
|
+
this.state.body = historyBody;
|
|
51
|
+
this._bodyLines = historyBody.split("\n");
|
|
37
52
|
}
|
|
38
53
|
/** Close the panel entirely (set mode to "none"). */
|
|
39
54
|
close() {
|
package/dist/planner-query.js
CHANGED
|
@@ -454,9 +454,9 @@ async function runPlannerQueryOnce(prompt, opts, onLog) {
|
|
|
454
454
|
export function postProcess(raw, budget, onLog) {
|
|
455
455
|
let tasks = raw;
|
|
456
456
|
const before = tasks.length;
|
|
457
|
-
tasks = tasks.filter((t) => t.prompt && t.prompt.trim().
|
|
457
|
+
tasks = tasks.filter((t) => t.prompt && t.prompt.trim().length >= 1);
|
|
458
458
|
if (tasks.length < before)
|
|
459
|
-
onLog(`Filtered ${before - tasks.length} task(s) with
|
|
459
|
+
onLog(`Filtered ${before - tasks.length} task(s) with empty prompt`);
|
|
460
460
|
// Read-only tasks (verify/audit/user-test) shouldn't get a worktree: they
|
|
461
461
|
// don't change files, so they'd just create empty swarm branches that show
|
|
462
462
|
// up as "0 files changed" noise. Run them in the real project directory so
|
package/dist/providers.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
import { readFileSync, writeFileSync, mkdirSync, existsSync, chmodSync, realpathSync, openSync, statSync, readSync, closeSync } from "fs";
|
|
1
|
+
import { readFileSync, writeFileSync, mkdirSync, existsSync, chmodSync, realpathSync, openSync, statSync, readSync, closeSync, unlinkSync } from "fs";
|
|
2
|
+
import { tmpdir } from "node:os";
|
|
2
3
|
import { createRequire } from "node:module";
|
|
3
4
|
import { homedir } from "os";
|
|
4
5
|
import { join, dirname } from "path";
|
|
@@ -147,10 +148,10 @@ export function envFor(p) {
|
|
|
147
148
|
// SDK replaces env for subprocesses — force these so nothing inherits a bad CI / skip flag.
|
|
148
149
|
base.CI = "true";
|
|
149
150
|
base.CURSOR_SKIP_KEYCHAIN = "1";
|
|
150
|
-
//
|
|
151
|
-
// Glob, Grep, Write, Bash)
|
|
152
|
-
//
|
|
153
|
-
base.CURSOR_BRIDGE_MODE = "
|
|
151
|
+
// "agent" omits --mode so cursor-agent runs full agentic mode (Read,
|
|
152
|
+
// Glob, Grep, Write, Bash). Passing --mode plan or ask forces read-only —
|
|
153
|
+
// Write/Bash tool calls are silently dropped, exit 0, empty stdout.
|
|
154
|
+
base.CURSOR_BRIDGE_MODE = "agent";
|
|
154
155
|
// Use system Node.js for agent subprocess to avoid macOS segfaults with
|
|
155
156
|
// bundled Node.js. Resolve lazily.
|
|
156
157
|
if (!_cachedAgentNode || !_cachedAgentScript) {
|
|
@@ -382,6 +383,10 @@ async function preflightCursorProxyViaHttp(p, timeoutMs, opts) {
|
|
|
382
383
|
const headers = { "content-type": "application/json" };
|
|
383
384
|
if (key)
|
|
384
385
|
headers["authorization"] = `Bearer ${key}`;
|
|
386
|
+
// Shared deadline: auth ping + write probe split the total timeout budget.
|
|
387
|
+
const overallDeadlineAt = Date.now() + timeoutMs;
|
|
388
|
+
const remaining = () => Math.max(1_000, overallDeadlineAt - Date.now());
|
|
389
|
+
const authBudget = Math.max(5_000, Math.floor(timeoutMs / 2));
|
|
385
390
|
const controller = new AbortController();
|
|
386
391
|
let elapsed = 0;
|
|
387
392
|
const PROGRESS_INTERVAL_MS = 3_000;
|
|
@@ -389,7 +394,7 @@ async function preflightCursorProxyViaHttp(p, timeoutMs, opts) {
|
|
|
389
394
|
elapsed += PROGRESS_INTERVAL_MS;
|
|
390
395
|
opts?.onProgress?.(`still waiting… (${(elapsed / 1000).toFixed(1)}s)`);
|
|
391
396
|
}, PROGRESS_INTERVAL_MS);
|
|
392
|
-
const deadline = setTimeout(() => controller.abort(),
|
|
397
|
+
const deadline = setTimeout(() => controller.abort(), authBudget);
|
|
393
398
|
try {
|
|
394
399
|
// max_tokens must accommodate thinking tokens for `*-thinking-*` variants —
|
|
395
400
|
// 1 token leaves zero reasoning budget and crashes the cursor-agent subprocess
|
|
@@ -410,7 +415,6 @@ async function preflightCursorProxyViaHttp(p, timeoutMs, opts) {
|
|
|
410
415
|
}
|
|
411
416
|
// Drain body so the connection closes cleanly; we don't care about content.
|
|
412
417
|
await res.text().catch(() => "");
|
|
413
|
-
return { ok: true };
|
|
414
418
|
}
|
|
415
419
|
catch (err) {
|
|
416
420
|
if (err?.name === "AbortError") {
|
|
@@ -422,6 +426,82 @@ async function preflightCursorProxyViaHttp(p, timeoutMs, opts) {
|
|
|
422
426
|
clearTimeout(deadline);
|
|
423
427
|
clearInterval(progressTimer);
|
|
424
428
|
}
|
|
429
|
+
// Write-capability probe — catches the --mode plan / ask regression where
|
|
430
|
+
// the proxy silently swallows Write/Bash tool calls (exit 0, empty body,
|
|
431
|
+
// no file changes). Ask cursor to write a unique marker file; fail if the
|
|
432
|
+
// file doesn't appear. Keeps the first wave from silently burning budget.
|
|
433
|
+
opts?.onProgress?.(`probing write capability…`);
|
|
434
|
+
const probeErr = await probeCursorWriteCapability(baseURL, key, p.model, remaining(), opts);
|
|
435
|
+
if (probeErr)
|
|
436
|
+
return { ok: false, error: probeErr };
|
|
437
|
+
return { ok: true };
|
|
438
|
+
}
|
|
439
|
+
/**
|
|
440
|
+
* Ask the proxy to create a unique marker file via its Bash tool; verify the
|
|
441
|
+
* file appeared on disk. Returns an error string on failure, null on success.
|
|
442
|
+
*
|
|
443
|
+
* Failure modes caught:
|
|
444
|
+
* - `CURSOR_BRIDGE_MODE=plan|ask` silently drops Write/Bash (regression fixed in
|
|
445
|
+
* cursor-composer-in-claude 0.9.3; this keeps older proxy versions actionable).
|
|
446
|
+
* - Workspace is untrusted or agent is otherwise nonfunctional — exit 0 with
|
|
447
|
+
* no side effects.
|
|
448
|
+
*/
|
|
449
|
+
async function probeCursorWriteCapability(baseURL, key, model, timeoutMs, opts) {
|
|
450
|
+
const marker = `co-probe-${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 8)}`;
|
|
451
|
+
const probeFile = join(tmpdir(), `${marker}.txt`);
|
|
452
|
+
try {
|
|
453
|
+
unlinkSync(probeFile);
|
|
454
|
+
}
|
|
455
|
+
catch { }
|
|
456
|
+
const prompt = `Run this exact shell command via your Bash tool, then reply with only the word DONE:\n` +
|
|
457
|
+
`printf 'ok' > ${probeFile}`;
|
|
458
|
+
const controller = new AbortController();
|
|
459
|
+
let elapsed = 0;
|
|
460
|
+
const PROGRESS_INTERVAL_MS = 3_000;
|
|
461
|
+
const progressTimer = setInterval(() => {
|
|
462
|
+
elapsed += PROGRESS_INTERVAL_MS;
|
|
463
|
+
opts?.onProgress?.(`write probe… (${(elapsed / 1000).toFixed(1)}s)`);
|
|
464
|
+
}, PROGRESS_INTERVAL_MS);
|
|
465
|
+
const deadline = setTimeout(() => controller.abort(), timeoutMs);
|
|
466
|
+
const headers = { "content-type": "application/json" };
|
|
467
|
+
if (key)
|
|
468
|
+
headers["authorization"] = `Bearer ${key}`;
|
|
469
|
+
try {
|
|
470
|
+
const res = await fetch(`${baseURL}/v1/messages`, {
|
|
471
|
+
method: "POST",
|
|
472
|
+
headers,
|
|
473
|
+
body: JSON.stringify({
|
|
474
|
+
model,
|
|
475
|
+
max_tokens: 4096,
|
|
476
|
+
messages: [{ role: "user", content: prompt }],
|
|
477
|
+
}),
|
|
478
|
+
signal: controller.signal,
|
|
479
|
+
});
|
|
480
|
+
if (!res.ok) {
|
|
481
|
+
const text = await res.text().catch(() => "");
|
|
482
|
+
return `write probe: HTTP ${res.status}: ${text.slice(0, 200)}`;
|
|
483
|
+
}
|
|
484
|
+
await res.text().catch(() => "");
|
|
485
|
+
}
|
|
486
|
+
catch (err) {
|
|
487
|
+
if (err?.name === "AbortError")
|
|
488
|
+
return `write probe: timeout after ${Math.round(timeoutMs / 1000)}s`;
|
|
489
|
+
return `write probe: ${String(err?.message || err).slice(0, 200)}`;
|
|
490
|
+
}
|
|
491
|
+
finally {
|
|
492
|
+
clearTimeout(deadline);
|
|
493
|
+
clearInterval(progressTimer);
|
|
494
|
+
}
|
|
495
|
+
if (!existsSync(probeFile)) {
|
|
496
|
+
return (`write probe: cursor returned without creating the marker file. ` +
|
|
497
|
+
`Most likely cause: CURSOR_BRIDGE_MODE=plan|ask (silent read-only mode). ` +
|
|
498
|
+
`Upgrade cursor-composer-in-claude to ≥0.9.3 and set CURSOR_BRIDGE_MODE=agent (or unset).`);
|
|
499
|
+
}
|
|
500
|
+
try {
|
|
501
|
+
unlinkSync(probeFile);
|
|
502
|
+
}
|
|
503
|
+
catch { }
|
|
504
|
+
return null;
|
|
425
505
|
}
|
|
426
506
|
// ── Cursor API Proxy ──
|
|
427
507
|
export const PROXY_DEFAULT_URL = "http://127.0.0.1:8765";
|
|
@@ -924,9 +1004,10 @@ async function startProxyProcess(baseUrl, url, port) {
|
|
|
924
1004
|
// the CLI path injects keychain-shim-inject.js via NODE_OPTIONS which no-ops
|
|
925
1005
|
// /usr/bin/security calls on macOS (cursor-composer/dist/lib/process.js).
|
|
926
1006
|
CURSOR_BRIDGE_USE_ACP: "0",
|
|
927
|
-
//
|
|
928
|
-
//
|
|
929
|
-
|
|
1007
|
+
// "agent" omits --mode so cursor-agent runs full agentic mode with
|
|
1008
|
+
// Read/Glob/Grep/Write/Bash. --mode plan and --mode ask are both strictly
|
|
1009
|
+
// read-only — Write/Bash calls exit 0 with empty stdout.
|
|
1010
|
+
CURSOR_BRIDGE_MODE: "agent",
|
|
930
1011
|
// cursor-composer chat-only mode fakes HOME to a temp dir; on macOS the agent still waits on
|
|
931
1012
|
// Keychain (~30s) for `cursor-user` despite CURSOR_API_KEY. Use the real workspace profile.
|
|
932
1013
|
CURSOR_BRIDGE_CHAT_ONLY_WORKSPACE: "false",
|
package/dist/run.js
CHANGED
|
@@ -185,11 +185,11 @@ export async function executeRun(cfg) {
|
|
|
185
185
|
waveHistory.length ? `Waves done: ${waveHistory.length}` : "",
|
|
186
186
|
memory.reflections ? `Reflections:\n${cap(memory.reflections, 600)}` : "",
|
|
187
187
|
].filter(Boolean).join("\n\n");
|
|
188
|
-
const prompt = `${label}\n\n${ctx}\n\nWrite one short sentence (max
|
|
188
|
+
const prompt = `${label}\n\n${ctx}\n\nWrite one short sentence (max 180 chars) summarising progress and what's next. No preamble.`;
|
|
189
189
|
// Show in-flight feedback so the panel isn't empty while the planner thinks.
|
|
190
190
|
display.setDebrief(`Summarizing ${label.toLowerCase().replace(/\.$/, "")}\u2026`);
|
|
191
191
|
void runPlannerQuery(prompt, { cwd, model: debriefModel, permissionMode }, () => { })
|
|
192
|
-
.then(text => { display.setDebrief(text.trim().slice(0,
|
|
192
|
+
.then(text => { display.setDebrief(text.trim().slice(0, 210), label); })
|
|
193
193
|
.catch(() => { display.setDebrief(undefined); });
|
|
194
194
|
};
|
|
195
195
|
/** Generate a longer narrative summary at run end. Awaited (not fire-and-forget)
|
|
@@ -455,17 +455,50 @@ export async function executeRun(cfg) {
|
|
|
455
455
|
}
|
|
456
456
|
display.pause();
|
|
457
457
|
console.log(renderSummary(swarm));
|
|
458
|
-
// Retry execute tasks that returned filesChanged=0
|
|
459
|
-
//
|
|
458
|
+
// Retry execute tasks that returned filesChanged=0 OR whose postcondition
|
|
459
|
+
// shell-check failed after merge. One retry with a nudge that includes the
|
|
460
|
+
// failure output; if still failing, fail loudly so steering re-plans.
|
|
460
461
|
if (!swarm.aborted && !swarm.cappedOut && remaining > 0) {
|
|
461
|
-
const
|
|
462
|
+
const failedBranches = new Set(swarm.mergeResults.filter(r => !r.ok).map(r => r.branch));
|
|
463
|
+
const postResults = new Map();
|
|
464
|
+
for (const a of swarm.agents) {
|
|
465
|
+
if (a.status !== "done" || !a.task.postcondition)
|
|
466
|
+
continue;
|
|
467
|
+
if (a.branch && failedBranches.has(a.branch))
|
|
468
|
+
continue; // merge-failed: postcondition can't pass on main anyway
|
|
469
|
+
try {
|
|
470
|
+
const out = execSync(a.task.postcondition, { cwd, encoding: "utf-8", stdio: ["ignore", "pipe", "pipe"], timeout: 30_000 });
|
|
471
|
+
postResults.set(a.id, { ok: true, output: out.trim().slice(0, 400) });
|
|
472
|
+
}
|
|
473
|
+
catch (err) {
|
|
474
|
+
const output = ((err.stderr || "") + "\n" + (err.stdout || err.message || "")).trim().slice(0, 400);
|
|
475
|
+
postResults.set(a.id, { ok: false, output });
|
|
476
|
+
}
|
|
477
|
+
}
|
|
478
|
+
const zeroWork = swarm.agents.filter(a => {
|
|
479
|
+
if (a.status !== "done" || (a.task.type && a.task.type !== "execute"))
|
|
480
|
+
return false;
|
|
481
|
+
if ((a.filesChanged ?? 0) === 0)
|
|
482
|
+
return true;
|
|
483
|
+
const pr = postResults.get(a.id);
|
|
484
|
+
return pr && !pr.ok;
|
|
485
|
+
});
|
|
462
486
|
if (zeroWork.length > 0) {
|
|
463
|
-
|
|
464
|
-
const
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
487
|
+
const noFiles = zeroWork.filter(a => (a.filesChanged ?? 0) === 0).length;
|
|
488
|
+
const badPost = zeroWork.length - noFiles;
|
|
489
|
+
display.appendSteeringEvent(`Retry: ${zeroWork.length} task(s) (${noFiles} with 0 files, ${badPost} failed postcondition)`);
|
|
490
|
+
const retryTasks = zeroWork.map(a => {
|
|
491
|
+
const pr = postResults.get(a.id);
|
|
492
|
+
const postFailBlock = pr && !pr.ok
|
|
493
|
+
? `\n\nThe postcondition \`${a.task.postcondition}\` failed after your last attempt:\n${pr.output || "(no output)"}\n\nFix what makes the check fail and try again.`
|
|
494
|
+
: `\n\nIMPORTANT: your last attempt made no file edits. If the fix truly needs no changes, say 'no-op:' at the start and explain why. Otherwise, make the actual edits.`;
|
|
495
|
+
return {
|
|
496
|
+
id: `${a.task.id}-retry`,
|
|
497
|
+
prompt: `${a.task.prompt}${postFailBlock}`,
|
|
498
|
+
type: "execute",
|
|
499
|
+
postcondition: a.task.postcondition,
|
|
500
|
+
};
|
|
501
|
+
});
|
|
469
502
|
const retrySwarm = new Swarm({
|
|
470
503
|
tasks: retryTasks, concurrency: Math.min(concurrency, retryTasks.length), cwd, model: workerModel,
|
|
471
504
|
permissionMode, allowedTools, useWorktrees, mergeStrategy: waveMerge,
|
|
@@ -485,10 +518,29 @@ export async function executeRun(cfg) {
|
|
|
485
518
|
accIn += retrySwarm.totalInputTokens;
|
|
486
519
|
accOut += retrySwarm.totalOutputTokens;
|
|
487
520
|
accTools += retrySwarm.agents.reduce((sum, a) => sum + a.toolCalls, 0);
|
|
488
|
-
// Any retry that still has 0 files → hard fail
|
|
489
|
-
const
|
|
521
|
+
// Any retry that still has 0 files OR a still-failing postcondition → hard fail
|
|
522
|
+
const retryFailedBranches = new Set(retrySwarm.mergeResults.filter(r => !r.ok).map(r => r.branch));
|
|
523
|
+
const stillZero = retrySwarm.agents.filter(a => {
|
|
524
|
+
if (a.status !== "done")
|
|
525
|
+
return false;
|
|
526
|
+
if ((a.filesChanged ?? 0) === 0)
|
|
527
|
+
return true;
|
|
528
|
+
if (!a.task.postcondition)
|
|
529
|
+
return false;
|
|
530
|
+
if (a.branch && retryFailedBranches.has(a.branch))
|
|
531
|
+
return true;
|
|
532
|
+
try {
|
|
533
|
+
execSync(a.task.postcondition, { cwd, stdio: "ignore", timeout: 30_000 });
|
|
534
|
+
return false;
|
|
535
|
+
}
|
|
536
|
+
catch {
|
|
537
|
+
return true;
|
|
538
|
+
}
|
|
539
|
+
});
|
|
490
540
|
for (const a of stillZero) {
|
|
491
|
-
|
|
541
|
+
const why = (a.filesChanged ?? 0) === 0 ? "still changed 0 files" : "postcondition still failing";
|
|
542
|
+
display.appendSteeringEvent(`RETRY FAILED: agent ${a.id} ${why} — task dropped as error`);
|
|
543
|
+
a.error = a.error ?? `retry failed: ${why}`;
|
|
492
544
|
accFailed++;
|
|
493
545
|
remaining = Math.max(0, remaining - 1);
|
|
494
546
|
}
|
|
@@ -554,9 +606,22 @@ export async function executeRun(cfg) {
|
|
|
554
606
|
const attemptedPrompts = new Set(swarm.agents.map(a => a.task.prompt));
|
|
555
607
|
const neverStarted = currentTasks.filter(t => !attemptedPrompts.has(t.prompt));
|
|
556
608
|
saveRunState(runDir, buildRunState({ remaining, phase: "steering", currentTasks: neverStarted }));
|
|
609
|
+
// Overlay merge outcomes: if an agent's branch failed to merge, its changes
|
|
610
|
+
// did NOT land — tell steering the truth (filesChanged=0, error attached)
|
|
611
|
+
// so it can't declare victory on work that didn't reach the codebase.
|
|
612
|
+
const failedMergeBranches = new Set(swarm.mergeResults.filter(r => !r.ok).map(r => r.branch));
|
|
557
613
|
waveHistory.push({
|
|
558
614
|
wave: waveNum,
|
|
559
|
-
tasks: swarm.agents.map(a =>
|
|
615
|
+
tasks: swarm.agents.map(a => {
|
|
616
|
+
const mergeFailed = a.branch && failedMergeBranches.has(a.branch);
|
|
617
|
+
return {
|
|
618
|
+
prompt: a.task.prompt,
|
|
619
|
+
status: a.status,
|
|
620
|
+
type: a.task.type,
|
|
621
|
+
filesChanged: mergeFailed ? 0 : a.filesChanged,
|
|
622
|
+
error: mergeFailed ? `merge-failed: branch ${a.branch} did not land` : a.error,
|
|
623
|
+
};
|
|
624
|
+
}),
|
|
560
625
|
});
|
|
561
626
|
// Hook-blocked work: agents that touched files but nothing landed on the
|
|
562
627
|
// branch (pre-commit hooks, gitignore, writes outside worktree). Surface
|
|
@@ -574,6 +639,39 @@ export async function executeRun(cfg) {
|
|
|
574
639
|
}
|
|
575
640
|
catch { }
|
|
576
641
|
}
|
|
642
|
+
// Merge-failed branches: changes never reached the codebase. Regenerate a
|
|
643
|
+
// pinned section in status.md every wave from live git state — resolved
|
|
644
|
+
// branches (deleted from git) drop off automatically; still-broken ones
|
|
645
|
+
// keep shouting at steering until a follow-up wave lands them or discards
|
|
646
|
+
// them. This is what turns merge-failed from a silent state into a
|
|
647
|
+
// first-class blocker.
|
|
648
|
+
try {
|
|
649
|
+
const unresolved = branches.filter(b => {
|
|
650
|
+
if (b.status !== "merge-failed")
|
|
651
|
+
return false;
|
|
652
|
+
try {
|
|
653
|
+
execSync(`git rev-parse --verify "${b.branch}"`, { cwd, stdio: "ignore" });
|
|
654
|
+
return true;
|
|
655
|
+
}
|
|
656
|
+
catch {
|
|
657
|
+
return false;
|
|
658
|
+
} // branch gone → treat as resolved
|
|
659
|
+
});
|
|
660
|
+
const statusPath = join(runDir, "status.md");
|
|
661
|
+
const existing = existsSync(statusPath) ? readFileSync(statusPath, "utf-8") : "";
|
|
662
|
+
const marker = "## Unresolved merge failures";
|
|
663
|
+
const idx = existing.indexOf(marker);
|
|
664
|
+
const base = idx >= 0 ? existing.slice(0, idx).replace(/\n+$/, "") : existing;
|
|
665
|
+
let next = base;
|
|
666
|
+
if (unresolved.length > 0) {
|
|
667
|
+
const list = unresolved.map(b => ` - ${b.branch} — ${b.taskPrompt.slice(0, 120)}`).join("\n");
|
|
668
|
+
next = `${base}${base ? "\n\n" : ""}${marker}\n${unresolved.length} branch(es) contain unmerged agent work. Resolve or discard before relying on those changes:\n${list}\n`;
|
|
669
|
+
display.appendSteeringEvent(`⚠ ${unresolved.length} unresolved merge failure(s) — see status.md`);
|
|
670
|
+
}
|
|
671
|
+
if (next !== existing)
|
|
672
|
+
writeFileSync(statusPath, next, "utf-8");
|
|
673
|
+
}
|
|
674
|
+
catch { }
|
|
577
675
|
// Fire-and-forget debrief after each wave.
|
|
578
676
|
runDebrief(`Wave ${waveNum + 1} just finished.`);
|
|
579
677
|
// After-wave commands: run shell commands in cwd after each wave (e.g. "supabase db push").
|
package/dist/steering.js
CHANGED
|
@@ -16,7 +16,7 @@ const STEER_SCHEMA = {
|
|
|
16
16
|
type: "array",
|
|
17
17
|
items: {
|
|
18
18
|
type: "object",
|
|
19
|
-
properties: { prompt: { type: "string" }, model: { type: "string" }, noWorktree: { type: "boolean" }, type: { type: "string", enum: ["execute", "explore", "critique", "synthesize", "verify", "user-test", "polish"] } },
|
|
19
|
+
properties: { prompt: { type: "string" }, model: { type: "string" }, noWorktree: { type: "boolean" }, type: { type: "string", enum: ["execute", "explore", "critique", "synthesize", "verify", "user-test", "polish"] }, postcondition: { type: "string" } },
|
|
20
20
|
required: ["prompt"],
|
|
21
21
|
},
|
|
22
22
|
},
|
|
@@ -103,7 +103,7 @@ Respond with ONLY a JSON object (no markdown fences):
|
|
|
103
103
|
"statusUpdate": "REQUIRED -- concise project status: what's built, what works, what's rough, quality level, key gaps. This replaces the previous status.",
|
|
104
104
|
"estimatedSessionsRemaining": 15,
|
|
105
105
|
"tasks": [
|
|
106
|
-
{"prompt": "task instruction...", "model": "worker"},
|
|
106
|
+
{"prompt": "task instruction...", "model": "worker", "postcondition": "test -f src/new-file.ts"},
|
|
107
107
|
{"prompt": "quick icon fix, verified by worker next wave...", "model": "fast"},
|
|
108
108
|
{"prompt": "verify the app end-to-end...", "model": "worker", "noWorktree": true}
|
|
109
109
|
]
|
|
@@ -114,6 +114,8 @@ Respond with ONLY a JSON object (no markdown fences):
|
|
|
114
114
|
The "model" field on each task: use "worker" (${workerModel}) for all tasks. Use "fast" (${fastModel ?? "not set"}) for small, single-file changes that will be checked by the worker in the next wave.
|
|
115
115
|
Set "noWorktree": true for verify/user-test tasks -- they need the real project directory with env files, dependencies, and local config.
|
|
116
116
|
|
|
117
|
+
OPTIONAL "postcondition": a single shell one-liner that exits 0 when the task is truly done. The framework runs it after merge; if it fails, the agent's "no-op" claim is rejected and the task is retried with the failure output as context. Use it whenever the task has a concrete, machine-checkable outcome. Examples: \`test -f src/tracking/watchlist-poller.ts && grep -q "runWatchlistPoll" src/tracking/watchlist-poller.ts\`, \`grep -q "watchlistPollerTask" src/scraper/scheduler.ts\`, \`pnpm run build\`, \`diff -q src/public/index.html frontend/dist/index.html\`. Keep it cheap (sub-second, no network). Omit for exploratory/research tasks where there is no crisp check.
|
|
118
|
+
|
|
117
119
|
If done: {"done": true, "reasoning": "...", "statusUpdate": "...", "estimatedSessionsRemaining": 0, "tasks": []}`;
|
|
118
120
|
onLog("Assessing...", "status");
|
|
119
121
|
onLog(`Reading codebase -- wave ${history.length + 1}`, "event");
|
|
@@ -151,6 +153,7 @@ If done: {"done": true, "reasoning": "...", "statusUpdate": "...", "estimatedSes
|
|
|
151
153
|
...(t.model && { model: resolveModel(t.model) }),
|
|
152
154
|
...(t.noWorktree && { noWorktree: true }),
|
|
153
155
|
...(t.type && { type: t.type }),
|
|
156
|
+
...(typeof t.postcondition === "string" && t.postcondition.trim() && { postcondition: t.postcondition.trim() }),
|
|
154
157
|
}));
|
|
155
158
|
tasks = postProcess(tasks, remainingBudget, onLog);
|
|
156
159
|
endTurn(turn, tasks.length === 0 && !isDone ? "error" : "done");
|
package/dist/swarm.js
CHANGED
|
@@ -553,10 +553,13 @@ export class Swarm {
|
|
|
553
553
|
let resumePrompt = "Continue. Complete the task.";
|
|
554
554
|
const runOnce = async (isResume) => {
|
|
555
555
|
const preamble = "Keep files under ~500 lines. If a file would exceed that, split it.\n\n";
|
|
556
|
+
const postBlock = task.postcondition
|
|
557
|
+
? `\n\nEXIT CRITERION — after you finish, the framework will run this shell check in cwd and reject a no-op if it fails:\n $ ${task.postcondition}\nYour work is not done until that command exits 0. Don't claim no-op unless you can prove the check already passes.`
|
|
558
|
+
: "";
|
|
556
559
|
const agentPrompt = isResume ? resumePrompt
|
|
557
560
|
: this.config.useWorktrees && !task.noWorktree
|
|
558
|
-
? `You are working in an isolated git worktree. Focus only on this task. Do NOT commit your changes -- the framework handles that.\n\n${preamble}${task.prompt}`
|
|
559
|
-
: `${preamble}${task.prompt}`;
|
|
561
|
+
? `You are working in an isolated git worktree. Focus only on this task. Do NOT commit your changes -- the framework handles that.\n\n${preamble}${task.prompt}${postBlock}`
|
|
562
|
+
: `${preamble}${task.prompt}${postBlock}`;
|
|
560
563
|
const effectiveModel = task.model || this.config.model;
|
|
561
564
|
const envOverride = this.config.envForModel?.(effectiveModel);
|
|
562
565
|
const agentQuery = query({
|
package/dist/types.d.ts
CHANGED
|
@@ -16,6 +16,8 @@ export interface Task {
|
|
|
16
16
|
agentCwd?: string;
|
|
17
17
|
/** The kind of work: "execute" modifies files, others are read-only/analysis. Defaults to "execute". */
|
|
18
18
|
type?: string;
|
|
19
|
+
/** Shell command that must exit 0 for the task to be considered done. Runs in cwd after merge. Failed postconditions trigger the same retry path as filesChanged=0. */
|
|
20
|
+
postcondition?: string;
|
|
19
21
|
}
|
|
20
22
|
/** Schema for a JSON task file that defines a batch of work for the swarm. */
|
|
21
23
|
export interface TaskFile {
|
package/dist/ui.d.ts
CHANGED
|
@@ -84,8 +84,10 @@ export declare class RunDisplay {
|
|
|
84
84
|
private lastFrame;
|
|
85
85
|
private onSteer?;
|
|
86
86
|
private onAsk?;
|
|
87
|
-
/** Set or clear the debrief text shown in the interactive panel.
|
|
88
|
-
|
|
87
|
+
/** Set or clear the debrief text shown in the interactive panel.
|
|
88
|
+
* When a label is provided alongside resolved text, it's appended to
|
|
89
|
+
* the running history so expanded view shows all wave debriefs. */
|
|
90
|
+
setDebrief(text: string | undefined, label?: string): void;
|
|
89
91
|
constructor(runInfo: RunInfo, liveConfig?: LiveConfig, callbacks?: {
|
|
90
92
|
onSteer?: (text: string) => void;
|
|
91
93
|
onAsk?: (text: string) => void;
|
package/dist/ui.js
CHANGED
|
@@ -49,10 +49,16 @@ export class RunDisplay {
|
|
|
49
49
|
lastFrame = "";
|
|
50
50
|
onSteer;
|
|
51
51
|
onAsk;
|
|
52
|
-
/** Set or clear the debrief text shown in the interactive panel.
|
|
53
|
-
|
|
52
|
+
/** Set or clear the debrief text shown in the interactive panel.
|
|
53
|
+
* When a label is provided alongside resolved text, it's appended to
|
|
54
|
+
* the running history so expanded view shows all wave debriefs. */
|
|
55
|
+
setDebrief(text, label) {
|
|
54
56
|
if (text) {
|
|
55
57
|
this.panel.set({ mode: "debrief", header: "Debrief", preview: text, body: text });
|
|
58
|
+
// Append to accumulated history when we have the final text (not loading message)
|
|
59
|
+
if (label && !text.startsWith("Summarizing")) {
|
|
60
|
+
this.panel.appendHistory(label, text);
|
|
61
|
+
}
|
|
56
62
|
}
|
|
57
63
|
else if (this.panel.state.mode === "debrief") {
|
|
58
64
|
this.panel.set({ mode: "none", header: "", preview: "", body: "" });
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-overnight",
|
|
3
|
-
"version": "1.25.33",
|
|
3
|
+
"version": "1.25.37",
|
|
4
4
|
"description": "Parallel Claude agents in git worktrees with a usage cap that reserves headroom for your interactive Claude Code. Crash-safe resume. Provider-agnostic model catalog (Anthropic, Cursor, OpenAI, Gemini, DeepSeek, Llama, Qwen) with capability-based task scoping.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -17,7 +17,7 @@
|
|
|
17
17
|
"dependencies": {
|
|
18
18
|
"@anthropic-ai/claude-agent-sdk": "^0.2.92",
|
|
19
19
|
"chalk": "^5.4.1",
|
|
20
|
-
"cursor-composer-in-claude": "0.9.
|
|
20
|
+
"cursor-composer-in-claude": "0.9.3",
|
|
21
21
|
"jsonwebtoken": "^9.0.2"
|
|
22
22
|
},
|
|
23
23
|
"devDependencies": {
|
package/plugins/claude-overnight/.claude-plugin/plugin.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-overnight",
|
|
3
|
-
"version": "1.25.33",
|
|
3
|
+
"version": "1.25.37",
|
|
4
4
|
"description": "Claude Code skill for understanding, installing, and inspecting claude-overnight runs -- parallel Claude agents in git worktrees with thinking waves, multi-wave steering, and crash-safe resume. Supports Cursor API Proxy, Qwen, OpenRouter.",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "Francesco Fornace"
|