claude-overnight 1.25.46 → 1.25.48
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/cli.d.ts +3 -24
- package/dist/cli/cli.js +59 -92
- package/dist/cli/help.js +3 -1
- package/dist/core/_version.d.ts +1 -1
- package/dist/core/_version.js +1 -1
- package/dist/index.js +44 -4
- package/dist/planner/verifier.d.ts +66 -0
- package/dist/planner/verifier.js +117 -0
- package/dist/run/run.js +29 -0
- package/dist/run/wave-loop.d.ts +2 -0
- package/dist/run/wave-loop.js +9 -8
- package/dist/ui/input.d.ts +4 -4
- package/dist/ui/input.js +154 -166
- package/dist/ui/raw-input.d.ts +38 -0
- package/dist/ui/raw-input.js +241 -0
- package/package.json +1 -1
- package/plugins/claude-overnight/.claude-plugin/plugin.json +1 -1
package/dist/cli/cli.d.ts
CHANGED
|
@@ -9,32 +9,11 @@ import { isJWTAuthError } from "../core/auth.js";
|
|
|
9
9
|
export declare const isAuthError: typeof isJWTAuthError;
|
|
10
10
|
export { isJWTAuthError };
|
|
11
11
|
export declare function fetchModels(timeoutMs?: number): Promise<ModelInfo[]>;
|
|
12
|
-
export declare const PASTE_START = "\u001B[200~";
|
|
13
|
-
export declare const PASTE_END = "\u001B[201~";
|
|
14
12
|
export declare const PASTE_PLACEHOLDER_MAX = 80;
|
|
15
|
-
export type InputSegment = {
|
|
16
|
-
type: "text";
|
|
17
|
-
content: string;
|
|
18
|
-
} | {
|
|
19
|
-
type: "paste";
|
|
20
|
-
content: string;
|
|
21
|
-
};
|
|
22
|
-
/** Split a raw stdin chunk into typed and pasted segments. */
|
|
23
|
-
export declare function splitPaste(chunk: string): Array<{
|
|
24
|
-
type: "typed" | "paste";
|
|
25
|
-
text: string;
|
|
26
|
-
}>;
|
|
27
|
-
export declare function segmentsToString(segs: InputSegment[]): string;
|
|
28
|
-
export declare function renderSegments(segs: InputSegment[]): string;
|
|
29
|
-
export declare function appendCharToSegments(segs: InputSegment[], ch: string): void;
|
|
30
|
-
/** Appends a pasted block. Short single-line pastes inline as text; the rest become placeholders. */
|
|
31
|
-
export declare function appendPasteToSegments(segs: InputSegment[], text: string): void;
|
|
32
|
-
/** Backspace removes one char, or an entire paste block atomically. */
|
|
33
|
-
export declare function backspaceSegments(segs: InputSegment[]): void;
|
|
34
13
|
/**
|
|
35
|
-
* Read a line from the user with bracketed-paste awareness.
|
|
36
|
-
*
|
|
37
|
-
*
|
|
14
|
+
* Read a line from the user with bracketed-paste awareness. Pasted multi-line
|
|
15
|
+
* text stays in the buffer as a single block -- only a typed Enter submits.
|
|
16
|
+
* Falls back to cooked readline when stdin isn't a TTY.
|
|
38
17
|
*/
|
|
39
18
|
export declare function ask(question: string): Promise<string>;
|
|
40
19
|
export declare function select<T>(label: string, items: {
|
package/dist/cli/cli.js
CHANGED
|
@@ -4,6 +4,7 @@ import { resolve } from "path";
|
|
|
4
4
|
import { createInterface } from "readline";
|
|
5
5
|
import chalk from "chalk";
|
|
6
6
|
import { query } from "@anthropic-ai/claude-agent-sdk";
|
|
7
|
+
import { parseChunk, setBracketedPaste, deleteWordBackward } from "../ui/raw-input.js";
|
|
7
8
|
// ── CLI flag parsing ──
|
|
8
9
|
export function parseCliFlags(argv) {
|
|
9
10
|
const known = new Set(["concurrency", "model", "timeout", "budget", "usage-cap", "extra-usage-budget", "merge"]);
|
|
@@ -66,69 +67,34 @@ export async function fetchModels(timeoutMs = 10_000) {
|
|
|
66
67
|
return [];
|
|
67
68
|
}
|
|
68
69
|
}
|
|
69
|
-
// ──
|
|
70
|
+
// ── Interactive primitives ──
|
|
70
71
|
//
|
|
71
|
-
//
|
|
72
|
-
//
|
|
73
|
-
//
|
|
74
|
-
//
|
|
75
|
-
|
|
76
|
-
|
|
72
|
+
// Text entry goes through the shared raw-input parser in `../ui/raw-input.ts`,
|
|
73
|
+
// which enforces the single invariant that used to be duplicated (and buggy)
|
|
74
|
+
// here and in the Ink overlay:
|
|
75
|
+
// - Typed Enter = a stdin chunk that is exactly "\r", "\n", or "\r\n".
|
|
76
|
+
// - Anything else with embedded newlines is a paste, not a submit.
|
|
77
|
+
// Multi-line pastes render as a compact `[Pasted +N lines]` placeholder while
|
|
78
|
+
// editing — the full content is substituted on submit.
|
|
77
79
|
export const PASTE_PLACEHOLDER_MAX = 80;
|
|
78
|
-
|
|
79
|
-
export function splitPaste(chunk) {
|
|
80
|
-
const out = [];
|
|
81
|
-
let i = 0;
|
|
82
|
-
while (i < chunk.length) {
|
|
83
|
-
const start = chunk.indexOf(PASTE_START, i);
|
|
84
|
-
if (start === -1) {
|
|
85
|
-
out.push({ type: "typed", text: chunk.slice(i) });
|
|
86
|
-
break;
|
|
87
|
-
}
|
|
88
|
-
if (start > i)
|
|
89
|
-
out.push({ type: "typed", text: chunk.slice(i, start) });
|
|
90
|
-
const bodyStart = start + PASTE_START.length;
|
|
91
|
-
const end = chunk.indexOf(PASTE_END, bodyStart);
|
|
92
|
-
if (end === -1) {
|
|
93
|
-
out.push({ type: "paste", text: chunk.slice(bodyStart) });
|
|
94
|
-
break;
|
|
95
|
-
}
|
|
96
|
-
out.push({ type: "paste", text: chunk.slice(bodyStart, end) });
|
|
97
|
-
i = end + PASTE_END.length;
|
|
98
|
-
}
|
|
99
|
-
return out;
|
|
100
|
-
}
|
|
101
|
-
export function segmentsToString(segs) {
|
|
102
|
-
return segs.map((s) => s.content).join("");
|
|
103
|
-
}
|
|
104
|
-
export function renderSegments(segs) {
|
|
105
|
-
return segs.map((s) => {
|
|
106
|
-
if (s.type === "text")
|
|
107
|
-
return s.content;
|
|
108
|
-
const lines = s.content.split("\n").length;
|
|
109
|
-
return chalk.dim(`[Pasted +${lines} line${lines === 1 ? "" : "s"}]`);
|
|
110
|
-
}).join("");
|
|
111
|
-
}
|
|
112
|
-
export function appendCharToSegments(segs, ch) {
|
|
80
|
+
function appendTypedChar(segs, ch) {
|
|
113
81
|
const last = segs[segs.length - 1];
|
|
114
82
|
if (last && last.type === "text")
|
|
115
83
|
last.content += ch;
|
|
116
84
|
else
|
|
117
85
|
segs.push({ type: "text", content: ch });
|
|
118
86
|
}
|
|
119
|
-
|
|
120
|
-
export function appendPasteToSegments(segs, text) {
|
|
87
|
+
function appendPaste(segs, text) {
|
|
121
88
|
if (!text)
|
|
122
89
|
return;
|
|
123
90
|
const norm = text.replace(/\r\n?/g, "\n");
|
|
124
91
|
if (!norm.includes("\n") && norm.length <= PASTE_PLACEHOLDER_MAX) {
|
|
125
|
-
|
|
92
|
+
appendTypedChar(segs, norm);
|
|
126
93
|
return;
|
|
127
94
|
}
|
|
128
95
|
segs.push({ type: "paste", content: norm });
|
|
129
96
|
}
|
|
130
|
-
|
|
131
|
-
export function backspaceSegments(segs) {
|
|
97
|
+
function backspaceSegs(segs) {
|
|
132
98
|
while (segs.length > 0) {
|
|
133
99
|
const last = segs[segs.length - 1];
|
|
134
100
|
if (last.type === "paste") {
|
|
@@ -143,14 +109,22 @@ export function backspaceSegments(segs) {
|
|
|
143
109
|
return;
|
|
144
110
|
}
|
|
145
111
|
}
|
|
112
|
+
function segsToString(segs) { return segs.map((s) => s.content).join(""); }
|
|
113
|
+
function renderSegs(segs) {
|
|
114
|
+
return segs.map((s) => {
|
|
115
|
+
if (s.type === "text")
|
|
116
|
+
return s.content;
|
|
117
|
+
const lines = s.content.split("\n").length;
|
|
118
|
+
return chalk.dim(`[Pasted +${lines} line${lines === 1 ? "" : "s"}]`);
|
|
119
|
+
}).join("");
|
|
120
|
+
}
|
|
146
121
|
function stripAnsi(s) {
|
|
147
122
|
return s.replace(/\x1B\[[0-9;]*[a-zA-Z]/g, "");
|
|
148
123
|
}
|
|
149
|
-
// ── Interactive primitives ──
|
|
150
124
|
/**
|
|
151
|
-
* Read a line from the user with bracketed-paste awareness.
|
|
152
|
-
*
|
|
153
|
-
*
|
|
125
|
+
* Read a line from the user with bracketed-paste awareness. Pasted multi-line
|
|
126
|
+
* text stays in the buffer as a single block -- only a typed Enter submits.
|
|
127
|
+
* Falls back to cooked readline when stdin isn't a TTY.
|
|
154
128
|
*/
|
|
155
129
|
export function ask(question) {
|
|
156
130
|
const { stdin, stdout } = process;
|
|
@@ -163,28 +137,25 @@ export function ask(question) {
|
|
|
163
137
|
const tail = question.split("\n").pop() ?? "";
|
|
164
138
|
const tailVisibleLen = stripAnsi(tail).length;
|
|
165
139
|
let prevWrapRows = 0;
|
|
166
|
-
// Only rewrite the input line (and any wrapped continuation rows). The
|
|
167
|
-
// question header above is never touched, so redraws can't stack copies
|
|
168
|
-
// even if the initial write scrolled the viewport.
|
|
169
140
|
const redraw = () => {
|
|
170
141
|
const cols = stdout.columns || 80;
|
|
171
142
|
if (prevWrapRows > 0)
|
|
172
143
|
stdout.write(`\x1B[${prevWrapRows}A`);
|
|
173
144
|
stdout.write("\r\x1B[J");
|
|
174
|
-
const rendered = renderSegments(segs);
|
|
145
|
+
const rendered = renderSegs(segs);
|
|
175
146
|
stdout.write(tail + rendered);
|
|
176
147
|
const visible = tailVisibleLen + stripAnsi(rendered).length;
|
|
177
148
|
prevWrapRows = visible > 0 ? Math.floor((visible - 1) / cols) : 0;
|
|
178
149
|
};
|
|
179
150
|
stdout.write(question);
|
|
180
|
-
stdout
|
|
151
|
+
setBracketedPaste(stdout, true);
|
|
181
152
|
try {
|
|
182
153
|
stdin.setRawMode(true);
|
|
183
154
|
}
|
|
184
155
|
catch { }
|
|
185
156
|
stdin.resume();
|
|
186
157
|
const cleanup = () => {
|
|
187
|
-
stdout
|
|
158
|
+
setBracketedPaste(stdout, false);
|
|
188
159
|
try {
|
|
189
160
|
stdin.setRawMode(false);
|
|
190
161
|
}
|
|
@@ -192,48 +163,44 @@ export function ask(question) {
|
|
|
192
163
|
stdin.removeListener("data", onData);
|
|
193
164
|
stdin.pause();
|
|
194
165
|
};
|
|
166
|
+
const submit = () => { stdout.write("\n"); cleanup(); resolve(segsToString(segs).trim()); };
|
|
195
167
|
const onData = (buf) => {
|
|
196
|
-
const
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
168
|
+
for (const ev of parseChunk(buf.toString())) {
|
|
169
|
+
switch (ev.type) {
|
|
170
|
+
case "char":
|
|
171
|
+
appendTypedChar(segs, ev.text);
|
|
172
|
+
break;
|
|
173
|
+
case "paste":
|
|
174
|
+
appendPaste(segs, ev.text);
|
|
175
|
+
break;
|
|
176
|
+
case "backspace":
|
|
177
|
+
backspaceSegs(segs);
|
|
178
|
+
break;
|
|
179
|
+
case "word-delete": {
|
|
180
|
+
const s = segsToString(segs);
|
|
181
|
+
const next = deleteWordBackward(s);
|
|
182
|
+
segs.length = 0;
|
|
183
|
+
if (next)
|
|
184
|
+
segs.push({ type: "text", content: next });
|
|
185
|
+
break;
|
|
210
186
|
}
|
|
211
|
-
|
|
187
|
+
case "clear-line":
|
|
188
|
+
segs.length = 0;
|
|
189
|
+
break;
|
|
190
|
+
case "submit":
|
|
191
|
+
submit();
|
|
192
|
+
return;
|
|
193
|
+
case "cancel":
|
|
194
|
+
submit();
|
|
195
|
+
return; // lone ESC = submit, preserves old behavior
|
|
196
|
+
case "interrupt":
|
|
212
197
|
cleanup();
|
|
213
198
|
stdout.write("\n");
|
|
214
199
|
process.exit(130);
|
|
215
|
-
|
|
216
|
-
if (ch === "\x7F" || ch === "\b") {
|
|
217
|
-
backspaceSegments(segs);
|
|
218
|
-
redraw();
|
|
219
|
-
continue;
|
|
220
|
-
}
|
|
221
|
-
// ESC submits the current input (same as Enter)
|
|
222
|
-
if (ch === "\x1B") {
|
|
223
|
-
stdout.write("\n");
|
|
224
|
-
cleanup();
|
|
225
|
-
resolve(segmentsToString(segs).trim());
|
|
226
|
-
return;
|
|
227
|
-
}
|
|
228
|
-
const code = ch.charCodeAt(0);
|
|
229
|
-
if (code < 0x20)
|
|
230
|
-
continue; // control chars
|
|
231
|
-
if (code >= 0x7F && code < 0xA0)
|
|
232
|
-
continue; // DEL + C1 controls
|
|
233
|
-
appendCharToSegments(segs, ch);
|
|
200
|
+
// tab + nav: ignore during single-line prompts
|
|
234
201
|
}
|
|
235
|
-
redraw();
|
|
236
202
|
}
|
|
203
|
+
redraw();
|
|
237
204
|
};
|
|
238
205
|
stdin.on("data", onData);
|
|
239
206
|
});
|
package/dist/cli/help.js
CHANGED
|
@@ -16,6 +16,7 @@ export function printHelp() {
|
|
|
16
16
|
${chalk.cyan("Usage")}
|
|
17
17
|
claude-overnight ${chalk.dim("interactive mode")}
|
|
18
18
|
claude-overnight tasks.json ${chalk.dim("task file mode")}
|
|
19
|
+
claude-overnight plan.md ${chalk.dim("plan file mode (.md) — coach + flex")}
|
|
19
20
|
claude-overnight "fix auth" "add tests" ${chalk.dim("inline tasks")}
|
|
20
21
|
|
|
21
22
|
${chalk.cyan("Flags")}
|
|
@@ -30,7 +31,8 @@ export function printHelp() {
|
|
|
30
31
|
--allow-extra-usage Allow extra/overage usage ${chalk.dim("(default: stop when plan limits hit)")}
|
|
31
32
|
--extra-usage-budget=N Max $ for extra usage ${chalk.dim("(implies --allow-extra-usage)")}
|
|
32
33
|
--timeout=SECONDS Agent inactivity timeout ${chalk.dim("(default: 900s, nudges at timeout, kills at 2×)")}
|
|
33
|
-
--
|
|
34
|
+
--flex Force adaptive multi-wave planning ${chalk.dim("(steering between waves)")}
|
|
35
|
+
--no-flex Fixed plan mode ${chalk.dim("(verifier between waves, no re-planning)")}
|
|
34
36
|
--worktrees Force worktree isolation on ${chalk.dim("(default: auto-detect git repo)")}
|
|
35
37
|
--no-worktrees Disable worktree isolation ${chalk.dim("(all agents work in real cwd)")}
|
|
36
38
|
--merge=MODE Merge strategy: yolo or branch ${chalk.dim("(default: yolo)")}
|
package/dist/core/_version.d.ts
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
export declare const VERSION = "1.25.46";
|
|
1
|
+
export declare const VERSION = "1.25.48";
|
package/dist/core/_version.js
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
// Auto-generated by build — do not edit manually.
|
|
2
|
-
export const VERSION = "1.25.46";
|
|
2
|
+
export const VERSION = "1.25.48";
|
package/dist/index.js
CHANGED
|
@@ -8,7 +8,7 @@ import { setPlannerEnvResolver } from "./planner/query.js";
|
|
|
8
8
|
import { setTranscriptRunDir } from "./core/transcripts.js";
|
|
9
9
|
import { pickModel, loadProviders, buildEnvResolver, healthCheckCursorProxy, PROXY_DEFAULT_URL, isCursorProxyProvider, bundledComposerProxyShellCommand, warnMacCursorAgentShellPatchIfNeeded, } from "./providers/index.js";
|
|
10
10
|
import { executeRun } from "./run/run.js";
|
|
11
|
-
import { parseCliFlags, fetchModels, ask, select, selectKey, loadTaskFile, validateConcurrency, isGitRepo, validateGitRepo, showPlan, } from "./cli/cli.js";
|
|
11
|
+
import { parseCliFlags, fetchModels, ask, select, selectKey, loadTaskFile, loadPlanFile, validateConcurrency, isGitRepo, validateGitRepo, showPlan, } from "./cli/cli.js";
|
|
12
12
|
import { loadRunState, findOrphanedDesigns, backfillOrphanedPlans, readPreviousRunKnowledge, createRunDir, updateLatestSymlink, } from "./state/state.js";
|
|
13
13
|
import { runSetupCoach, loadUserSettings, saveUserSettings, COACH_MODEL } from "./planner/coach/coach.js";
|
|
14
14
|
import { editRunSettings, formatSettingsSummary } from "./cli/settings.js";
|
|
@@ -63,11 +63,21 @@ async function main() {
|
|
|
63
63
|
// ── Load tasks ──
|
|
64
64
|
let tasks = [];
|
|
65
65
|
let fileCfg;
|
|
66
|
+
let planFileContent;
|
|
66
67
|
const jsonFiles = args.filter(a => a.endsWith(".json"));
|
|
68
|
+
const mdFiles = args.filter(a => a.endsWith(".md"));
|
|
67
69
|
if (jsonFiles.length > 1) {
|
|
68
70
|
console.error(chalk.red(` Multiple task files provided. Only one .json file is supported.`));
|
|
69
71
|
process.exit(1);
|
|
70
72
|
}
|
|
73
|
+
if (mdFiles.length > 1) {
|
|
74
|
+
console.error(chalk.red(` Multiple plan files provided. Only one .md file is supported.`));
|
|
75
|
+
process.exit(1);
|
|
76
|
+
}
|
|
77
|
+
if (jsonFiles.length && mdFiles.length) {
|
|
78
|
+
console.error(chalk.red(` Cannot mix a .json task file with a .md plan file.`));
|
|
79
|
+
process.exit(1);
|
|
80
|
+
}
|
|
71
81
|
for (const arg of args) {
|
|
72
82
|
if (arg.endsWith(".json")) {
|
|
73
83
|
if (tasks.length > 0) {
|
|
@@ -77,8 +87,13 @@ async function main() {
|
|
|
77
87
|
fileCfg = loadTaskFile(arg);
|
|
78
88
|
tasks = fileCfg.tasks;
|
|
79
89
|
}
|
|
90
|
+
else if (arg.endsWith(".md")) {
|
|
91
|
+
const plan = loadPlanFile(arg);
|
|
92
|
+
planFileContent = plan.planContent;
|
|
93
|
+
fileCfg = { tasks: [], objective: plan.objective, flexiblePlan: true };
|
|
94
|
+
}
|
|
80
95
|
else if (!arg.startsWith("-") && existsSync(resolve(arg))) {
|
|
81
|
-
console.error(chalk.red(` "${arg}" looks like a file but doesn't end in .json. Rename it or quote the string.`));
|
|
96
|
+
console.error(chalk.red(` "${arg}" looks like a file but doesn't end in .json or .md. Rename it or quote the string.`));
|
|
82
97
|
process.exit(1);
|
|
83
98
|
}
|
|
84
99
|
else {
|
|
@@ -341,6 +356,30 @@ async function main() {
|
|
|
341
356
|
console.log(chalk.dim(` ╰${"─".repeat(innerLen + 4)}╯`));
|
|
342
357
|
}
|
|
343
358
|
else {
|
|
359
|
+
// ── Setup coach in confirm-only mode (task/plan file on a TTY) ──
|
|
360
|
+
let coachResult = null;
|
|
361
|
+
if (fileCfg?.objective && process.stdin.isTTY
|
|
362
|
+
&& !argv.includes("--no-coach") && !loadUserSettings().skipCoach) {
|
|
363
|
+
const settings = loadUserSettings();
|
|
364
|
+
const cModel = settings.coachModel ?? COACH_MODEL;
|
|
365
|
+
const cProvider = settings.coachProviderId
|
|
366
|
+
? loadProviders().find(p => p.id === settings.coachProviderId) : undefined;
|
|
367
|
+
coachResult = await runSetupCoach(fileCfg.objective, cwd, {
|
|
368
|
+
providers: loadProviders(), cliFlags, coachModel: cModel, coachProvider: cProvider,
|
|
369
|
+
planContent: planFileContent, confirmOnly: true,
|
|
370
|
+
});
|
|
371
|
+
if (coachResult) {
|
|
372
|
+
coachedOriginal = fileCfg.objective;
|
|
373
|
+
coachedAt = Date.now();
|
|
374
|
+
fileCfg.objective = coachResult.improvedObjective;
|
|
375
|
+
objective = coachResult.improvedObjective;
|
|
376
|
+
const rec = coachResult.recommended;
|
|
377
|
+
if (fileCfg.concurrency == null)
|
|
378
|
+
fileCfg.concurrency = rec.concurrency;
|
|
379
|
+
if (fileCfg.usageCap == null && rec.usageCap != null)
|
|
380
|
+
fileCfg.usageCap = Math.round(rec.usageCap * 100);
|
|
381
|
+
}
|
|
382
|
+
}
|
|
344
383
|
let models = [];
|
|
345
384
|
if (!cliFlags.model && !fileCfg?.model)
|
|
346
385
|
models = await fetchModels(5_000);
|
|
@@ -374,7 +413,7 @@ async function main() {
|
|
|
374
413
|
}
|
|
375
414
|
}
|
|
376
415
|
concurrency = cliFlags.concurrency ? parseInt(cliFlags.concurrency) : (fileCfg?.concurrency ?? 5);
|
|
377
|
-
budget = cliFlags.budget ? parseInt(cliFlags.budget) :
|
|
416
|
+
budget = cliFlags.budget ? parseInt(cliFlags.budget) : coachResult?.recommended.budget;
|
|
378
417
|
if (budget != null && (isNaN(budget) || budget < 1)) {
|
|
379
418
|
console.error(chalk.red(` --budget must be a positive integer`));
|
|
380
419
|
process.exit(1);
|
|
@@ -442,7 +481,8 @@ async function main() {
|
|
|
442
481
|
console.log(chalk.dim(` ${workerModel} concurrency=${concurrency} worktrees=${useWorktrees} merge=${mergeStrategy}${capStr}${extraStr}`));
|
|
443
482
|
}
|
|
444
483
|
// ── Plan phase ──
|
|
445
|
-
const
|
|
484
|
+
const flexFlag = argv.includes("--flex") ? true : argv.includes("--no-flex") ? false : undefined;
|
|
485
|
+
const flex = objective != null && (flexFlag ?? ((fileCfg?.flexiblePlan ?? true) && (budget ?? 10) > 2));
|
|
446
486
|
const agentTimeoutMs = cliFlags.timeout ? parseFloat(cliFlags.timeout) * 1000 : undefined;
|
|
447
487
|
let thinkingUsed = 0, thinkingCost = 0, thinkingIn = 0, thinkingOut = 0, thinkingTools = 0;
|
|
448
488
|
let thinkingHistory;
|
package/dist/planner/verifier.d.ts
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import type { Task, SteerResult, WaveSummary } from "../core/types.js";
|
|
2
|
+
import { type PlannerLog } from "./query.js";
|
|
3
|
+
export declare const VERIFY_SCHEMA: {
|
|
4
|
+
type: "json_schema";
|
|
5
|
+
schema: {
|
|
6
|
+
type: string;
|
|
7
|
+
properties: {
|
|
8
|
+
done: {
|
|
9
|
+
type: string;
|
|
10
|
+
};
|
|
11
|
+
reasoning: {
|
|
12
|
+
type: string;
|
|
13
|
+
};
|
|
14
|
+
statusUpdate: {
|
|
15
|
+
type: string;
|
|
16
|
+
};
|
|
17
|
+
estimatedSessionsRemaining: {
|
|
18
|
+
type: string;
|
|
19
|
+
};
|
|
20
|
+
verifiedCount: {
|
|
21
|
+
type: string;
|
|
22
|
+
};
|
|
23
|
+
retryCount: {
|
|
24
|
+
type: string;
|
|
25
|
+
};
|
|
26
|
+
tasks: {
|
|
27
|
+
type: string;
|
|
28
|
+
items: {
|
|
29
|
+
type: string;
|
|
30
|
+
properties: {
|
|
31
|
+
prompt: {
|
|
32
|
+
type: string;
|
|
33
|
+
};
|
|
34
|
+
model: {
|
|
35
|
+
type: string;
|
|
36
|
+
};
|
|
37
|
+
noWorktree: {
|
|
38
|
+
type: string;
|
|
39
|
+
};
|
|
40
|
+
type: {
|
|
41
|
+
type: string;
|
|
42
|
+
enum: string[];
|
|
43
|
+
};
|
|
44
|
+
postcondition: {
|
|
45
|
+
type: string;
|
|
46
|
+
};
|
|
47
|
+
};
|
|
48
|
+
required: string[];
|
|
49
|
+
};
|
|
50
|
+
};
|
|
51
|
+
};
|
|
52
|
+
required: string[];
|
|
53
|
+
};
|
|
54
|
+
};
|
|
55
|
+
/**
|
|
56
|
+
* Verify the previous wave and compose the next fixed batch of pending tasks.
|
|
57
|
+
*
|
|
58
|
+
* Unlike `steerWave`, the verifier does not invent new tasks — it:
|
|
59
|
+
* 1. Runs the project's build/smoke checks.
|
|
60
|
+
* 2. Fixes shallow regressions in the last wave (edits directly).
|
|
61
|
+
* 3. Picks the next N pending tasks from the user's fixed plan.
|
|
62
|
+
*
|
|
63
|
+
* The model has full tool access so it can actually repair broken commits,
|
|
64
|
+
* not just report on them.
|
|
65
|
+
*/
|
|
66
|
+
export declare function verifyWave(objective: string, pendingTasks: Task[], lastWave: WaveSummary | undefined, remainingBudget: number, cwd: string, plannerModel: string, concurrency: number, onLog: PlannerLog, transcriptName?: string): Promise<SteerResult>;
|
package/dist/planner/verifier.js
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
import { runPlannerQuery, attemptJsonParse, postProcess } from "./query.js";
|
|
2
|
+
import { createTurn, beginTurn, endTurn } from "../core/turns.js";
|
|
3
|
+
// Verifier schema — same shape as STEER_SCHEMA plus optional `verifiedCount` and
|
|
4
|
+
// `retryCount` numbers; it declares no `verifiedIds` list, and verifyWave never reads them.
|
|
5
|
+
export const VERIFY_SCHEMA = {
|
|
6
|
+
type: "json_schema",
|
|
7
|
+
schema: {
|
|
8
|
+
type: "object",
|
|
9
|
+
properties: {
|
|
10
|
+
done: { type: "boolean" },
|
|
11
|
+
reasoning: { type: "string" },
|
|
12
|
+
statusUpdate: { type: "string" },
|
|
13
|
+
estimatedSessionsRemaining: { type: "number" },
|
|
14
|
+
verifiedCount: { type: "number" },
|
|
15
|
+
retryCount: { type: "number" },
|
|
16
|
+
tasks: {
|
|
17
|
+
type: "array",
|
|
18
|
+
items: {
|
|
19
|
+
type: "object",
|
|
20
|
+
properties: {
|
|
21
|
+
prompt: { type: "string" },
|
|
22
|
+
model: { type: "string" },
|
|
23
|
+
noWorktree: { type: "boolean" },
|
|
24
|
+
type: { type: "string", enum: ["execute", "explore", "critique", "synthesize", "verify", "user-test", "polish"] },
|
|
25
|
+
postcondition: { type: "string" },
|
|
26
|
+
},
|
|
27
|
+
required: ["prompt"],
|
|
28
|
+
},
|
|
29
|
+
},
|
|
30
|
+
},
|
|
31
|
+
required: ["done", "tasks", "reasoning", "statusUpdate", "estimatedSessionsRemaining"],
|
|
32
|
+
},
|
|
33
|
+
};
|
|
34
|
+
function renderLastWave(w) {
|
|
35
|
+
if (!w)
|
|
36
|
+
return "(first wave — nothing to verify yet)";
|
|
37
|
+
const lines = w.tasks.map(t => {
|
|
38
|
+
const files = t.filesChanged ? ` (${t.filesChanged} files)` : " (0 files)";
|
|
39
|
+
const err = t.error ? ` — ${t.error}` : "";
|
|
40
|
+
return ` - [${t.status}] ${t.prompt.slice(0, 160)}${files}${err}`;
|
|
41
|
+
}).join("\n");
|
|
42
|
+
return `Wave ${w.wave + 1}:\n${lines}`;
|
|
43
|
+
}
|
|
44
|
+
/**
|
|
45
|
+
* Verify the previous wave and compose the next fixed batch of pending tasks.
|
|
46
|
+
*
|
|
47
|
+
* Unlike `steerWave`, the verifier does not invent new tasks — it:
|
|
48
|
+
* 1. Runs the project's build/smoke checks.
|
|
49
|
+
* 2. Fixes shallow regressions in the last wave (edits directly).
|
|
50
|
+
* 3. Picks the next N pending tasks from the user's fixed plan.
|
|
51
|
+
*
|
|
52
|
+
* The model has full tool access so it can actually repair broken commits,
|
|
53
|
+
* not just report on them.
|
|
54
|
+
*/
|
|
55
|
+
export async function verifyWave(objective, pendingTasks, lastWave, remainingBudget, cwd, plannerModel, concurrency, onLog, transcriptName = "verify") {
|
|
56
|
+
const pendingList = pendingTasks.length > 0
|
|
57
|
+
? pendingTasks.map((t, i) => ` ${i + 1}. ${t.prompt.slice(0, 200)}`).join("\n")
|
|
58
|
+
: "(none — every task from the original plan has been attempted)";
|
|
59
|
+
const prompt = `You are the verifier + fix gate between waves of a fixed-plan execution.
|
|
60
|
+
|
|
61
|
+
Objective: ${objective}
|
|
62
|
+
|
|
63
|
+
## What just happened
|
|
64
|
+
${renderLastWave(lastWave)}
|
|
65
|
+
|
|
66
|
+
## Remaining plan (pending tasks, in order)
|
|
67
|
+
${pendingList}
|
|
68
|
+
|
|
69
|
+
## Your job
|
|
70
|
+
|
|
71
|
+
1. Run the project's build and smoke checks. Use the tools you have (Bash, Read, Grep, Edit, Write).
|
|
72
|
+
2. For any regression the last wave introduced, make the fix directly. Don't delegate a fix to the next wave if you can do it in two edits.
|
|
73
|
+
3. Compose the next batch of pending tasks to dispatch — pick tasks with non-overlapping file scopes so ${concurrency} can run in parallel.
|
|
74
|
+
4. If the plan is complete AND the build passes AND one verify task has confirmed the app runs, set done=true.
|
|
75
|
+
|
|
76
|
+
## Output
|
|
77
|
+
|
|
78
|
+
Respond with ONLY a JSON object (no markdown fences):
|
|
79
|
+
{"done":boolean,"reasoning":"...","statusUpdate":"REQUIRED","estimatedSessionsRemaining":N,"verifiedCount":N,"retryCount":N,"tasks":[{"prompt":"...","type":"execute","postcondition":"..."}]}
|
|
80
|
+
|
|
81
|
+
Remaining budget: ${remainingBudget} agent sessions. Include retries inside tasks[] (same format) if a pending step needs a second attempt with corrected context.`;
|
|
82
|
+
onLog("Verifying last wave…", "status");
|
|
83
|
+
const turn = createTurn("steer", `Verify wave`, `verify-${lastWave?.wave ?? 0}`, plannerModel);
|
|
84
|
+
beginTurn(turn);
|
|
85
|
+
const resultText = await runPlannerQuery(prompt, {
|
|
86
|
+
cwd, model: plannerModel, outputFormat: VERIFY_SCHEMA,
|
|
87
|
+
transcriptName, turnId: turn.id, maxTurns: 80,
|
|
88
|
+
}, onLog);
|
|
89
|
+
const parsed = attemptJsonParse(resultText);
|
|
90
|
+
if (!parsed) {
|
|
91
|
+
endTurn(turn, "error");
|
|
92
|
+
throw new Error(`Could not parse verifier response (${resultText.length} chars): ${resultText.slice(0, 120)}`);
|
|
93
|
+
}
|
|
94
|
+
const isDone = parsed.done === true;
|
|
95
|
+
const statusUpdate = parsed.statusUpdate || undefined;
|
|
96
|
+
const estRaw = parsed.estimatedSessionsRemaining;
|
|
97
|
+
const estimatedSessionsRemaining = typeof estRaw === "number" && estRaw >= 0 ? Math.round(estRaw) : undefined;
|
|
98
|
+
let tasks = (parsed.tasks || []).map((t, i) => ({
|
|
99
|
+
id: String(i),
|
|
100
|
+
prompt: typeof t === "string" ? t : t.prompt,
|
|
101
|
+
...(t.noWorktree && { noWorktree: true }),
|
|
102
|
+
...(t.type && { type: t.type }),
|
|
103
|
+
...(typeof t.postcondition === "string" && t.postcondition.trim() && { postcondition: t.postcondition.trim() }),
|
|
104
|
+
}));
|
|
105
|
+
tasks = postProcess(tasks, remainingBudget, onLog);
|
|
106
|
+
endTurn(turn, tasks.length === 0 && !isDone ? "error" : "done");
|
|
107
|
+
if (isDone) {
|
|
108
|
+
return {
|
|
109
|
+
done: true, tasks: [], reasoning: parsed.reasoning || "Plan complete and verified",
|
|
110
|
+
statusUpdate, estimatedSessionsRemaining: estimatedSessionsRemaining ?? 0,
|
|
111
|
+
};
|
|
112
|
+
}
|
|
113
|
+
return {
|
|
114
|
+
done: tasks.length === 0, tasks,
|
|
115
|
+
reasoning: parsed.reasoning || "", statusUpdate, estimatedSessionsRemaining,
|
|
116
|
+
};
|
|
117
|
+
}
|
package/dist/run/run.js
CHANGED
|
@@ -3,6 +3,7 @@ import { join } from "path";
|
|
|
3
3
|
import { execSync } from "child_process";
|
|
4
4
|
import chalk from "chalk";
|
|
5
5
|
import { steerWave, STEER_SCHEMA } from "../planner/steering.js";
|
|
6
|
+
import { verifyWave } from "../planner/verifier.js";
|
|
6
7
|
import { getTotalPlannerCost, getPlannerRateLimitInfo, runPlannerQuery, setPlannerEnvResolver, attemptJsonParse } from "../planner/query.js";
|
|
7
8
|
import { buildEnvResolver, isCursorProxyProvider } from "../providers/index.js";
|
|
8
9
|
import { RunDisplay } from "../ui/ui.js";
|
|
@@ -397,6 +398,33 @@ export async function executeRun(cfg) {
|
|
|
397
398
|
}
|
|
398
399
|
return steered;
|
|
399
400
|
};
|
|
401
|
+
// In non-flex mode with an objective, the verifier runs between waves instead of the steerer.
|
|
402
|
+
const runVerifier = async () => {
|
|
403
|
+
if (!objective)
|
|
404
|
+
return false;
|
|
405
|
+
const plannerCostBefore = getTotalPlannerCost();
|
|
406
|
+
try {
|
|
407
|
+
const result = await verifyWave(objective, currentTasks, waveHistory[waveHistory.length - 1], remaining, cwd, plannerModel, concurrency, steeringLog, `verify-wave-${waveNum}`);
|
|
408
|
+
accCost += getTotalPlannerCost() - plannerCostBefore;
|
|
409
|
+
syncRunInfo();
|
|
410
|
+
if (result.statusUpdate)
|
|
411
|
+
writeStatus(runDir, result.statusUpdate);
|
|
412
|
+
if (typeof result.estimatedSessionsRemaining === "number")
|
|
413
|
+
lastEstimate = result.estimatedSessionsRemaining;
|
|
414
|
+
if (result.done || result.tasks.length === 0) {
|
|
415
|
+
objectiveComplete = result.done;
|
|
416
|
+
remaining = 0;
|
|
417
|
+
return false;
|
|
418
|
+
}
|
|
419
|
+
currentTasks = result.tasks;
|
|
420
|
+
return true;
|
|
421
|
+
}
|
|
422
|
+
catch (err) {
|
|
423
|
+
accCost += getTotalPlannerCost() - plannerCostBefore;
|
|
424
|
+
display.appendSteeringEvent(`Verifier failed: ${err?.message?.slice(0, 200) || "(no details)"}`);
|
|
425
|
+
return false;
|
|
426
|
+
}
|
|
427
|
+
};
|
|
400
428
|
// Resume: steer immediately if no queued tasks
|
|
401
429
|
if (cfg.resuming && flex && currentTasks.length === 0 && remaining > 0) {
|
|
402
430
|
display.setSteering(rlGetter, buildSteeringContext());
|
|
@@ -465,6 +493,7 @@ export async function executeRun(cfg) {
|
|
|
465
493
|
lastEstimate,
|
|
466
494
|
display,
|
|
467
495
|
runSteering,
|
|
496
|
+
runVerifier,
|
|
468
497
|
buildSteeringContext,
|
|
469
498
|
rlGetter,
|
|
470
499
|
isStopping: () => stopping,
|
package/dist/run/wave-loop.d.ts
CHANGED
|
@@ -46,6 +46,8 @@ export interface WaveLoopCtx {
|
|
|
46
46
|
lastEstimate: number | undefined;
|
|
47
47
|
display: RunDisplay;
|
|
48
48
|
runSteering: () => Promise<boolean>;
|
|
49
|
+
/** Verifier invoked between waves in no-flex mode. Mirrors runSteering's contract. */
|
|
50
|
+
runVerifier?: () => Promise<boolean>;
|
|
49
51
|
buildSteeringContext: () => SteeringContext;
|
|
50
52
|
rlGetter: RLGetter;
|
|
51
53
|
isStopping: () => boolean;
|