claude-overnight 1.25.46 → 1.25.48

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/cli.d.ts CHANGED
@@ -9,32 +9,11 @@ import { isJWTAuthError } from "../core/auth.js";
9
9
  export declare const isAuthError: typeof isJWTAuthError;
10
10
  export { isJWTAuthError };
11
11
  export declare function fetchModels(timeoutMs?: number): Promise<ModelInfo[]>;
12
- export declare const PASTE_START = "\u001B[200~";
13
- export declare const PASTE_END = "\u001B[201~";
14
12
  export declare const PASTE_PLACEHOLDER_MAX = 80;
15
- export type InputSegment = {
16
- type: "text";
17
- content: string;
18
- } | {
19
- type: "paste";
20
- content: string;
21
- };
22
- /** Split a raw stdin chunk into typed and pasted segments. */
23
- export declare function splitPaste(chunk: string): Array<{
24
- type: "typed" | "paste";
25
- text: string;
26
- }>;
27
- export declare function segmentsToString(segs: InputSegment[]): string;
28
- export declare function renderSegments(segs: InputSegment[]): string;
29
- export declare function appendCharToSegments(segs: InputSegment[], ch: string): void;
30
- /** Appends a pasted block. Short single-line pastes inline as text; the rest become placeholders. */
31
- export declare function appendPasteToSegments(segs: InputSegment[], text: string): void;
32
- /** Backspace removes one char, or an entire paste block atomically. */
33
- export declare function backspaceSegments(segs: InputSegment[]): void;
34
13
  /**
35
- * Read a line from the user with bracketed-paste awareness.
36
- * Pasted multi-line text stays in the buffer as a single block -- only a typed
37
- * Enter submits. Falls back to cooked readline when stdin isn't a TTY.
14
+ * Read a line from the user with bracketed-paste awareness. Pasted multi-line
15
+ * text stays in the buffer as a single block -- only a typed Enter submits.
16
+ * Falls back to cooked readline when stdin isn't a TTY.
38
17
  */
39
18
  export declare function ask(question: string): Promise<string>;
40
19
  export declare function select<T>(label: string, items: {
package/dist/cli/cli.js CHANGED
@@ -4,6 +4,7 @@ import { resolve } from "path";
4
4
  import { createInterface } from "readline";
5
5
  import chalk from "chalk";
6
6
  import { query } from "@anthropic-ai/claude-agent-sdk";
7
+ import { parseChunk, setBracketedPaste, deleteWordBackward } from "../ui/raw-input.js";
7
8
  // ── CLI flag parsing ──
8
9
  export function parseCliFlags(argv) {
9
10
  const known = new Set(["concurrency", "model", "timeout", "budget", "usage-cap", "extra-usage-budget", "merge"]);
@@ -66,69 +67,34 @@ export async function fetchModels(timeoutMs = 10_000) {
66
67
  return [];
67
68
  }
68
69
  }
69
- // ── Bracketed paste + segment-based input ──
70
+ // ── Interactive primitives ──
70
71
  //
71
- // When the terminal is in bracketed paste mode, pasted content is wrapped with
72
- // \x1B[200~ ... \x1B[201~ so we can distinguish typed Enter from pasted newlines.
73
- // Multi-line or long pastes are stored as opaque segments and shown as a compact
74
- // [Pasted +N lines] placeholder while editing -- the full text is substituted on submit.
75
- export const PASTE_START = "\x1B[200~";
76
- export const PASTE_END = "\x1B[201~";
72
+ // Text entry goes through the shared raw-input parser in `../ui/raw-input.ts`,
73
+ // which enforces the single invariant that used to be duplicated (and buggy)
74
+ // here and in the Ink overlay:
75
+ // - Typed Enter = a stdin chunk that is exactly "\r", "\n", or "\r\n".
76
+ // - Anything else with embedded newlines is a paste, not a submit.
77
+ // Multi-line pastes render as a compact `[Pasted +N lines]` placeholder while
78
+ // editing — the full content is substituted on submit.
77
79
  export const PASTE_PLACEHOLDER_MAX = 80;
78
- /** Split a raw stdin chunk into typed and pasted segments. */
79
- export function splitPaste(chunk) {
80
- const out = [];
81
- let i = 0;
82
- while (i < chunk.length) {
83
- const start = chunk.indexOf(PASTE_START, i);
84
- if (start === -1) {
85
- out.push({ type: "typed", text: chunk.slice(i) });
86
- break;
87
- }
88
- if (start > i)
89
- out.push({ type: "typed", text: chunk.slice(i, start) });
90
- const bodyStart = start + PASTE_START.length;
91
- const end = chunk.indexOf(PASTE_END, bodyStart);
92
- if (end === -1) {
93
- out.push({ type: "paste", text: chunk.slice(bodyStart) });
94
- break;
95
- }
96
- out.push({ type: "paste", text: chunk.slice(bodyStart, end) });
97
- i = end + PASTE_END.length;
98
- }
99
- return out;
100
- }
101
- export function segmentsToString(segs) {
102
- return segs.map((s) => s.content).join("");
103
- }
104
- export function renderSegments(segs) {
105
- return segs.map((s) => {
106
- if (s.type === "text")
107
- return s.content;
108
- const lines = s.content.split("\n").length;
109
- return chalk.dim(`[Pasted +${lines} line${lines === 1 ? "" : "s"}]`);
110
- }).join("");
111
- }
112
- export function appendCharToSegments(segs, ch) {
80
+ function appendTypedChar(segs, ch) {
113
81
  const last = segs[segs.length - 1];
114
82
  if (last && last.type === "text")
115
83
  last.content += ch;
116
84
  else
117
85
  segs.push({ type: "text", content: ch });
118
86
  }
119
- /** Appends a pasted block. Short single-line pastes inline as text; the rest become placeholders. */
120
- export function appendPasteToSegments(segs, text) {
87
/**
 * Add a pasted block to the segment buffer.
 *
 * Line endings are normalised to "\n" first. A paste that is a single line and
 * no longer than PASTE_PLACEHOLDER_MAX characters is treated like typed text;
 * anything else is stored as its own "paste" segment. Empty input is ignored.
 * Mutates `segs` in place.
 *
 * @param segs Array of `{ type, content }` segments being edited.
 * @param text Raw pasted text.
 */
function appendPaste(segs, text) {
    if (!text)
        return;
    const norm = text.replace(/\r\n?/g, "\n");
    const isMultiLine = norm.includes("\n");
    const isTooLong = norm.length > PASTE_PLACEHOLDER_MAX;
    if (isMultiLine || isTooLong) {
        segs.push({ type: "paste", content: norm });
    }
    else {
        // Short single-line paste: inline it as ordinary text.
        appendTypedChar(segs, norm);
    }
}
130
- /** Backspace removes one char, or an entire paste block atomically. */
131
- export function backspaceSegments(segs) {
97
+ function backspaceSegs(segs) {
132
98
  while (segs.length > 0) {
133
99
  const last = segs[segs.length - 1];
134
100
  if (last.type === "paste") {
@@ -143,14 +109,22 @@ export function backspaceSegments(segs) {
143
109
  return;
144
110
  }
145
111
  }
112
/**
 * Flatten the segment buffer into the full input string by concatenating every
 * segment's `content` in order (text and paste segments alike).
 *
 * @param segs Array of `{ type, content }` segments.
 * @returns The concatenated content.
 */
function segsToString(segs) {
    const parts = [];
    for (const seg of segs)
        parts.push(seg.content);
    return parts.join("");
}
113
/**
 * Build the on-screen representation of the segment buffer.
 *
 * Text segments are shown verbatim. Every non-text (paste) segment is shown as
 * a dimmed `[Pasted +N line(s)]` placeholder, where N is the number of
 * "\n"-separated lines in its content.
 *
 * @param segs Array of `{ type, content }` segments.
 * @returns The string to write to the terminal (may contain ANSI styling).
 */
function renderSegs(segs) {
    const pieces = [];
    for (const seg of segs) {
        if (seg.type === "text") {
            pieces.push(seg.content);
            continue;
        }
        const lineCount = seg.content.split("\n").length;
        pieces.push(chalk.dim(`[Pasted +${lineCount} line${lineCount === 1 ? "" : "s"}]`));
    }
    return pieces.join("");
}
146
121
  function stripAnsi(s) {
147
122
  return s.replace(/\x1B\[[0-9;]*[a-zA-Z]/g, "");
148
123
  }
149
- // ── Interactive primitives ──
150
124
  /**
151
- * Read a line from the user with bracketed-paste awareness.
152
- * Pasted multi-line text stays in the buffer as a single block -- only a typed
153
- * Enter submits. Falls back to cooked readline when stdin isn't a TTY.
125
+ * Read a line from the user with bracketed-paste awareness. Pasted multi-line
126
+ * text stays in the buffer as a single block -- only a typed Enter submits.
127
+ * Falls back to cooked readline when stdin isn't a TTY.
154
128
  */
155
129
  export function ask(question) {
156
130
  const { stdin, stdout } = process;
@@ -163,28 +137,25 @@ export function ask(question) {
163
137
  const tail = question.split("\n").pop() ?? "";
164
138
  const tailVisibleLen = stripAnsi(tail).length;
165
139
  let prevWrapRows = 0;
166
- // Only rewrite the input line (and any wrapped continuation rows). The
167
- // question header above is never touched, so redraws can't stack copies
168
- // even if the initial write scrolled the viewport.
169
140
  const redraw = () => {
170
141
  const cols = stdout.columns || 80;
171
142
  if (prevWrapRows > 0)
172
143
  stdout.write(`\x1B[${prevWrapRows}A`);
173
144
  stdout.write("\r\x1B[J");
174
- const rendered = renderSegments(segs);
145
+ const rendered = renderSegs(segs);
175
146
  stdout.write(tail + rendered);
176
147
  const visible = tailVisibleLen + stripAnsi(rendered).length;
177
148
  prevWrapRows = visible > 0 ? Math.floor((visible - 1) / cols) : 0;
178
149
  };
179
150
  stdout.write(question);
180
- stdout.write("\x1B[?2004h");
151
+ setBracketedPaste(stdout, true);
181
152
  try {
182
153
  stdin.setRawMode(true);
183
154
  }
184
155
  catch { }
185
156
  stdin.resume();
186
157
  const cleanup = () => {
187
- stdout.write("\x1B[?2004l");
158
+ setBracketedPaste(stdout, false);
188
159
  try {
189
160
  stdin.setRawMode(false);
190
161
  }
@@ -192,48 +163,44 @@ export function ask(question) {
192
163
  stdin.removeListener("data", onData);
193
164
  stdin.pause();
194
165
  };
166
+ const submit = () => { stdout.write("\n"); cleanup(); resolve(segsToString(segs).trim()); };
195
167
  const onData = (buf) => {
196
- const chunk = buf.toString();
197
- for (const seg of splitPaste(chunk)) {
198
- if (seg.type === "paste") {
199
- appendPasteToSegments(segs, seg.text);
200
- redraw();
201
- continue;
202
- }
203
- for (let ci = 0; ci < seg.text.length; ci++) {
204
- const ch = seg.text[ci];
205
- if (ch === "\r" || ch === "\n") {
206
- stdout.write("\n");
207
- cleanup();
208
- resolve(segmentsToString(segs).trim());
209
- return;
168
+ for (const ev of parseChunk(buf.toString())) {
169
+ switch (ev.type) {
170
+ case "char":
171
+ appendTypedChar(segs, ev.text);
172
+ break;
173
+ case "paste":
174
+ appendPaste(segs, ev.text);
175
+ break;
176
+ case "backspace":
177
+ backspaceSegs(segs);
178
+ break;
179
+ case "word-delete": {
180
+ const s = segsToString(segs);
181
+ const next = deleteWordBackward(s);
182
+ segs.length = 0;
183
+ if (next)
184
+ segs.push({ type: "text", content: next });
185
+ break;
210
186
  }
211
- if (ch === "\x03") {
187
+ case "clear-line":
188
+ segs.length = 0;
189
+ break;
190
+ case "submit":
191
+ submit();
192
+ return;
193
+ case "cancel":
194
+ submit();
195
+ return; // lone ESC = submit, preserves old behavior
196
+ case "interrupt":
212
197
  cleanup();
213
198
  stdout.write("\n");
214
199
  process.exit(130);
215
- }
216
- if (ch === "\x7F" || ch === "\b") {
217
- backspaceSegments(segs);
218
- redraw();
219
- continue;
220
- }
221
- // ESC submits the current input (same as Enter)
222
- if (ch === "\x1B") {
223
- stdout.write("\n");
224
- cleanup();
225
- resolve(segmentsToString(segs).trim());
226
- return;
227
- }
228
- const code = ch.charCodeAt(0);
229
- if (code < 0x20)
230
- continue; // control chars
231
- if (code >= 0x7F && code < 0xA0)
232
- continue; // DEL + C1 controls
233
- appendCharToSegments(segs, ch);
200
+ // tab + nav: ignore during single-line prompts
234
201
  }
235
- redraw();
236
202
  }
203
+ redraw();
237
204
  };
238
205
  stdin.on("data", onData);
239
206
  });
package/dist/cli/help.js CHANGED
@@ -16,6 +16,7 @@ export function printHelp() {
16
16
  ${chalk.cyan("Usage")}
17
17
  claude-overnight ${chalk.dim("interactive mode")}
18
18
  claude-overnight tasks.json ${chalk.dim("task file mode")}
19
+ claude-overnight plan.md ${chalk.dim("plan file mode (.md) — coach + flex")}
19
20
  claude-overnight "fix auth" "add tests" ${chalk.dim("inline tasks")}
20
21
 
21
22
  ${chalk.cyan("Flags")}
@@ -30,7 +31,8 @@ export function printHelp() {
30
31
  --allow-extra-usage Allow extra/overage usage ${chalk.dim("(default: stop when plan limits hit)")}
31
32
  --extra-usage-budget=N Max $ for extra usage ${chalk.dim("(implies --allow-extra-usage)")}
32
33
  --timeout=SECONDS Agent inactivity timeout ${chalk.dim("(default: 900s, nudges at timeout, kills at 2×)")}
33
- --no-flex Disable adaptive multi-wave planning ${chalk.dim("(run all tasks in one shot)")}
34
+ --flex Force adaptive multi-wave planning ${chalk.dim("(steering between waves)")}
35
+ --no-flex Fixed plan mode ${chalk.dim("(verifier between waves, no re-planning)")}
34
36
  --worktrees Force worktree isolation on ${chalk.dim("(default: auto-detect git repo)")}
35
37
  --no-worktrees Disable worktree isolation ${chalk.dim("(all agents work in real cwd)")}
36
38
  --merge=MODE Merge strategy: yolo or branch ${chalk.dim("(default: yolo)")}
@@ -1 +1 @@
1
- export declare const VERSION = "1.25.46";
1
+ export declare const VERSION = "1.25.48";
@@ -1,2 +1,2 @@
1
1
  // Auto-generated by build — do not edit manually.
2
- export const VERSION = "1.25.46";
2
+ export const VERSION = "1.25.48";
package/dist/index.js CHANGED
@@ -8,7 +8,7 @@ import { setPlannerEnvResolver } from "./planner/query.js";
8
8
  import { setTranscriptRunDir } from "./core/transcripts.js";
9
9
  import { pickModel, loadProviders, buildEnvResolver, healthCheckCursorProxy, PROXY_DEFAULT_URL, isCursorProxyProvider, bundledComposerProxyShellCommand, warnMacCursorAgentShellPatchIfNeeded, } from "./providers/index.js";
10
10
  import { executeRun } from "./run/run.js";
11
- import { parseCliFlags, fetchModels, ask, select, selectKey, loadTaskFile, validateConcurrency, isGitRepo, validateGitRepo, showPlan, } from "./cli/cli.js";
11
+ import { parseCliFlags, fetchModels, ask, select, selectKey, loadTaskFile, loadPlanFile, validateConcurrency, isGitRepo, validateGitRepo, showPlan, } from "./cli/cli.js";
12
12
  import { loadRunState, findOrphanedDesigns, backfillOrphanedPlans, readPreviousRunKnowledge, createRunDir, updateLatestSymlink, } from "./state/state.js";
13
13
  import { runSetupCoach, loadUserSettings, saveUserSettings, COACH_MODEL } from "./planner/coach/coach.js";
14
14
  import { editRunSettings, formatSettingsSummary } from "./cli/settings.js";
@@ -63,11 +63,21 @@ async function main() {
63
63
  // ── Load tasks ──
64
64
  let tasks = [];
65
65
  let fileCfg;
66
+ let planFileContent;
66
67
  const jsonFiles = args.filter(a => a.endsWith(".json"));
68
+ const mdFiles = args.filter(a => a.endsWith(".md"));
67
69
  if (jsonFiles.length > 1) {
68
70
  console.error(chalk.red(` Multiple task files provided. Only one .json file is supported.`));
69
71
  process.exit(1);
70
72
  }
73
+ if (mdFiles.length > 1) {
74
+ console.error(chalk.red(` Multiple plan files provided. Only one .md file is supported.`));
75
+ process.exit(1);
76
+ }
77
+ if (jsonFiles.length && mdFiles.length) {
78
+ console.error(chalk.red(` Cannot mix a .json task file with a .md plan file.`));
79
+ process.exit(1);
80
+ }
71
81
  for (const arg of args) {
72
82
  if (arg.endsWith(".json")) {
73
83
  if (tasks.length > 0) {
@@ -77,8 +87,13 @@ async function main() {
77
87
  fileCfg = loadTaskFile(arg);
78
88
  tasks = fileCfg.tasks;
79
89
  }
90
+ else if (arg.endsWith(".md")) {
91
+ const plan = loadPlanFile(arg);
92
+ planFileContent = plan.planContent;
93
+ fileCfg = { tasks: [], objective: plan.objective, flexiblePlan: true };
94
+ }
80
95
  else if (!arg.startsWith("-") && existsSync(resolve(arg))) {
81
- console.error(chalk.red(` "${arg}" looks like a file but doesn't end in .json. Rename it or quote the string.`));
96
+ console.error(chalk.red(` "${arg}" looks like a file but doesn't end in .json or .md. Rename it or quote the string.`));
82
97
  process.exit(1);
83
98
  }
84
99
  else {
@@ -341,6 +356,30 @@ async function main() {
341
356
  console.log(chalk.dim(` ╰${"─".repeat(innerLen + 4)}╯`));
342
357
  }
343
358
  else {
359
+ // ── Setup coach in confirm-only mode (task/plan file on a TTY) ──
360
+ let coachResult = null;
361
+ if (fileCfg?.objective && process.stdin.isTTY
362
+ && !argv.includes("--no-coach") && !loadUserSettings().skipCoach) {
363
+ const settings = loadUserSettings();
364
+ const cModel = settings.coachModel ?? COACH_MODEL;
365
+ const cProvider = settings.coachProviderId
366
+ ? loadProviders().find(p => p.id === settings.coachProviderId) : undefined;
367
+ coachResult = await runSetupCoach(fileCfg.objective, cwd, {
368
+ providers: loadProviders(), cliFlags, coachModel: cModel, coachProvider: cProvider,
369
+ planContent: planFileContent, confirmOnly: true,
370
+ });
371
+ if (coachResult) {
372
+ coachedOriginal = fileCfg.objective;
373
+ coachedAt = Date.now();
374
+ fileCfg.objective = coachResult.improvedObjective;
375
+ objective = coachResult.improvedObjective;
376
+ const rec = coachResult.recommended;
377
+ if (fileCfg.concurrency == null)
378
+ fileCfg.concurrency = rec.concurrency;
379
+ if (fileCfg.usageCap == null && rec.usageCap != null)
380
+ fileCfg.usageCap = Math.round(rec.usageCap * 100);
381
+ }
382
+ }
344
383
  let models = [];
345
384
  if (!cliFlags.model && !fileCfg?.model)
346
385
  models = await fetchModels(5_000);
@@ -374,7 +413,7 @@ async function main() {
374
413
  }
375
414
  }
376
415
  concurrency = cliFlags.concurrency ? parseInt(cliFlags.concurrency) : (fileCfg?.concurrency ?? 5);
377
- budget = cliFlags.budget ? parseInt(cliFlags.budget) : undefined;
416
+ budget = cliFlags.budget ? parseInt(cliFlags.budget) : coachResult?.recommended.budget;
378
417
  if (budget != null && (isNaN(budget) || budget < 1)) {
379
418
  console.error(chalk.red(` --budget must be a positive integer`));
380
419
  process.exit(1);
@@ -442,7 +481,8 @@ async function main() {
442
481
  console.log(chalk.dim(` ${workerModel} concurrency=${concurrency} worktrees=${useWorktrees} merge=${mergeStrategy}${capStr}${extraStr}`));
443
482
  }
444
483
  // ── Plan phase ──
445
- const flex = !argv.includes("--no-flex") && (fileCfg?.flexiblePlan ?? objective != null) && objective != null && (budget ?? 10) > 2;
484
+ const flexFlag = argv.includes("--flex") ? true : argv.includes("--no-flex") ? false : undefined;
485
+ const flex = objective != null && (flexFlag ?? ((fileCfg?.flexiblePlan ?? true) && (budget ?? 10) > 2));
446
486
  const agentTimeoutMs = cliFlags.timeout ? parseFloat(cliFlags.timeout) * 1000 : undefined;
447
487
  let thinkingUsed = 0, thinkingCost = 0, thinkingIn = 0, thinkingOut = 0, thinkingTools = 0;
448
488
  let thinkingHistory;
@@ -0,0 +1,66 @@
1
+ import type { Task, SteerResult, WaveSummary } from "../core/types.js";
2
+ import { type PlannerLog } from "./query.js";
3
+ export declare const VERIFY_SCHEMA: {
4
+ type: "json_schema";
5
+ schema: {
6
+ type: string;
7
+ properties: {
8
+ done: {
9
+ type: string;
10
+ };
11
+ reasoning: {
12
+ type: string;
13
+ };
14
+ statusUpdate: {
15
+ type: string;
16
+ };
17
+ estimatedSessionsRemaining: {
18
+ type: string;
19
+ };
20
+ verifiedCount: {
21
+ type: string;
22
+ };
23
+ retryCount: {
24
+ type: string;
25
+ };
26
+ tasks: {
27
+ type: string;
28
+ items: {
29
+ type: string;
30
+ properties: {
31
+ prompt: {
32
+ type: string;
33
+ };
34
+ model: {
35
+ type: string;
36
+ };
37
+ noWorktree: {
38
+ type: string;
39
+ };
40
+ type: {
41
+ type: string;
42
+ enum: string[];
43
+ };
44
+ postcondition: {
45
+ type: string;
46
+ };
47
+ };
48
+ required: string[];
49
+ };
50
+ };
51
+ };
52
+ required: string[];
53
+ };
54
+ };
55
+ /**
56
+ * Verify the previous wave and compose the next fixed batch of pending tasks.
57
+ *
58
+ * Unlike `steerWave`, the verifier does not invent new tasks — it:
59
+ * 1. Runs the project's build/smoke checks.
60
+ * 2. Fixes shallow regressions in the last wave (edits directly).
61
+ * 3. Picks the next N pending tasks from the user's fixed plan.
62
+ *
63
+ * The model has full tool access so it can actually repair broken commits,
64
+ * not just report on them.
65
+ */
66
+ export declare function verifyWave(objective: string, pendingTasks: Task[], lastWave: WaveSummary | undefined, remainingBudget: number, cwd: string, plannerModel: string, concurrency: number, onLog: PlannerLog, transcriptName?: string): Promise<SteerResult>;
@@ -0,0 +1,117 @@
1
+ import { runPlannerQuery, attemptJsonParse, postProcess } from "./query.js";
2
+ import { createTurn, beginTurn, endTurn } from "../core/turns.js";
3
// Structured-output schema for the verifier's reply.
//
// Carries the steering-style fields (`done`, `reasoning`, `statusUpdate`,
// `estimatedSessionsRemaining`, `tasks`) plus two optional counters,
// `verifiedCount` and `retryCount`. There is no per-task id list in this
// schema: the verifier reports counts only.
// NOTE(review): the original comment described this as "same shape as
// STEER_SCHEMA"; STEER_SCHEMA lives in steering.js — confirm they still match.
export const VERIFY_SCHEMA = {
    type: "json_schema",
    schema: {
        type: "object",
        properties: {
            done: { type: "boolean" },
            reasoning: { type: "string" },
            statusUpdate: { type: "string" },
            estimatedSessionsRemaining: { type: "number" },
            verifiedCount: { type: "number" },
            retryCount: { type: "number" },
            tasks: {
                type: "array",
                items: {
                    type: "object",
                    properties: {
                        prompt: { type: "string" },
                        model: { type: "string" },
                        noWorktree: { type: "boolean" },
                        type: { type: "string", enum: ["execute", "explore", "critique", "synthesize", "verify", "user-test", "polish"] },
                        postcondition: { type: "string" },
                    },
                    // Only the prompt is mandatory for a task entry.
                    required: ["prompt"],
                },
            },
        },
        // verifiedCount / retryCount are intentionally absent here (optional).
        required: ["done", "tasks", "reasoning", "statusUpdate", "estimatedSessionsRemaining"],
    },
};
34
/**
 * Format the previous wave as a bullet list for the verifier prompt.
 *
 * Each task becomes one line with its status, the first 160 characters of its
 * prompt, a changed-file count ("0 files" when `filesChanged` is falsy) and,
 * if present, its error text. The heading shows `w.wave + 1`.
 *
 * @param w Wave summary (`{ wave, tasks }`), or a falsy value when no wave has
 *          run yet.
 * @returns The rendered text, or a fixed placeholder when `w` is falsy.
 */
function renderLastWave(w) {
    if (!w)
        return "(first wave — nothing to verify yet)";
    const rows = [];
    for (const task of w.tasks) {
        const fileNote = task.filesChanged ? ` (${task.filesChanged} files)` : " (0 files)";
        const errNote = task.error ? ` — ${task.error}` : "";
        rows.push(` - [${task.status}] ${task.prompt.slice(0, 160)}${fileNote}${errNote}`);
    }
    return `Wave ${w.wave + 1}:\n${rows.join("\n")}`;
}
44
/**
 * Verify the previous wave and compose the next fixed batch of pending tasks.
 *
 * Unlike `steerWave`, the verifier does not invent new tasks — it:
 *   1. Runs the project's build/smoke checks.
 *   2. Fixes shallow regressions in the last wave (edits directly).
 *   3. Picks the next N pending tasks from the user's fixed plan.
 *
 * The model has full tool access so it can actually repair broken commits,
 * not just report on them.
 *
 * @param objective        Run objective, interpolated into the prompt.
 * @param pendingTasks     Tasks not yet dispatched; each prompt is listed
 *                         (truncated to 200 chars) in plan order.
 * @param lastWave         Summary of the wave that just finished, or undefined
 *                         before the first wave.
 * @param remainingBudget  Agent sessions left; shown in the prompt and passed
 *                         to `postProcess`.
 * @param cwd              Working directory for the planner query.
 * @param plannerModel     Model used for the planner query and the turn record.
 * @param concurrency      Parallelism hint shown in the prompt.
 * @param onLog            Planner log callback.
 * @param transcriptName   Transcript label (default "verify").
 * @returns `{ done, tasks, reasoning, statusUpdate, estimatedSessionsRemaining }`.
 *          `done` is true when the model says so (tasks are then dropped) or
 *          when no tasks survive post-processing.
 * @throws Error when the model's reply cannot be parsed as JSON; errors from
 *         the planner query or post-processing are rethrown unchanged.
 */
export async function verifyWave(objective, pendingTasks, lastWave, remainingBudget, cwd, plannerModel, concurrency, onLog, transcriptName = "verify") {
    const pendingList = pendingTasks.length > 0
        ? pendingTasks.map((t, i) => ` ${i + 1}. ${t.prompt.slice(0, 200)}`).join("\n")
        : "(none — every task from the original plan has been attempted)";
    const prompt = `You are the verifier + fix gate between waves of a fixed-plan execution.

Objective: ${objective}

## What just happened
${renderLastWave(lastWave)}

## Remaining plan (pending tasks, in order)
${pendingList}

## Your job

1. Run the project's build and smoke checks. Use the tools you have (Bash, Read, Grep, Edit, Write).
2. For any regression the last wave introduced, make the fix directly. Don't delegate a fix to the next wave if you can do it in two edits.
3. Compose the next batch of pending tasks to dispatch — pick tasks with non-overlapping file scopes so ${concurrency} can run in parallel.
4. If the plan is complete AND the build passes AND one verify task has confirmed the app runs, set done=true.

## Output

Respond with ONLY a JSON object (no markdown fences):
{"done":boolean,"reasoning":"...","statusUpdate":"REQUIRED","estimatedSessionsRemaining":N,"verifiedCount":N,"retryCount":N,"tasks":[{"prompt":"...","type":"execute","postcondition":"..."}]}

Remaining budget: ${remainingBudget} agent sessions. Include retries inside tasks[] (same format) if a pending step needs a second attempt with corrected context.`;
    onLog("Verifying last wave…", "status");
    const turn = createTurn("steer", `Verify wave`, `verify-${lastWave?.wave ?? 0}`, plannerModel);
    beginTurn(turn);
    // Close the turn exactly once, whatever path we leave by. Previously a
    // throwing planner query (or post-processing step) left the turn open.
    // NOTE(review): assumes runPlannerQuery does not end the turn itself on
    // failure — confirm against query.js.
    let turnClosed = false;
    const closeTurn = (status) => {
        if (turnClosed)
            return;
        turnClosed = true;
        endTurn(turn, status);
    };
    try {
        const resultText = await runPlannerQuery(prompt, {
            cwd, model: plannerModel, outputFormat: VERIFY_SCHEMA,
            transcriptName, turnId: turn.id, maxTurns: 80,
        }, onLog);
        const parsed = attemptJsonParse(resultText);
        if (!parsed) {
            closeTurn("error");
            throw new Error(`Could not parse verifier response (${resultText.length} chars): ${resultText.slice(0, 120)}`);
        }
        const isDone = parsed.done === true;
        const statusUpdate = parsed.statusUpdate || undefined;
        const estRaw = parsed.estimatedSessionsRemaining;
        const estimatedSessionsRemaining = typeof estRaw === "number" && estRaw >= 0 ? Math.round(estRaw) : undefined;
        // The reply is model output: tolerate a missing/non-array `tasks` and
        // null entries instead of crashing on property access.
        const rawTasks = Array.isArray(parsed.tasks) ? parsed.tasks : [];
        let tasks = rawTasks
            .filter((t) => t != null)
            .map((t, i) => ({
            id: String(i),
            // A bare string entry is accepted as the prompt itself.
            prompt: typeof t === "string" ? t : t.prompt,
            ...(t.noWorktree && { noWorktree: true }),
            ...(t.type && { type: t.type }),
            ...(typeof t.postcondition === "string" && t.postcondition.trim() && { postcondition: t.postcondition.trim() }),
        }));
        tasks = postProcess(tasks, remainingBudget, onLog);
        // No tasks without an explicit done=true is recorded as an errored turn.
        closeTurn(tasks.length === 0 && !isDone ? "error" : "done");
        if (isDone) {
            return {
                done: true, tasks: [], reasoning: parsed.reasoning || "Plan complete and verified",
                statusUpdate, estimatedSessionsRemaining: estimatedSessionsRemaining ?? 0,
            };
        }
        return {
            done: tasks.length === 0, tasks,
            reasoning: parsed.reasoning || "", statusUpdate, estimatedSessionsRemaining,
        };
    }
    catch (err) {
        closeTurn("error");
        throw err;
    }
}
package/dist/run/run.js CHANGED
@@ -3,6 +3,7 @@ import { join } from "path";
3
3
  import { execSync } from "child_process";
4
4
  import chalk from "chalk";
5
5
  import { steerWave, STEER_SCHEMA } from "../planner/steering.js";
6
+ import { verifyWave } from "../planner/verifier.js";
6
7
  import { getTotalPlannerCost, getPlannerRateLimitInfo, runPlannerQuery, setPlannerEnvResolver, attemptJsonParse } from "../planner/query.js";
7
8
  import { buildEnvResolver, isCursorProxyProvider } from "../providers/index.js";
8
9
  import { RunDisplay } from "../ui/ui.js";
@@ -397,6 +398,33 @@ export async function executeRun(cfg) {
397
398
  }
398
399
  return steered;
399
400
  };
401
// Between-wave hook for non-flex runs that have an objective: the verifier
// takes the steerer's place. Resolves true when a next batch was queued into
// `currentTasks`, false when the run should stop (no objective, plan finished,
// nothing left to dispatch, or the verifier threw).
const runVerifier = async () => {
    if (!objective)
        return false;
    // Snapshot planner spend so only this call's delta is added to the run.
    const costBaseline = getTotalPlannerCost();
    try {
        const lastWave = waveHistory[waveHistory.length - 1];
        const verdict = await verifyWave(objective, currentTasks, lastWave, remaining, cwd, plannerModel, concurrency, steeringLog, `verify-wave-${waveNum}`);
        accCost += getTotalPlannerCost() - costBaseline;
        syncRunInfo();
        if (verdict.statusUpdate)
            writeStatus(runDir, verdict.statusUpdate);
        if (typeof verdict.estimatedSessionsRemaining === "number")
            lastEstimate = verdict.estimatedSessionsRemaining;
        const hasNextBatch = !verdict.done && verdict.tasks.length > 0;
        if (hasNextBatch) {
            currentTasks = verdict.tasks;
            return true;
        }
        // Finished (or nothing to dispatch): zero the budget so the loop ends.
        objectiveComplete = verdict.done;
        remaining = 0;
        return false;
    }
    catch (err) {
        // Planner cost is still charged when the verifier fails.
        accCost += getTotalPlannerCost() - costBaseline;
        display.appendSteeringEvent(`Verifier failed: ${err?.message?.slice(0, 200) || "(no details)"}`);
        return false;
    }
};
400
428
  // Resume: steer immediately if no queued tasks
401
429
  if (cfg.resuming && flex && currentTasks.length === 0 && remaining > 0) {
402
430
  display.setSteering(rlGetter, buildSteeringContext());
@@ -465,6 +493,7 @@ export async function executeRun(cfg) {
465
493
  lastEstimate,
466
494
  display,
467
495
  runSteering,
496
+ runVerifier,
468
497
  buildSteeringContext,
469
498
  rlGetter,
470
499
  isStopping: () => stopping,
@@ -46,6 +46,8 @@ export interface WaveLoopCtx {
46
46
  lastEstimate: number | undefined;
47
47
  display: RunDisplay;
48
48
  runSteering: () => Promise<boolean>;
49
+ /** Verifier invoked between waves in no-flex mode. Mirrors runSteering's contract. */
50
+ runVerifier?: () => Promise<boolean>;
49
51
  buildSteeringContext: () => SteeringContext;
50
52
  rlGetter: RLGetter;
51
53
  isStopping: () => boolean;