claude-overnight 1.25.45 → 1.25.47

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/cli.d.ts CHANGED
@@ -61,6 +61,11 @@ export interface FileArgs {
61
61
  usageCap?: number;
62
62
  flexiblePlan?: boolean;
63
63
  }
64
+ /** Load a markdown plan file. Uses the first H1 as objective (falling back to the first non-empty line when there is no H1) and returns the trimmed file body as planContent. Throws an Error when the file cannot be read or is empty. */
65
+ export declare function loadPlanFile(file: string): {
66
+ objective: string;
67
+ planContent: string;
68
+ };
64
69
  export declare function loadTaskFile(file: string): FileArgs;
65
70
  export declare function validateConcurrency(value: unknown): asserts value is number;
66
71
  export declare function isGitRepo(cwd: string): boolean;
package/dist/cli/cli.js CHANGED
@@ -7,7 +7,7 @@ import { query } from "@anthropic-ai/claude-agent-sdk";
7
7
  // ── CLI flag parsing ──
8
8
  export function parseCliFlags(argv) {
9
9
  const known = new Set(["concurrency", "model", "timeout", "budget", "usage-cap", "extra-usage-budget", "merge"]);
10
- const booleans = new Set(["--dry-run", "-h", "--help", "-v", "--version", "--no-flex", "--allow-extra-usage", "--worktrees", "--no-worktrees", "--yolo"]);
10
+ const booleans = new Set(["--dry-run", "-h", "--help", "-v", "--version", "--flex", "--no-flex", "--allow-extra-usage", "--worktrees", "--no-worktrees", "--yolo"]);
11
11
  const flags = {};
12
12
  const positional = [];
13
13
  for (let i = 0; i < argv.length; i++) {
@@ -334,6 +334,23 @@ export async function selectKey(label, options) {
334
334
  const KNOWN_TASK_FILE_KEYS = new Set([
335
335
  "tasks", "objective", "concurrency", "cwd", "model", "allowedTools", "beforeWave", "afterWave", "afterRun", "worktrees", "mergeStrategy", "usageCap", "flexiblePlan",
336
336
  ]);
337
+ /** Load a markdown plan file. Extracts the first H1 as objective and returns the full body as planContent. */
338
+ export function loadPlanFile(file) {
339
+ const path = resolve(file);
340
+ let raw;
341
+ try {
342
+ raw = readFileSync(path, "utf-8");
343
+ }
344
+ catch {
345
+ throw new Error(`Cannot read plan file: ${path}`);
346
+ }
347
+ const body = raw.trim();
348
+ if (!body)
349
+ throw new Error(`Plan file is empty: ${path}`);
350
+ const h1 = body.match(/^#\s+(.+)$/m);
351
+ const objective = (h1?.[1] ?? body.split("\n").find(l => l.trim())).trim();
352
+ return { objective, planContent: body };
353
+ }
337
354
  export function loadTaskFile(file) {
338
355
  const path = resolve(file);
339
356
  let raw;
package/dist/cli/help.js CHANGED
@@ -16,6 +16,7 @@ export function printHelp() {
16
16
  ${chalk.cyan("Usage")}
17
17
  claude-overnight ${chalk.dim("interactive mode")}
18
18
  claude-overnight tasks.json ${chalk.dim("task file mode")}
19
+ claude-overnight plan.md ${chalk.dim("plan file mode (.md) — coach + flex")}
19
20
  claude-overnight "fix auth" "add tests" ${chalk.dim("inline tasks")}
20
21
 
21
22
  ${chalk.cyan("Flags")}
@@ -30,7 +31,8 @@ export function printHelp() {
30
31
  --allow-extra-usage Allow extra/overage usage ${chalk.dim("(default: stop when plan limits hit)")}
31
32
  --extra-usage-budget=N Max $ for extra usage ${chalk.dim("(implies --allow-extra-usage)")}
32
33
  --timeout=SECONDS Agent inactivity timeout ${chalk.dim("(default: 900s, nudges at timeout, kills at 2×)")}
33
- --no-flex Disable adaptive multi-wave planning ${chalk.dim("(run all tasks in one shot)")}
34
+ --flex Force adaptive multi-wave planning ${chalk.dim("(steering between waves)")}
35
+ --no-flex Fixed plan mode ${chalk.dim("(verifier between waves, no re-planning)")}
34
36
  --worktrees Force worktree isolation on ${chalk.dim("(default: auto-detect git repo)")}
35
37
  --no-worktrees Disable worktree isolation ${chalk.dim("(all agents work in real cwd)")}
36
38
  --merge=MODE Merge strategy: yolo or branch ${chalk.dim("(default: yolo)")}
@@ -1 +1 @@
1
- export declare const VERSION = "1.25.45";
1
+ export declare const VERSION = "1.25.47";
@@ -1,2 +1,2 @@
1
1
  // Auto-generated by build — do not edit manually.
2
- export const VERSION = "1.25.45";
2
+ export const VERSION = "1.25.47";
@@ -59,27 +59,29 @@ export function verifyToken(token, providerId) {
59
59
  */
60
60
  export function verifyTokenWithResult(token, options = {}) {
61
61
  const { providerId, model, baseURL } = options;
62
- // Unsafely decode the token to extract the `sub` claim so we can derive
63
- // the correct signing key. This does NOT verify the signature yet.
64
- const raw = jwt.decode(token);
65
- if (!raw || typeof raw !== "object") {
62
+ const header = jwt.decode(token, { complete: true });
63
+ if (!header || typeof header === "string") {
66
64
  return { valid: false, reason: "invalid_signature" };
67
65
  }
68
- const sub = raw.sub;
69
- if (typeof sub !== "string" || !sub) {
66
+ const loose = header.payload;
67
+ const subForKey = loose.sub;
68
+ if (!subForKey || typeof subForKey !== "string") {
69
+ return { valid: false, reason: "invalid_signature" };
70
+ }
71
+ let key;
72
+ try {
73
+ key = deriveKey(subForKey);
74
+ }
75
+ catch {
70
76
  return { valid: false, reason: "invalid_signature" };
71
77
  }
72
- const key = deriveKey(sub);
73
78
  try {
74
79
  const decoded = jwt.verify(token, key, {
75
80
  algorithms: ["HS256"],
76
- // Let jwt.verify check expiration for us
77
81
  });
78
- // Reject tokens from older versions
79
82
  if (decoded.ver !== TOKEN_VERSION) {
80
83
  return { valid: false, reason: "wrong_version" };
81
84
  }
82
- // Validate claims if expected values are provided
83
85
  if (providerId && decoded.sub !== providerId) {
84
86
  return { valid: false, reason: "claim_mismatch" };
85
87
  }
@@ -8,6 +8,12 @@ export interface RateLimiterConfig {
8
8
  windowMs: number;
9
9
  minIntervalMs?: number;
10
10
  }
11
+ export interface AcquireOptions {
12
+ /** Predicate evaluated at the start of `acquire()`; when it returns true the call resolves to 0 without any sliding-window / min-interval wait (caller still records after the request). */
13
+ skipWhen?: () => boolean;
14
+ /** Invoked once per bypassed `acquire()` call, right after `skipWhen()` returned true. */
15
+ onBypass?: () => void;
16
+ }
11
17
  export declare class RateLimiter {
12
18
  private readonly maxRequests;
13
19
  private readonly windowMs;
@@ -18,6 +24,8 @@ export declare class RateLimiter {
18
24
  record(): void;
19
25
  get currentCount(): number;
20
26
  canRequest(): boolean;
27
+ /** Wait until a request slot is available. Optional `skipWhen` bypasses the throttle entirely. */
28
+ acquire(options?: AcquireOptions): Promise<number>;
21
29
  waitIfNeeded(): Promise<number>;
22
30
  waitMs(): number;
23
31
  reset(): void;
@@ -29,6 +37,8 @@ export declare class RateLimiter {
29
37
  }
30
38
  /** Shared rate limiter for SDK query calls — enforced globally across all workers. */
31
39
  export declare const sdkQueryRateLimiter: RateLimiter;
40
+ /** Acquire SDK query slot. Skips the SDK sliding-window limiter when `CURSOR_PROXY_URL` is set (proxy has its own limiters). */
41
+ export declare function acquireSdkQueryRateLimit(): Promise<number>;
32
42
  /** Shared rate limiter for Cursor proxy direct fetches — enforced globally. */
33
43
  export declare const cursorProxyRateLimiter: RateLimiter;
34
44
  /** Shared rate limiter for direct API endpoint calls — guards against rapid
@@ -38,12 +38,20 @@ export class RateLimiter {
38
38
  return this.timestamps.length < this.maxRequests
39
39
  && (Date.now() - this.lastRequestAt) >= this.minIntervalMs;
40
40
  }
41
- async waitIfNeeded() {
41
+ /** Wait until a request slot is available. Optional `skipWhen` bypasses the throttle entirely. */
42
+ async acquire(options) {
43
+ if (options?.skipWhen?.()) {
44
+ options.onBypass?.();
45
+ return 0;
46
+ }
42
47
  const waited = this.waitMs();
43
48
  if (waited > 0)
44
49
  await new Promise(r => setTimeout(r, waited));
45
50
  return waited;
46
51
  }
52
+ async waitIfNeeded() {
53
+ return this.acquire();
54
+ }
47
55
  waitMs() {
48
56
  this.evict();
49
57
  const volumeWait = this.timestamps.length >= this.maxRequests
@@ -86,6 +94,15 @@ const _cursorProxyLimiter = new RateLimiter({ maxRequests: 4, windowMs: 10_000 }
86
94
  const _apiEndpointLimiter = new RateLimiter({ maxRequests: 6, windowMs: 15_000, minIntervalMs: 1_000 });
87
95
  /** Shared rate limiter for SDK query calls — enforced globally across all workers. */
88
96
  export const sdkQueryRateLimiter = _sdkQueryLimiter;
// Flipped after the first bypass notice so the message is printed once per
// process instead of before every SDK query while the proxy is configured.
let _sdkBypassNoticeLogged = false;
/**
 * Acquire an SDK query slot.
 *
 * Skips the SDK sliding-window limiter when `CURSOR_PROXY_URL` is set (proxy
 * has its own limiters). The environment variable is read on every call, so
 * setting or clearing it at runtime takes effect immediately.
 *
 * @returns Milliseconds waited; 0 when no wait was needed or the limiter was bypassed.
 */
export async function acquireSdkQueryRateLimit() {
    return _sdkQueryLimiter.acquire({
        skipWhen: () => !!process.env.CURSOR_PROXY_URL,
        onBypass: () => {
            if (_sdkBypassNoticeLogged)
                return;
            _sdkBypassNoticeLogged = true;
            console.log("[rate-limiter] Skipping SDK rate limit (Cursor proxy has its own limiter)");
        },
    });
}
89
106
  /** Shared rate limiter for Cursor proxy direct fetches — enforced globally. */
90
107
  export const cursorProxyRateLimiter = _cursorProxyLimiter;
91
108
  /** Shared rate limiter for direct API endpoint calls — guards against rapid
@@ -21,7 +21,7 @@ export function getCachedToken(providerId) {
21
21
  const entry = tokenCache.get(providerId);
22
22
  if (!entry)
23
23
  return null;
24
- if (isRevoked(entry.sessionId)) {
24
+ if (isSessionRevoked(entry.sessionId)) {
25
25
  tokenCache.delete(providerId);
26
26
  return null;
27
27
  }
@@ -51,7 +51,7 @@ export function tryRefreshCachedToken(providerId, refresher) {
51
51
  const entry = tokenCache.get(providerId);
52
52
  if (!entry)
53
53
  return null;
54
- if (isRevoked(entry.sessionId)) {
54
+ if (isSessionRevoked(entry.sessionId)) {
55
55
  tokenCache.delete(providerId);
56
56
  return null;
57
57
  }
@@ -99,11 +99,6 @@ export function clearRevocations() {
99
99
  export function getRevocationCount() {
100
100
  return revokedSessions.size;
101
101
  }
102
- /** Check if a session ID has been revoked, pruning expired entries first. */
103
- function isRevoked(sessionId) {
104
- pruneRevocations();
105
- return revokedSessions.has(sessionId);
106
- }
107
102
  /** Remove expired revocation entries and enforce max size. */
108
103
  function pruneRevocations() {
109
104
  if (revokedSessions.size === 0)
@@ -65,10 +65,9 @@ export function refreshToken(oldToken, providerId) {
65
65
  */
66
66
  export function verifyBearerToken(token, providerId) {
67
67
  const result = verifyTokenWithResult(token, { providerId });
68
- if (!result.valid)
68
+ if (!result.valid || !result.payload)
69
69
  return result;
70
- // Reject if the session was explicitly revoked (check token's jti, not cache)
71
- if (result.payload && isSessionRevoked(result.payload.jti)) {
70
+ if (isSessionRevoked(result.payload.jti)) {
72
71
  return { valid: false, reason: "revoked" };
73
72
  }
74
73
  return result;
@@ -100,11 +99,15 @@ function tryPeekAndRevoke(providerId) {
100
99
  * reducing false positives from unrelated 401/403 responses.
101
100
  */
102
101
  export function isJWTAuthError(err) {
103
- const msg = err instanceof Error ? err.message
104
- : (err !== null && typeof err === "object" && "message" in err && typeof err.message === "string")
102
+ const msg = err instanceof Error
103
+ ? err.message
104
+ : err && typeof err === "object" && "message" in err && typeof err.message === "string"
105
105
  ? err.message
106
106
  : String(err);
107
107
  const lower = msg.toLowerCase();
108
+ if (lower.includes("bearer") && lower.includes("token") && lower.includes("invalid")) {
109
+ return true;
110
+ }
108
111
  // JWT-specific indicators (high confidence)
109
112
  const jwtIndicators = [
110
113
  "token expired", "invalid_token", "jwt", "signature",
@@ -88,6 +88,8 @@ export interface AgentState {
88
88
  peakContextTokens?: number;
89
89
  /** Resolved model this agent is running (task override or swarm default). */
90
90
  model?: string;
91
+ /** Unix timestamp (ms) of the last assistant stream content (text, tool deltas, etc.). Used to detect SDK streams that yield no content. */
92
+ lastContentTimestamp?: number;
91
93
  }
92
94
  /** A timestamped log line from an agent's execution. */
93
95
  export interface LogEntry {
@@ -173,8 +175,13 @@ export interface WaveSummary {
173
175
  status: string;
174
176
  type?: string;
175
177
  filesChanged?: number;
178
+ toolCalls?: number;
176
179
  error?: string;
177
180
  }[];
181
+ /** Sum of `toolCalls` across all agents in this wave (diagnostics). */
182
+ totalToolCalls?: number;
183
+ /** Non-heal tasks landed 0 files but agents invoked tools — possible worktree/merge bug. */
184
+ suspectedInfraFailure?: boolean;
178
185
  }
179
186
  /** Result from the steering function. */
180
187
  export interface SteerResult {
package/dist/index.js CHANGED
@@ -8,7 +8,7 @@ import { setPlannerEnvResolver } from "./planner/query.js";
8
8
  import { setTranscriptRunDir } from "./core/transcripts.js";
9
9
  import { pickModel, loadProviders, buildEnvResolver, healthCheckCursorProxy, PROXY_DEFAULT_URL, isCursorProxyProvider, bundledComposerProxyShellCommand, warnMacCursorAgentShellPatchIfNeeded, } from "./providers/index.js";
10
10
  import { executeRun } from "./run/run.js";
11
- import { parseCliFlags, fetchModels, ask, select, selectKey, loadTaskFile, validateConcurrency, isGitRepo, validateGitRepo, showPlan, } from "./cli/cli.js";
11
+ import { parseCliFlags, fetchModels, ask, select, selectKey, loadTaskFile, loadPlanFile, validateConcurrency, isGitRepo, validateGitRepo, showPlan, } from "./cli/cli.js";
12
12
  import { loadRunState, findOrphanedDesigns, backfillOrphanedPlans, readPreviousRunKnowledge, createRunDir, updateLatestSymlink, } from "./state/state.js";
13
13
  import { runSetupCoach, loadUserSettings, saveUserSettings, COACH_MODEL } from "./planner/coach/coach.js";
14
14
  import { editRunSettings, formatSettingsSummary } from "./cli/settings.js";
@@ -63,11 +63,21 @@ async function main() {
63
63
  // ── Load tasks ──
64
64
  let tasks = [];
65
65
  let fileCfg;
66
+ let planFileContent;
66
67
  const jsonFiles = args.filter(a => a.endsWith(".json"));
68
+ const mdFiles = args.filter(a => a.endsWith(".md"));
67
69
  if (jsonFiles.length > 1) {
68
70
  console.error(chalk.red(` Multiple task files provided. Only one .json file is supported.`));
69
71
  process.exit(1);
70
72
  }
73
+ if (mdFiles.length > 1) {
74
+ console.error(chalk.red(` Multiple plan files provided. Only one .md file is supported.`));
75
+ process.exit(1);
76
+ }
77
+ if (jsonFiles.length && mdFiles.length) {
78
+ console.error(chalk.red(` Cannot mix a .json task file with a .md plan file.`));
79
+ process.exit(1);
80
+ }
71
81
  for (const arg of args) {
72
82
  if (arg.endsWith(".json")) {
73
83
  if (tasks.length > 0) {
@@ -77,8 +87,13 @@ async function main() {
77
87
  fileCfg = loadTaskFile(arg);
78
88
  tasks = fileCfg.tasks;
79
89
  }
90
+ else if (arg.endsWith(".md")) {
91
+ const plan = loadPlanFile(arg);
92
+ planFileContent = plan.planContent;
93
+ fileCfg = { tasks: [], objective: plan.objective, flexiblePlan: true };
94
+ }
80
95
  else if (!arg.startsWith("-") && existsSync(resolve(arg))) {
81
- console.error(chalk.red(` "${arg}" looks like a file but doesn't end in .json. Rename it or quote the string.`));
96
+ console.error(chalk.red(` "${arg}" looks like a file but doesn't end in .json or .md. Rename it or quote the string.`));
82
97
  process.exit(1);
83
98
  }
84
99
  else {
@@ -341,6 +356,30 @@ async function main() {
341
356
  console.log(chalk.dim(` ╰${"─".repeat(innerLen + 4)}╯`));
342
357
  }
343
358
  else {
359
+ // ── Setup coach in confirm-only mode (task/plan file on a TTY) ──
360
+ let coachResult = null;
361
+ if (fileCfg?.objective && process.stdin.isTTY
362
+ && !argv.includes("--no-coach") && !loadUserSettings().skipCoach) {
363
+ const settings = loadUserSettings();
364
+ const cModel = settings.coachModel ?? COACH_MODEL;
365
+ const cProvider = settings.coachProviderId
366
+ ? loadProviders().find(p => p.id === settings.coachProviderId) : undefined;
367
+ coachResult = await runSetupCoach(fileCfg.objective, cwd, {
368
+ providers: loadProviders(), cliFlags, coachModel: cModel, coachProvider: cProvider,
369
+ planContent: planFileContent, confirmOnly: true,
370
+ });
371
+ if (coachResult) {
372
+ coachedOriginal = fileCfg.objective;
373
+ coachedAt = Date.now();
374
+ fileCfg.objective = coachResult.improvedObjective;
375
+ objective = coachResult.improvedObjective;
376
+ const rec = coachResult.recommended;
377
+ if (fileCfg.concurrency == null)
378
+ fileCfg.concurrency = rec.concurrency;
379
+ if (fileCfg.usageCap == null && rec.usageCap != null)
380
+ fileCfg.usageCap = Math.round(rec.usageCap * 100);
381
+ }
382
+ }
344
383
  let models = [];
345
384
  if (!cliFlags.model && !fileCfg?.model)
346
385
  models = await fetchModels(5_000);
@@ -374,7 +413,7 @@ async function main() {
374
413
  }
375
414
  }
376
415
  concurrency = cliFlags.concurrency ? parseInt(cliFlags.concurrency) : (fileCfg?.concurrency ?? 5);
377
- budget = cliFlags.budget ? parseInt(cliFlags.budget) : undefined;
416
+ budget = cliFlags.budget ? parseInt(cliFlags.budget) : coachResult?.recommended.budget;
378
417
  if (budget != null && (isNaN(budget) || budget < 1)) {
379
418
  console.error(chalk.red(` --budget must be a positive integer`));
380
419
  process.exit(1);
@@ -442,7 +481,8 @@ async function main() {
442
481
  console.log(chalk.dim(` ${workerModel} concurrency=${concurrency} worktrees=${useWorktrees} merge=${mergeStrategy}${capStr}${extraStr}`));
443
482
  }
444
483
  // ── Plan phase ──
445
- const flex = !argv.includes("--no-flex") && (fileCfg?.flexiblePlan ?? objective != null) && objective != null && (budget ?? 10) > 2;
484
+ const flexFlag = argv.includes("--flex") ? true : argv.includes("--no-flex") ? false : undefined;
485
+ const flex = objective != null && (flexFlag ?? ((fileCfg?.flexiblePlan ?? true) && (budget ?? 10) > 2));
446
486
  const agentTimeoutMs = cliFlags.timeout ? parseFloat(cliFlags.timeout) * 1000 : undefined;
447
487
  let thinkingUsed = 0, thinkingCost = 0, thinkingIn = 0, thinkingOut = 0, thinkingTools = 0;
448
488
  let thinkingHistory;
@@ -11,5 +11,9 @@ export interface CoachContext {
11
11
  log?: PlannerLog;
12
12
  coachModel?: string;
13
13
  coachProvider?: ProviderConfig;
14
+ /** Full markdown plan content (e.g. from a .md plan file). Overrides URL fetching. */
15
+ planContent?: string;
16
+ /** When true, show only accept/skip and do not persist user settings. */
17
+ confirmOnly?: boolean;
14
18
  }
15
19
  export declare function runSetupCoach(rawObjective: string, cwd: string, ctx: CoachContext): Promise<CoachResult | null>;
@@ -47,13 +47,15 @@ export async function runSetupCoach(rawObjective, cwd, ctx) {
47
47
  const facts = collectRepoFacts(cwd);
48
48
  if (facts.srcFileCount > 1_000_000)
49
49
  return null;
50
- const urls = rawObjective.match(URL_REGEX) ?? [];
51
- let planContent = null;
52
- if (urls.length > 0) {
53
- const results = await Promise.all(urls.map(u => fetchUrlContent(u, 4_000)));
54
- const fetched = results.filter(Boolean);
55
- if (fetched.length > 0) {
56
- planContent = fetched.map((c, i) => `[URL ${i + 1}: ${urls[i]}]\n${c}`).join("\n\n---\n\n");
50
+ let planContent = ctx.planContent ?? null;
51
+ if (!planContent) {
52
+ const urls = rawObjective.match(URL_REGEX) ?? [];
53
+ if (urls.length > 0) {
54
+ const results = await Promise.all(urls.map(u => fetchUrlContent(u, 4_000)));
55
+ const fetched = results.filter(Boolean);
56
+ if (fetched.length > 0) {
57
+ planContent = fetched.map((c, i) => `[URL ${i + 1}: ${urls[i]}]\n${c}`).join("\n\n---\n\n");
58
+ }
57
59
  }
58
60
  }
59
61
  const userMessage = renderRepoFacts(facts, rawObjective, ctx.providers, ctx.cliFlags, planContent);
@@ -120,14 +122,20 @@ export async function runSetupCoach(rawObjective, cwd, ctx) {
120
122
  return null;
121
123
  }
122
124
  renderCoachBlock(result, elapsedMs, model);
123
- const choice = await selectKey("", [
124
- { key: "y", desc: " accept" },
125
- { key: "e", desc: "dit objective" },
126
- { key: "s", desc: "kip coach" },
127
- { key: "x", desc: " skip coach forever" },
128
- ]);
125
+ const choice = ctx.confirmOnly
126
+ ? await selectKey("", [
127
+ { key: "y", desc: " accept" },
128
+ { key: "s", desc: "kip" },
129
+ ])
130
+ : await selectKey("", [
131
+ { key: "y", desc: " accept" },
132
+ { key: "e", desc: "dit objective" },
133
+ { key: "s", desc: "kip coach" },
134
+ { key: "x", desc: " skip coach forever" },
135
+ ]);
129
136
  if (choice === "y") {
130
- saveUserSettings({ ...loadUserSettings(), lastCoachedAt: Date.now() });
137
+ if (!ctx.confirmOnly)
138
+ saveUserSettings({ ...loadUserSettings(), lastCoachedAt: Date.now() });
131
139
  return result;
132
140
  }
133
141
  if (choice === "e") {
@@ -3,7 +3,7 @@ import { NudgeError, extractToolTarget, sumUsageTokens } from "../core/types.js"
3
3
  import { writeTranscriptEvent } from "../core/transcripts.js";
4
4
  import { getTurn, updateTurn } from "../core/turns.js";
5
5
  import { isRateLimitError, throttlePlanner, addPlannerCost, recordPeakContext, resetPlannerRateLimit, setContextTokens, applyRateLimitEvent, getPlannerRateLimitInfo, } from "./throttle.js";
6
- import { cursorProxyRateLimiter, sdkQueryRateLimiter, apiEndpointLimiter } from "../core/rate-limiter.js";
6
+ import { cursorProxyRateLimiter, sdkQueryRateLimiter, apiEndpointLimiter, acquireSdkQueryRateLimit } from "../core/rate-limiter.js";
7
7
  export { getTotalPlannerCost, getPeakPlannerContext, getPlannerRateLimitInfo, } from "./throttle.js";
8
8
  export { attemptJsonParse, extractTaskJson } from "./json.js";
9
9
  export { postProcess } from "./postprocess.js";
@@ -126,7 +126,7 @@ async function runPlannerQueryOnce(prompt, opts, onLog) {
126
126
  promptBytes: prompt.length,
127
127
  });
128
128
  }
129
- await rl.waitIfNeeded();
129
+ await acquireSdkQueryRateLimit();
130
130
  const pq = query({
131
131
  prompt,
132
132
  options: {
@@ -0,0 +1,66 @@
1
+ import type { Task, SteerResult, WaveSummary } from "../core/types.js";
2
+ import { type PlannerLog } from "./query.js";
3
+ export declare const VERIFY_SCHEMA: {
4
+ type: "json_schema";
5
+ schema: {
6
+ type: string;
7
+ properties: {
8
+ done: {
9
+ type: string;
10
+ };
11
+ reasoning: {
12
+ type: string;
13
+ };
14
+ statusUpdate: {
15
+ type: string;
16
+ };
17
+ estimatedSessionsRemaining: {
18
+ type: string;
19
+ };
20
+ verifiedCount: {
21
+ type: string;
22
+ };
23
+ retryCount: {
24
+ type: string;
25
+ };
26
+ tasks: {
27
+ type: string;
28
+ items: {
29
+ type: string;
30
+ properties: {
31
+ prompt: {
32
+ type: string;
33
+ };
34
+ model: {
35
+ type: string;
36
+ };
37
+ noWorktree: {
38
+ type: string;
39
+ };
40
+ type: {
41
+ type: string;
42
+ enum: string[];
43
+ };
44
+ postcondition: {
45
+ type: string;
46
+ };
47
+ };
48
+ required: string[];
49
+ };
50
+ };
51
+ };
52
+ required: string[];
53
+ };
54
+ };
55
+ /**
56
+ * Verify the previous wave and compose the next fixed batch of pending tasks.
57
+ *
58
+ * Unlike `steerWave`, the verifier does not invent new tasks — it:
59
+ * 1. Runs the project's build/smoke checks.
60
+ * 2. Fixes shallow regressions in the last wave (edits directly).
61
+ * 3. Picks the next N pending tasks from the user's fixed plan.
62
+ *
63
+ * The model has full tool access so it can actually repair broken commits,
64
+ * not just report on them.
65
+ */
66
+ export declare function verifyWave(objective: string, pendingTasks: Task[], lastWave: WaveSummary | undefined, remainingBudget: number, cwd: string, plannerModel: string, concurrency: number, onLog: PlannerLog, transcriptName?: string): Promise<SteerResult>;
@@ -0,0 +1,117 @@
1
+ import { runPlannerQuery, attemptJsonParse, postProcess } from "./query.js";
2
+ import { createTurn, beginTurn, endTurn } from "../core/turns.js";
3
+ // Verifier schema — same shape as STEER_SCHEMA plus numeric `verifiedCount` and `retryCount`
4
+ // fields (there is no `verifiedIds` list); presumably they report how much of the prior wave shipped vs. needs a retry.
5
+ export const VERIFY_SCHEMA = {
6
+ type: "json_schema",
7
+ schema: {
8
+ type: "object",
9
+ properties: {
10
+ done: { type: "boolean" },
11
+ reasoning: { type: "string" },
12
+ statusUpdate: { type: "string" },
13
+ estimatedSessionsRemaining: { type: "number" },
14
+ verifiedCount: { type: "number" },
15
+ retryCount: { type: "number" },
16
+ tasks: {
17
+ type: "array",
18
+ items: {
19
+ type: "object",
20
+ properties: {
21
+ prompt: { type: "string" },
22
+ model: { type: "string" },
23
+ noWorktree: { type: "boolean" },
24
+ type: { type: "string", enum: ["execute", "explore", "critique", "synthesize", "verify", "user-test", "polish"] },
25
+ postcondition: { type: "string" },
26
+ },
27
+ required: ["prompt"],
28
+ },
29
+ },
30
+ },
31
+ required: ["done", "tasks", "reasoning", "statusUpdate", "estimatedSessionsRemaining"],
32
+ },
33
+ };
/**
 * Render the previous wave as a bullet list for the verifier prompt.
 *
 * Each task becomes exactly one line: status, prompt (capped at 160 chars),
 * files-changed count and, when present, the error text.
 *
 * @param w Summary of the last wave, or undefined before the first wave has run.
 * @returns The rendered text, or a placeholder when there is nothing to verify.
 */
function renderLastWave(w) {
    if (!w)
        return "(first wave — nothing to verify yet)";
    // Collapse whitespace runs so a multi-line prompt or stack trace cannot spill
    // out of its bullet; a missing value renders as empty text instead of throwing.
    const oneLine = (text) => String(text ?? "").replace(/\s+/g, " ").trim();
    const lines = w.tasks.map(t => {
        const files = t.filesChanged ? ` (${t.filesChanged} files)` : " (0 files)";
        const err = t.error ? ` — ${oneLine(t.error)}` : "";
        return ` - [${t.status}] ${oneLine(t.prompt).slice(0, 160)}${files}${err}`;
    }).join("\n");
    // `wave` is rendered one-based (`w.wave + 1`).
    return `Wave ${w.wave + 1}:\n${lines}`;
}
/**
 * Verify the previous wave and compose the next fixed batch of pending tasks.
 *
 * Unlike `steerWave`, the verifier does not invent new tasks — it:
 *   1. Runs the project's build/smoke checks.
 *   2. Fixes shallow regressions in the last wave (edits directly).
 *   3. Picks the next N pending tasks from the user's fixed plan.
 *
 * The model has full tool access so it can actually repair broken commits,
 * not just report on them.
 *
 * @param objective        Run objective, embedded verbatim in the prompt.
 * @param pendingTasks     Tasks from the fixed plan not yet dispatched; each prompt is shown capped at 200 chars.
 * @param lastWave         Summary of the wave that just finished, or undefined before the first wave.
 * @param remainingBudget  Remaining agent sessions; shown in the prompt and passed to `postProcess`.
 * @param cwd              Working directory for the planner query.
 * @param plannerModel     Model used for the verifier turn.
 * @param concurrency      Parallelism hint included in the prompt.
 * @param onLog            Planner log sink.
 * @param transcriptName   Transcript name for the query (default "verify").
 * @returns A steer result: `done` is true when the model says so or when no tasks remain after post-processing.
 * @throws Error when the response cannot be parsed as JSON; errors from the planner query propagate.
 *         The turn is always closed (as "error") before any error leaves this function.
 */
export async function verifyWave(objective, pendingTasks, lastWave, remainingBudget, cwd, plannerModel, concurrency, onLog, transcriptName = "verify") {
    const pendingList = pendingTasks.length > 0
        ? pendingTasks.map((t, i) => ` ${i + 1}. ${t.prompt.slice(0, 200)}`).join("\n")
        : "(none — every task from the original plan has been attempted)";
    const prompt = `You are the verifier + fix gate between waves of a fixed-plan execution.

Objective: ${objective}

## What just happened
${renderLastWave(lastWave)}

## Remaining plan (pending tasks, in order)
${pendingList}

## Your job

1. Run the project's build and smoke checks. Use the tools you have (Bash, Read, Grep, Edit, Write).
2. For any regression the last wave introduced, make the fix directly. Don't delegate a fix to the next wave if you can do it in two edits.
3. Compose the next batch of pending tasks to dispatch — pick tasks with non-overlapping file scopes so ${concurrency} can run in parallel.
4. If the plan is complete AND the build passes AND one verify task has confirmed the app runs, set done=true.

## Output

Respond with ONLY a JSON object (no markdown fences):
{"done":boolean,"reasoning":"...","statusUpdate":"REQUIRED","estimatedSessionsRemaining":N,"verifiedCount":N,"retryCount":N,"tasks":[{"prompt":"...","type":"execute","postcondition":"..."}]}

Remaining budget: ${remainingBudget} agent sessions. Include retries inside tasks[] (same format) if a pending step needs a second attempt with corrected context.`;
    onLog("Verifying last wave…", "status");
    const turn = createTurn("steer", `Verify wave`, `verify-${lastWave?.wave ?? 0}`, plannerModel);
    beginTurn(turn);
    // Pessimistic default: any throw below (query failure, unparsable output)
    // still closes the turn as failed via the `finally` block.
    let turnStatus = "error";
    try {
        const resultText = await runPlannerQuery(prompt, {
            cwd, model: plannerModel, outputFormat: VERIFY_SCHEMA,
            transcriptName, turnId: turn.id, maxTurns: 80,
        }, onLog);
        const parsed = attemptJsonParse(resultText);
        if (!parsed) {
            throw new Error(`Could not parse verifier response (${resultText.length} chars): ${resultText.slice(0, 120)}`);
        }
        const isDone = parsed.done === true;
        const statusUpdate = parsed.statusUpdate || undefined;
        const estRaw = parsed.estimatedSessionsRemaining;
        const estimatedSessionsRemaining = typeof estRaw === "number" && estRaw >= 0 ? Math.round(estRaw) : undefined;
        // The model's output is untrusted: tolerate a non-array `tasks`, bare-string
        // entries, and drop entries that carry no usable prompt.
        const rawTasks = Array.isArray(parsed.tasks) ? parsed.tasks : [];
        // NOTE(review): the schema also allows a per-task `model`, which is not
        // forwarded here (unchanged from before) — confirm that is intentional.
        let tasks = rawTasks
            .map(t => (typeof t === "string" ? { prompt: t } : t))
            .filter(t => t != null && typeof t.prompt === "string" && t.prompt.trim() !== "")
            .map((t, i) => ({
                id: String(i),
                prompt: t.prompt,
                ...(t.noWorktree && { noWorktree: true }),
                ...(t.type && { type: t.type }),
                ...(typeof t.postcondition === "string" && t.postcondition.trim() && { postcondition: t.postcondition.trim() }),
            }));
        tasks = postProcess(tasks, remainingBudget, onLog);
        // No tasks and not done is recorded as a failed turn (unchanged semantics).
        turnStatus = tasks.length === 0 && !isDone ? "error" : "done";
        if (isDone) {
            return {
                done: true, tasks: [], reasoning: parsed.reasoning || "Plan complete and verified",
                statusUpdate, estimatedSessionsRemaining: estimatedSessionsRemaining ?? 0,
            };
        }
        return {
            done: tasks.length === 0, tasks,
            reasoning: parsed.reasoning || "", statusUpdate, estimatedSessionsRemaining,
        };
    }
    finally {
        endTurn(turn, turnStatus);
    }
}
@@ -11,7 +11,7 @@ import { DEFAULT_MODEL } from "../core/models.js";
11
11
  import { isCursorProxyProvider, resolveCursorAgentToken, cachedAgentPaths, } from "./cursor-env.js";
12
12
  import { preflightCursorProxyViaHttp } from "./cursor-proxy.js";
13
13
  import { pickCursorModel } from "./cursor-picker.js";
14
- import { sdkQueryRateLimiter } from "../core/rate-limiter.js";
14
+ import { sdkQueryRateLimiter, acquireSdkQueryRateLimit } from "../core/rate-limiter.js";
15
15
  // Re-export Cursor utilities so callers can keep a single import point.
16
16
  export { PROXY_DEFAULT_URL, isCursorProxyProvider, bundledComposerProxyShellCommand, readCursorProxyLogTail, warnMacCursorAgentShellPatchIfNeeded, hasCursorAgentToken, getCursorAgentToken, } from "./cursor-env.js";
17
17
  export { healthCheckCursorProxy, ensureCursorProxyRunning } from "./cursor-proxy.js";
@@ -243,7 +243,7 @@ export async function preflightProvider(p, cwd, timeoutMs = 20_000, opts) {
243
243
  let pq;
244
244
  const rl = sdkQueryRateLimiter;
245
245
  try {
246
- await rl.waitIfNeeded();
246
+ await acquireSdkQueryRateLimit();
247
247
  pq = query({
248
248
  prompt: "Reply with exactly the word ok and nothing else.",
249
249
  options: {