claude-overnight 1.25.45 → 1.25.47
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/cli.d.ts +5 -0
- package/dist/cli/cli.js +18 -1
- package/dist/cli/help.js +3 -1
- package/dist/core/_version.d.ts +1 -1
- package/dist/core/_version.js +1 -1
- package/dist/core/jwt-signer.js +12 -10
- package/dist/core/rate-limiter.d.ts +10 -0
- package/dist/core/rate-limiter.js +18 -1
- package/dist/core/token-cache.js +2 -7
- package/dist/core/token-manager.js +8 -5
- package/dist/core/types.d.ts +7 -0
- package/dist/index.js +44 -4
- package/dist/planner/coach/coach.d.ts +4 -0
- package/dist/planner/coach/coach.js +22 -14
- package/dist/planner/query.js +2 -2
- package/dist/planner/verifier.d.ts +66 -0
- package/dist/planner/verifier.js +117 -0
- package/dist/providers/index.js +2 -2
- package/dist/run/circuit-breaker-state.d.ts +16 -0
- package/dist/run/circuit-breaker-state.js +18 -0
- package/dist/run/run.js +29 -0
- package/dist/run/wave-loop.d.ts +2 -0
- package/dist/run/wave-loop.js +48 -32
- package/dist/swarm/agent-run.js +19 -9
- package/dist/swarm/config.d.ts +7 -0
- package/dist/swarm/config.js +15 -0
- package/dist/swarm/errors.d.ts +7 -0
- package/dist/swarm/errors.js +15 -1
- package/dist/swarm/message-handler.d.ts +4 -0
- package/dist/swarm/message-handler.js +20 -0
- package/dist/swarm/swarm.js +3 -0
- package/dist/ui/footer.js +3 -1
- package/dist/ui/header.js +38 -12
- package/dist/ui/input.d.ts +7 -0
- package/dist/ui/input.js +131 -31
- package/dist/ui/overlay.js +22 -10
- package/package.json +1 -1
- package/plugins/claude-overnight/.claude-plugin/plugin.json +1 -1
package/dist/cli/cli.d.ts
CHANGED
|
@@ -61,6 +61,11 @@ export interface FileArgs {
|
|
|
61
61
|
usageCap?: number;
|
|
62
62
|
flexiblePlan?: boolean;
|
|
63
63
|
}
|
|
64
|
+
/** Load a markdown plan file. Extracts the first H1 as objective and returns the full body as planContent. */
|
|
65
|
+
export declare function loadPlanFile(file: string): {
|
|
66
|
+
objective: string;
|
|
67
|
+
planContent: string;
|
|
68
|
+
};
|
|
64
69
|
export declare function loadTaskFile(file: string): FileArgs;
|
|
65
70
|
export declare function validateConcurrency(value: unknown): asserts value is number;
|
|
66
71
|
export declare function isGitRepo(cwd: string): boolean;
|
package/dist/cli/cli.js
CHANGED
|
@@ -7,7 +7,7 @@ import { query } from "@anthropic-ai/claude-agent-sdk";
|
|
|
7
7
|
// ── CLI flag parsing ──
|
|
8
8
|
export function parseCliFlags(argv) {
|
|
9
9
|
const known = new Set(["concurrency", "model", "timeout", "budget", "usage-cap", "extra-usage-budget", "merge"]);
|
|
10
|
-
const booleans = new Set(["--dry-run", "-h", "--help", "-v", "--version", "--no-flex", "--allow-extra-usage", "--worktrees", "--no-worktrees", "--yolo"]);
|
|
10
|
+
const booleans = new Set(["--dry-run", "-h", "--help", "-v", "--version", "--flex", "--no-flex", "--allow-extra-usage", "--worktrees", "--no-worktrees", "--yolo"]);
|
|
11
11
|
const flags = {};
|
|
12
12
|
const positional = [];
|
|
13
13
|
for (let i = 0; i < argv.length; i++) {
|
|
@@ -334,6 +334,23 @@ export async function selectKey(label, options) {
|
|
|
334
334
|
const KNOWN_TASK_FILE_KEYS = new Set([
|
|
335
335
|
"tasks", "objective", "concurrency", "cwd", "model", "allowedTools", "beforeWave", "afterWave", "afterRun", "worktrees", "mergeStrategy", "usageCap", "flexiblePlan",
|
|
336
336
|
]);
|
|
337
|
+
/**
 * Load a markdown plan file.
 *
 * The first level-1 heading (`# Title`) becomes the objective; when the file has no
 * such heading, the first non-blank line is used instead. The whole trimmed file
 * body is returned as `planContent`.
 *
 * @param file Path to the plan file (passed through `resolve`).
 * @returns `{ objective, planContent }`.
 * @throws Error "Cannot read plan file: <path>" when the file cannot be read (the
 *   underlying error is attached as `cause`), or "Plan file is empty: <path>" when
 *   the file contains only whitespace.
 */
export function loadPlanFile(file) {
    const path = resolve(file);
    let raw;
    try {
        raw = readFileSync(path, "utf-8");
    }
    catch (err) {
        // Same message as before, but keep the underlying fs error reachable for debugging.
        throw Object.assign(new Error(`Cannot read plan file: ${path}`), { cause: err });
    }
    const body = raw.trim();
    if (!body)
        throw new Error(`Plan file is empty: ${path}`);
    // `[ \t]+` instead of `\s+` keeps the match on one line: with `\s+` a bare "#" line
    // swallowed the newline and captured the *next* line as the title.
    const h1 = body.match(/^#[ \t]+(\S.*)$/m);
    const firstNonBlank = body.split("\n").find(l => l.trim());
    const objective = (h1?.[1] ?? firstNonBlank ?? body).trim();
    return { objective, planContent: body };
}
|
|
337
354
|
export function loadTaskFile(file) {
|
|
338
355
|
const path = resolve(file);
|
|
339
356
|
let raw;
|
package/dist/cli/help.js
CHANGED
|
@@ -16,6 +16,7 @@ export function printHelp() {
|
|
|
16
16
|
${chalk.cyan("Usage")}
|
|
17
17
|
claude-overnight ${chalk.dim("interactive mode")}
|
|
18
18
|
claude-overnight tasks.json ${chalk.dim("task file mode")}
|
|
19
|
+
claude-overnight plan.md ${chalk.dim("plan file mode (.md) — coach + flex")}
|
|
19
20
|
claude-overnight "fix auth" "add tests" ${chalk.dim("inline tasks")}
|
|
20
21
|
|
|
21
22
|
${chalk.cyan("Flags")}
|
|
@@ -30,7 +31,8 @@ export function printHelp() {
|
|
|
30
31
|
--allow-extra-usage Allow extra/overage usage ${chalk.dim("(default: stop when plan limits hit)")}
|
|
31
32
|
--extra-usage-budget=N Max $ for extra usage ${chalk.dim("(implies --allow-extra-usage)")}
|
|
32
33
|
--timeout=SECONDS Agent inactivity timeout ${chalk.dim("(default: 900s, nudges at timeout, kills at 2×)")}
|
|
33
|
-
--
|
|
34
|
+
--flex Force adaptive multi-wave planning ${chalk.dim("(steering between waves)")}
|
|
35
|
+
--no-flex Fixed plan mode ${chalk.dim("(verifier between waves, no re-planning)")}
|
|
34
36
|
--worktrees Force worktree isolation on ${chalk.dim("(default: auto-detect git repo)")}
|
|
35
37
|
--no-worktrees Disable worktree isolation ${chalk.dim("(all agents work in real cwd)")}
|
|
36
38
|
--merge=MODE Merge strategy: yolo or branch ${chalk.dim("(default: yolo)")}
|
package/dist/core/_version.d.ts
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
export declare const VERSION = "1.25.45";
|
|
1
|
+
export declare const VERSION = "1.25.47";
|
package/dist/core/_version.js
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
// Auto-generated by build — do not edit manually.
|
|
2
|
-
export const VERSION = "1.25.45";
|
|
2
|
+
export const VERSION = "1.25.47";
|
package/dist/core/jwt-signer.js
CHANGED
|
@@ -59,27 +59,29 @@ export function verifyToken(token, providerId) {
|
|
|
59
59
|
*/
|
|
60
60
|
export function verifyTokenWithResult(token, options = {}) {
|
|
61
61
|
const { providerId, model, baseURL } = options;
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
const raw = jwt.decode(token);
|
|
65
|
-
if (!raw || typeof raw !== "object") {
|
|
62
|
+
const header = jwt.decode(token, { complete: true });
|
|
63
|
+
if (!header || typeof header === "string") {
|
|
66
64
|
return { valid: false, reason: "invalid_signature" };
|
|
67
65
|
}
|
|
68
|
-
const
|
|
69
|
-
|
|
66
|
+
const loose = header.payload;
|
|
67
|
+
const subForKey = loose.sub;
|
|
68
|
+
if (!subForKey || typeof subForKey !== "string") {
|
|
69
|
+
return { valid: false, reason: "invalid_signature" };
|
|
70
|
+
}
|
|
71
|
+
let key;
|
|
72
|
+
try {
|
|
73
|
+
key = deriveKey(subForKey);
|
|
74
|
+
}
|
|
75
|
+
catch {
|
|
70
76
|
return { valid: false, reason: "invalid_signature" };
|
|
71
77
|
}
|
|
72
|
-
const key = deriveKey(sub);
|
|
73
78
|
try {
|
|
74
79
|
const decoded = jwt.verify(token, key, {
|
|
75
80
|
algorithms: ["HS256"],
|
|
76
|
-
// Let jwt.verify check expiration for us
|
|
77
81
|
});
|
|
78
|
-
// Reject tokens from older versions
|
|
79
82
|
if (decoded.ver !== TOKEN_VERSION) {
|
|
80
83
|
return { valid: false, reason: "wrong_version" };
|
|
81
84
|
}
|
|
82
|
-
// Validate claims if expected values are provided
|
|
83
85
|
if (providerId && decoded.sub !== providerId) {
|
|
84
86
|
return { valid: false, reason: "claim_mismatch" };
|
|
85
87
|
}
|
|
@@ -8,6 +8,12 @@ export interface RateLimiterConfig {
|
|
|
8
8
|
windowMs: number;
|
|
9
9
|
minIntervalMs?: number;
|
|
10
10
|
}
|
|
11
|
+
export interface AcquireOptions {
|
|
12
|
+
/** When true, skip sliding-window / min-interval waits (caller still records after the request). */
|
|
13
|
+
skipWhen?: () => boolean;
|
|
14
|
+
/** Invoked once when `skipWhen()` returned true and the throttle was bypassed. */
|
|
15
|
+
onBypass?: () => void;
|
|
16
|
+
}
|
|
11
17
|
export declare class RateLimiter {
|
|
12
18
|
private readonly maxRequests;
|
|
13
19
|
private readonly windowMs;
|
|
@@ -18,6 +24,8 @@ export declare class RateLimiter {
|
|
|
18
24
|
record(): void;
|
|
19
25
|
get currentCount(): number;
|
|
20
26
|
canRequest(): boolean;
|
|
27
|
+
/** Wait until a request slot is available. Optional `skipWhen` bypasses the throttle entirely. */
|
|
28
|
+
acquire(options?: AcquireOptions): Promise<number>;
|
|
21
29
|
waitIfNeeded(): Promise<number>;
|
|
22
30
|
waitMs(): number;
|
|
23
31
|
reset(): void;
|
|
@@ -29,6 +37,8 @@ export declare class RateLimiter {
|
|
|
29
37
|
}
|
|
30
38
|
/** Shared rate limiter for SDK query calls — enforced globally across all workers. */
|
|
31
39
|
export declare const sdkQueryRateLimiter: RateLimiter;
|
|
40
|
+
/** Acquire SDK query slot. Skips the SDK sliding-window limiter when `CURSOR_PROXY_URL` is set (proxy has its own limiters). */
|
|
41
|
+
export declare function acquireSdkQueryRateLimit(): Promise<number>;
|
|
32
42
|
/** Shared rate limiter for Cursor proxy direct fetches — enforced globally. */
|
|
33
43
|
export declare const cursorProxyRateLimiter: RateLimiter;
|
|
34
44
|
/** Shared rate limiter for direct API endpoint calls — guards against rapid
|
|
@@ -38,12 +38,20 @@ export class RateLimiter {
|
|
|
38
38
|
return this.timestamps.length < this.maxRequests
|
|
39
39
|
&& (Date.now() - this.lastRequestAt) >= this.minIntervalMs;
|
|
40
40
|
}
|
|
41
|
-
|
|
41
|
+
/**
 * Wait until a request slot is available and resolve with the number of milliseconds waited.
 *
 * When `options.skipWhen` is provided and returns a truthy value, the throttle is bypassed:
 * `options.onBypass` (if any) is called and the promise resolves with 0 without consulting
 * `waitMs()`. Otherwise the delay computed by `waitMs()` is slept once (when > 0) and returned.
 * This method does not call `record()`.
 */
|
|
42
|
+
async acquire(options) {
|
|
43
|
+
if (options?.skipWhen?.()) {
|
|
44
|
+
options.onBypass?.();
|
|
45
|
+
return 0;
|
|
46
|
+
}
|
|
42
47
|
const waited = this.waitMs();
|
|
43
48
|
if (waited > 0)
|
|
44
49
|
await new Promise(r => setTimeout(r, waited));
|
|
45
50
|
return waited;
|
|
46
51
|
}
|
|
52
|
+
/** Equivalent to `acquire()` with no options (so no bypass); resolves with whatever `acquire()` returns. */
async waitIfNeeded() {
|
|
53
|
+
return this.acquire();
|
|
54
|
+
}
|
|
47
55
|
waitMs() {
|
|
48
56
|
this.evict();
|
|
49
57
|
const volumeWait = this.timestamps.length >= this.maxRequests
|
|
@@ -86,6 +94,15 @@ const _cursorProxyLimiter = new RateLimiter({ maxRequests: 4, windowMs: 10_000 }
|
|
|
86
94
|
const _apiEndpointLimiter = new RateLimiter({ maxRequests: 6, windowMs: 15_000, minIntervalMs: 1_000 });
|
|
87
95
|
/** Shared rate limiter for SDK query calls — enforced globally across all workers. */
|
|
88
96
|
export const sdkQueryRateLimiter = _sdkQueryLimiter;
|
|
97
|
+
/**
 * Acquire SDK query slot. Skips the SDK sliding-window limiter when `CURSOR_PROXY_URL` is set (proxy has its own limiters).
 *
 * The environment variable is read on every call (any non-empty value counts as set), and each
 * bypassed call writes one "[rate-limiter] Skipping SDK rate limit…" line via `console.log`.
 * Resolves with the value returned by `_sdkQueryLimiter.acquire()`.
 */
|
|
98
|
+
export async function acquireSdkQueryRateLimit() {
|
|
99
|
+
return _sdkQueryLimiter.acquire({
|
|
100
|
+
skipWhen: () => !!process.env.CURSOR_PROXY_URL,
|
|
101
|
+
onBypass: () => {
|
|
102
|
+
console.log("[rate-limiter] Skipping SDK rate limit (Cursor proxy has its own limiter)");
|
|
103
|
+
},
|
|
104
|
+
});
|
|
105
|
+
}
|
|
89
106
|
/** Shared rate limiter for Cursor proxy direct fetches — enforced globally. */
|
|
90
107
|
export const cursorProxyRateLimiter = _cursorProxyLimiter;
|
|
91
108
|
/** Shared rate limiter for direct API endpoint calls — guards against rapid
|
package/dist/core/token-cache.js
CHANGED
|
@@ -21,7 +21,7 @@ export function getCachedToken(providerId) {
|
|
|
21
21
|
const entry = tokenCache.get(providerId);
|
|
22
22
|
if (!entry)
|
|
23
23
|
return null;
|
|
24
|
-
if (isRevoked(entry.sessionId)) {
|
|
24
|
+
if (isSessionRevoked(entry.sessionId)) {
|
|
25
25
|
tokenCache.delete(providerId);
|
|
26
26
|
return null;
|
|
27
27
|
}
|
|
@@ -51,7 +51,7 @@ export function tryRefreshCachedToken(providerId, refresher) {
|
|
|
51
51
|
const entry = tokenCache.get(providerId);
|
|
52
52
|
if (!entry)
|
|
53
53
|
return null;
|
|
54
|
-
if (isRevoked(entry.sessionId)) {
|
|
54
|
+
if (isSessionRevoked(entry.sessionId)) {
|
|
55
55
|
tokenCache.delete(providerId);
|
|
56
56
|
return null;
|
|
57
57
|
}
|
|
@@ -99,11 +99,6 @@ export function clearRevocations() {
|
|
|
99
99
|
export function getRevocationCount() {
|
|
100
100
|
return revokedSessions.size;
|
|
101
101
|
}
|
|
102
|
-
/** Check if a session ID has been revoked, pruning expired entries first. */
|
|
103
|
-
function isRevoked(sessionId) {
|
|
104
|
-
pruneRevocations();
|
|
105
|
-
return revokedSessions.has(sessionId);
|
|
106
|
-
}
|
|
107
102
|
/** Remove expired revocation entries and enforce max size. */
|
|
108
103
|
function pruneRevocations() {
|
|
109
104
|
if (revokedSessions.size === 0)
|
|
@@ -65,10 +65,9 @@ export function refreshToken(oldToken, providerId) {
|
|
|
65
65
|
*/
|
|
66
66
|
export function verifyBearerToken(token, providerId) {
|
|
67
67
|
const result = verifyTokenWithResult(token, { providerId });
|
|
68
|
-
if (!result.valid)
|
|
68
|
+
if (!result.valid || !result.payload)
|
|
69
69
|
return result;
|
|
70
|
-
|
|
71
|
-
if (result.payload && isSessionRevoked(result.payload.jti)) {
|
|
70
|
+
if (isSessionRevoked(result.payload.jti)) {
|
|
72
71
|
return { valid: false, reason: "revoked" };
|
|
73
72
|
}
|
|
74
73
|
return result;
|
|
@@ -100,11 +99,15 @@ function tryPeekAndRevoke(providerId) {
|
|
|
100
99
|
* reducing false positives from unrelated 401/403 responses.
|
|
101
100
|
*/
|
|
102
101
|
export function isJWTAuthError(err) {
|
|
103
|
-
const msg = err instanceof Error
|
|
104
|
-
|
|
102
|
+
const msg = err instanceof Error
|
|
103
|
+
? err.message
|
|
104
|
+
: err && typeof err === "object" && "message" in err && typeof err.message === "string"
|
|
105
105
|
? err.message
|
|
106
106
|
: String(err);
|
|
107
107
|
const lower = msg.toLowerCase();
|
|
108
|
+
if (lower.includes("bearer") && lower.includes("token") && lower.includes("invalid")) {
|
|
109
|
+
return true;
|
|
110
|
+
}
|
|
108
111
|
// JWT-specific indicators (high confidence)
|
|
109
112
|
const jwtIndicators = [
|
|
110
113
|
"token expired", "invalid_token", "jwt", "signature",
|
package/dist/core/types.d.ts
CHANGED
|
@@ -88,6 +88,8 @@ export interface AgentState {
|
|
|
88
88
|
peakContextTokens?: number;
|
|
89
89
|
/** Resolved model this agent is running (task override or swarm default). */
|
|
90
90
|
model?: string;
|
|
91
|
+
/** Unix timestamp (ms) of the last assistant stream content (text, tool deltas, etc.). Used to detect SDK streams that yield no content. */
|
|
92
|
+
lastContentTimestamp?: number;
|
|
91
93
|
}
|
|
92
94
|
/** A timestamped log line from an agent's execution. */
|
|
93
95
|
export interface LogEntry {
|
|
@@ -173,8 +175,13 @@ export interface WaveSummary {
|
|
|
173
175
|
status: string;
|
|
174
176
|
type?: string;
|
|
175
177
|
filesChanged?: number;
|
|
178
|
+
toolCalls?: number;
|
|
176
179
|
error?: string;
|
|
177
180
|
}[];
|
|
181
|
+
/** Sum of `toolCalls` across all agents in this wave (diagnostics). */
|
|
182
|
+
totalToolCalls?: number;
|
|
183
|
+
/** Non-heal tasks landed 0 files but agents invoked tools — possible worktree/merge bug. */
|
|
184
|
+
suspectedInfraFailure?: boolean;
|
|
178
185
|
}
|
|
179
186
|
/** Result from the steering function. */
|
|
180
187
|
export interface SteerResult {
|
package/dist/index.js
CHANGED
|
@@ -8,7 +8,7 @@ import { setPlannerEnvResolver } from "./planner/query.js";
|
|
|
8
8
|
import { setTranscriptRunDir } from "./core/transcripts.js";
|
|
9
9
|
import { pickModel, loadProviders, buildEnvResolver, healthCheckCursorProxy, PROXY_DEFAULT_URL, isCursorProxyProvider, bundledComposerProxyShellCommand, warnMacCursorAgentShellPatchIfNeeded, } from "./providers/index.js";
|
|
10
10
|
import { executeRun } from "./run/run.js";
|
|
11
|
-
import { parseCliFlags, fetchModels, ask, select, selectKey, loadTaskFile, validateConcurrency, isGitRepo, validateGitRepo, showPlan, } from "./cli/cli.js";
|
|
11
|
+
import { parseCliFlags, fetchModels, ask, select, selectKey, loadTaskFile, loadPlanFile, validateConcurrency, isGitRepo, validateGitRepo, showPlan, } from "./cli/cli.js";
|
|
12
12
|
import { loadRunState, findOrphanedDesigns, backfillOrphanedPlans, readPreviousRunKnowledge, createRunDir, updateLatestSymlink, } from "./state/state.js";
|
|
13
13
|
import { runSetupCoach, loadUserSettings, saveUserSettings, COACH_MODEL } from "./planner/coach/coach.js";
|
|
14
14
|
import { editRunSettings, formatSettingsSummary } from "./cli/settings.js";
|
|
@@ -63,11 +63,21 @@ async function main() {
|
|
|
63
63
|
// ── Load tasks ──
|
|
64
64
|
let tasks = [];
|
|
65
65
|
let fileCfg;
|
|
66
|
+
let planFileContent;
|
|
66
67
|
const jsonFiles = args.filter(a => a.endsWith(".json"));
|
|
68
|
+
const mdFiles = args.filter(a => a.endsWith(".md"));
|
|
67
69
|
if (jsonFiles.length > 1) {
|
|
68
70
|
console.error(chalk.red(` Multiple task files provided. Only one .json file is supported.`));
|
|
69
71
|
process.exit(1);
|
|
70
72
|
}
|
|
73
|
+
if (mdFiles.length > 1) {
|
|
74
|
+
console.error(chalk.red(` Multiple plan files provided. Only one .md file is supported.`));
|
|
75
|
+
process.exit(1);
|
|
76
|
+
}
|
|
77
|
+
if (jsonFiles.length && mdFiles.length) {
|
|
78
|
+
console.error(chalk.red(` Cannot mix a .json task file with a .md plan file.`));
|
|
79
|
+
process.exit(1);
|
|
80
|
+
}
|
|
71
81
|
for (const arg of args) {
|
|
72
82
|
if (arg.endsWith(".json")) {
|
|
73
83
|
if (tasks.length > 0) {
|
|
@@ -77,8 +87,13 @@ async function main() {
|
|
|
77
87
|
fileCfg = loadTaskFile(arg);
|
|
78
88
|
tasks = fileCfg.tasks;
|
|
79
89
|
}
|
|
90
|
+
else if (arg.endsWith(".md")) {
|
|
91
|
+
const plan = loadPlanFile(arg);
|
|
92
|
+
planFileContent = plan.planContent;
|
|
93
|
+
fileCfg = { tasks: [], objective: plan.objective, flexiblePlan: true };
|
|
94
|
+
}
|
|
80
95
|
else if (!arg.startsWith("-") && existsSync(resolve(arg))) {
|
|
81
|
-
console.error(chalk.red(` "${arg}" looks like a file but doesn't end in .json. Rename it or quote the string.`));
|
|
96
|
+
console.error(chalk.red(` "${arg}" looks like a file but doesn't end in .json or .md. Rename it or quote the string.`));
|
|
82
97
|
process.exit(1);
|
|
83
98
|
}
|
|
84
99
|
else {
|
|
@@ -341,6 +356,30 @@ async function main() {
|
|
|
341
356
|
console.log(chalk.dim(` ╰${"─".repeat(innerLen + 4)}╯`));
|
|
342
357
|
}
|
|
343
358
|
else {
|
|
359
|
+
// ── Setup coach in confirm-only mode (task/plan file on a TTY) ──
|
|
360
|
+
let coachResult = null;
|
|
361
|
+
if (fileCfg?.objective && process.stdin.isTTY
|
|
362
|
+
&& !argv.includes("--no-coach") && !loadUserSettings().skipCoach) {
|
|
363
|
+
const settings = loadUserSettings();
|
|
364
|
+
const cModel = settings.coachModel ?? COACH_MODEL;
|
|
365
|
+
const cProvider = settings.coachProviderId
|
|
366
|
+
? loadProviders().find(p => p.id === settings.coachProviderId) : undefined;
|
|
367
|
+
coachResult = await runSetupCoach(fileCfg.objective, cwd, {
|
|
368
|
+
providers: loadProviders(), cliFlags, coachModel: cModel, coachProvider: cProvider,
|
|
369
|
+
planContent: planFileContent, confirmOnly: true,
|
|
370
|
+
});
|
|
371
|
+
if (coachResult) {
|
|
372
|
+
coachedOriginal = fileCfg.objective;
|
|
373
|
+
coachedAt = Date.now();
|
|
374
|
+
fileCfg.objective = coachResult.improvedObjective;
|
|
375
|
+
objective = coachResult.improvedObjective;
|
|
376
|
+
const rec = coachResult.recommended;
|
|
377
|
+
if (fileCfg.concurrency == null)
|
|
378
|
+
fileCfg.concurrency = rec.concurrency;
|
|
379
|
+
if (fileCfg.usageCap == null && rec.usageCap != null)
|
|
380
|
+
fileCfg.usageCap = Math.round(rec.usageCap * 100);
|
|
381
|
+
}
|
|
382
|
+
}
|
|
344
383
|
let models = [];
|
|
345
384
|
if (!cliFlags.model && !fileCfg?.model)
|
|
346
385
|
models = await fetchModels(5_000);
|
|
@@ -374,7 +413,7 @@ async function main() {
|
|
|
374
413
|
}
|
|
375
414
|
}
|
|
376
415
|
concurrency = cliFlags.concurrency ? parseInt(cliFlags.concurrency) : (fileCfg?.concurrency ?? 5);
|
|
377
|
-
budget = cliFlags.budget ? parseInt(cliFlags.budget) :
|
|
416
|
+
budget = cliFlags.budget ? parseInt(cliFlags.budget) : coachResult?.recommended.budget;
|
|
378
417
|
if (budget != null && (isNaN(budget) || budget < 1)) {
|
|
379
418
|
console.error(chalk.red(` --budget must be a positive integer`));
|
|
380
419
|
process.exit(1);
|
|
@@ -442,7 +481,8 @@ async function main() {
|
|
|
442
481
|
console.log(chalk.dim(` ${workerModel} concurrency=${concurrency} worktrees=${useWorktrees} merge=${mergeStrategy}${capStr}${extraStr}`));
|
|
443
482
|
}
|
|
444
483
|
// ── Plan phase ──
|
|
445
|
-
const
|
|
484
|
+
const flexFlag = argv.includes("--flex") ? true : argv.includes("--no-flex") ? false : undefined;
|
|
485
|
+
const flex = objective != null && (flexFlag ?? ((fileCfg?.flexiblePlan ?? true) && (budget ?? 10) > 2));
|
|
446
486
|
const agentTimeoutMs = cliFlags.timeout ? parseFloat(cliFlags.timeout) * 1000 : undefined;
|
|
447
487
|
let thinkingUsed = 0, thinkingCost = 0, thinkingIn = 0, thinkingOut = 0, thinkingTools = 0;
|
|
448
488
|
let thinkingHistory;
|
|
@@ -11,5 +11,9 @@ export interface CoachContext {
|
|
|
11
11
|
log?: PlannerLog;
|
|
12
12
|
coachModel?: string;
|
|
13
13
|
coachProvider?: ProviderConfig;
|
|
14
|
+
/** Full markdown plan content (e.g. from a .md plan file). Overrides URL fetching. */
|
|
15
|
+
planContent?: string;
|
|
16
|
+
/** When true, show only accept/skip and do not persist user settings. */
|
|
17
|
+
confirmOnly?: boolean;
|
|
14
18
|
}
|
|
15
19
|
export declare function runSetupCoach(rawObjective: string, cwd: string, ctx: CoachContext): Promise<CoachResult | null>;
|
|
@@ -47,13 +47,15 @@ export async function runSetupCoach(rawObjective, cwd, ctx) {
|
|
|
47
47
|
const facts = collectRepoFacts(cwd);
|
|
48
48
|
if (facts.srcFileCount > 1_000_000)
|
|
49
49
|
return null;
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
50
|
+
let planContent = ctx.planContent ?? null;
|
|
51
|
+
if (!planContent) {
|
|
52
|
+
const urls = rawObjective.match(URL_REGEX) ?? [];
|
|
53
|
+
if (urls.length > 0) {
|
|
54
|
+
const results = await Promise.all(urls.map(u => fetchUrlContent(u, 4_000)));
|
|
55
|
+
const fetched = results.filter(Boolean);
|
|
56
|
+
if (fetched.length > 0) {
|
|
57
|
+
planContent = fetched.map((c, i) => `[URL ${i + 1}: ${urls[i]}]\n${c}`).join("\n\n---\n\n");
|
|
58
|
+
}
|
|
57
59
|
}
|
|
58
60
|
}
|
|
59
61
|
const userMessage = renderRepoFacts(facts, rawObjective, ctx.providers, ctx.cliFlags, planContent);
|
|
@@ -120,14 +122,20 @@ export async function runSetupCoach(rawObjective, cwd, ctx) {
|
|
|
120
122
|
return null;
|
|
121
123
|
}
|
|
122
124
|
renderCoachBlock(result, elapsedMs, model);
|
|
123
|
-
const choice =
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
125
|
+
const choice = ctx.confirmOnly
|
|
126
|
+
? await selectKey("", [
|
|
127
|
+
{ key: "y", desc: " accept" },
|
|
128
|
+
{ key: "s", desc: "kip" },
|
|
129
|
+
])
|
|
130
|
+
: await selectKey("", [
|
|
131
|
+
{ key: "y", desc: " accept" },
|
|
132
|
+
{ key: "e", desc: "dit objective" },
|
|
133
|
+
{ key: "s", desc: "kip coach" },
|
|
134
|
+
{ key: "x", desc: " skip coach forever" },
|
|
135
|
+
]);
|
|
129
136
|
if (choice === "y") {
|
|
130
|
-
|
|
137
|
+
if (!ctx.confirmOnly)
|
|
138
|
+
saveUserSettings({ ...loadUserSettings(), lastCoachedAt: Date.now() });
|
|
131
139
|
return result;
|
|
132
140
|
}
|
|
133
141
|
if (choice === "e") {
|
package/dist/planner/query.js
CHANGED
|
@@ -3,7 +3,7 @@ import { NudgeError, extractToolTarget, sumUsageTokens } from "../core/types.js"
|
|
|
3
3
|
import { writeTranscriptEvent } from "../core/transcripts.js";
|
|
4
4
|
import { getTurn, updateTurn } from "../core/turns.js";
|
|
5
5
|
import { isRateLimitError, throttlePlanner, addPlannerCost, recordPeakContext, resetPlannerRateLimit, setContextTokens, applyRateLimitEvent, getPlannerRateLimitInfo, } from "./throttle.js";
|
|
6
|
-
import { cursorProxyRateLimiter, sdkQueryRateLimiter, apiEndpointLimiter } from "../core/rate-limiter.js";
|
|
6
|
+
import { cursorProxyRateLimiter, sdkQueryRateLimiter, apiEndpointLimiter, acquireSdkQueryRateLimit } from "../core/rate-limiter.js";
|
|
7
7
|
export { getTotalPlannerCost, getPeakPlannerContext, getPlannerRateLimitInfo, } from "./throttle.js";
|
|
8
8
|
export { attemptJsonParse, extractTaskJson } from "./json.js";
|
|
9
9
|
export { postProcess } from "./postprocess.js";
|
|
@@ -126,7 +126,7 @@ async function runPlannerQueryOnce(prompt, opts, onLog) {
|
|
|
126
126
|
promptBytes: prompt.length,
|
|
127
127
|
});
|
|
128
128
|
}
|
|
129
|
-
await
|
|
129
|
+
await acquireSdkQueryRateLimit();
|
|
130
130
|
const pq = query({
|
|
131
131
|
prompt,
|
|
132
132
|
options: {
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import type { Task, SteerResult, WaveSummary } from "../core/types.js";
|
|
2
|
+
import { type PlannerLog } from "./query.js";
|
|
3
|
+
export declare const VERIFY_SCHEMA: {
|
|
4
|
+
type: "json_schema";
|
|
5
|
+
schema: {
|
|
6
|
+
type: string;
|
|
7
|
+
properties: {
|
|
8
|
+
done: {
|
|
9
|
+
type: string;
|
|
10
|
+
};
|
|
11
|
+
reasoning: {
|
|
12
|
+
type: string;
|
|
13
|
+
};
|
|
14
|
+
statusUpdate: {
|
|
15
|
+
type: string;
|
|
16
|
+
};
|
|
17
|
+
estimatedSessionsRemaining: {
|
|
18
|
+
type: string;
|
|
19
|
+
};
|
|
20
|
+
verifiedCount: {
|
|
21
|
+
type: string;
|
|
22
|
+
};
|
|
23
|
+
retryCount: {
|
|
24
|
+
type: string;
|
|
25
|
+
};
|
|
26
|
+
tasks: {
|
|
27
|
+
type: string;
|
|
28
|
+
items: {
|
|
29
|
+
type: string;
|
|
30
|
+
properties: {
|
|
31
|
+
prompt: {
|
|
32
|
+
type: string;
|
|
33
|
+
};
|
|
34
|
+
model: {
|
|
35
|
+
type: string;
|
|
36
|
+
};
|
|
37
|
+
noWorktree: {
|
|
38
|
+
type: string;
|
|
39
|
+
};
|
|
40
|
+
type: {
|
|
41
|
+
type: string;
|
|
42
|
+
enum: string[];
|
|
43
|
+
};
|
|
44
|
+
postcondition: {
|
|
45
|
+
type: string;
|
|
46
|
+
};
|
|
47
|
+
};
|
|
48
|
+
required: string[];
|
|
49
|
+
};
|
|
50
|
+
};
|
|
51
|
+
};
|
|
52
|
+
required: string[];
|
|
53
|
+
};
|
|
54
|
+
};
|
|
55
|
+
/**
|
|
56
|
+
* Verify the previous wave and compose the next fixed batch of pending tasks.
|
|
57
|
+
*
|
|
58
|
+
* Unlike `steerWave`, the verifier does not invent new tasks — it:
|
|
59
|
+
* 1. Runs the project's build/smoke checks.
|
|
60
|
+
* 2. Fixes shallow regressions in the last wave (edits directly).
|
|
61
|
+
* 3. Picks the next N pending tasks from the user's fixed plan.
|
|
62
|
+
*
|
|
63
|
+
* The model has full tool access so it can actually repair broken commits,
|
|
64
|
+
* not just report on them.
|
|
65
|
+
*/
|
|
66
|
+
export declare function verifyWave(objective: string, pendingTasks: Task[], lastWave: WaveSummary | undefined, remainingBudget: number, cwd: string, plannerModel: string, concurrency: number, onLog: PlannerLog, transcriptName?: string): Promise<SteerResult>;
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
import { runPlannerQuery, attemptJsonParse, postProcess } from "./query.js";
|
|
2
|
+
import { createTurn, beginTurn, endTurn } from "../core/turns.js";
|
|
3
|
+
// Verifier schema — same shape as STEER_SCHEMA plus `verifiedCount` / `retryCount`, so
|
|
4
|
+
// the wave-loop can tell how many of the prior wave's tasks actually shipped or need a retry.
|
|
5
|
+
export const VERIFY_SCHEMA = {
|
|
6
|
+
type: "json_schema",
|
|
7
|
+
schema: {
|
|
8
|
+
type: "object",
|
|
9
|
+
properties: {
|
|
10
|
+
done: { type: "boolean" },
|
|
11
|
+
reasoning: { type: "string" },
|
|
12
|
+
statusUpdate: { type: "string" },
|
|
13
|
+
estimatedSessionsRemaining: { type: "number" },
|
|
14
|
+
verifiedCount: { type: "number" },
|
|
15
|
+
retryCount: { type: "number" },
|
|
16
|
+
tasks: {
|
|
17
|
+
type: "array",
|
|
18
|
+
items: {
|
|
19
|
+
type: "object",
|
|
20
|
+
properties: {
|
|
21
|
+
prompt: { type: "string" },
|
|
22
|
+
model: { type: "string" },
|
|
23
|
+
noWorktree: { type: "boolean" },
|
|
24
|
+
type: { type: "string", enum: ["execute", "explore", "critique", "synthesize", "verify", "user-test", "polish"] },
|
|
25
|
+
postcondition: { type: "string" },
|
|
26
|
+
},
|
|
27
|
+
required: ["prompt"],
|
|
28
|
+
},
|
|
29
|
+
},
|
|
30
|
+
},
|
|
31
|
+
required: ["done", "tasks", "reasoning", "statusUpdate", "estimatedSessionsRemaining"],
|
|
32
|
+
},
|
|
33
|
+
};
|
|
34
|
+
/**
 * Render a wave summary as plain text for the verifier prompt.
 * Returns a placeholder sentence when `w` is falsy; otherwise a `Wave N:` header (`w.wave + 1`)
 * followed by one line per task with its status, the first 160 characters of its prompt,
 * the changed-file count (a missing or zero count is shown as "(0 files)") and the error text, if any.
 */
function renderLastWave(w) {
|
|
35
|
+
if (!w)
|
|
36
|
+
return "(first wave — nothing to verify yet)";
|
|
37
|
+
const lines = w.tasks.map(t => {
|
|
38
|
+
const files = t.filesChanged ? ` (${t.filesChanged} files)` : " (0 files)";
|
|
39
|
+
const err = t.error ? ` — ${t.error}` : "";
|
|
40
|
+
return ` - [${t.status}] ${t.prompt.slice(0, 160)}${files}${err}`;
|
|
41
|
+
}).join("\n");
|
|
42
|
+
return `Wave ${w.wave + 1}:\n${lines}`;
|
|
43
|
+
}
|
|
44
|
+
/**
 * Verify the previous wave and compose the next fixed batch of pending tasks.
 *
 * Unlike `steerWave`, the verifier does not invent new tasks — it:
 *   1. Runs the project's build/smoke checks.
 *   2. Fixes shallow regressions in the last wave (edits directly).
 *   3. Picks the next N pending tasks from the user's fixed plan.
 *
 * The model has full tool access so it can actually repair broken commits,
 * not just report on them.
 *
 * @param objective Run objective, embedded verbatim in the prompt.
 * @param pendingTasks Remaining plan tasks; each prompt is listed truncated to 200 characters.
 * @param lastWave Summary of the wave that just finished, or undefined before the first wave.
 * @param remainingBudget Remaining agent sessions; shown in the prompt and passed to `postProcess`.
 * @param cwd Working directory handed to the planner query.
 * @param plannerModel Model used for the planner query and recorded on the turn.
 * @param concurrency Parallelism hint mentioned in the prompt.
 * @param onLog Planner log callback.
 * @param transcriptName Transcript name for the planner query (default "verify").
 * @returns `{ done, tasks, reasoning, statusUpdate, estimatedSessionsRemaining }`. When the
 *   model reports `done === true`, `tasks` is empty and the estimate defaults to 0; otherwise
 *   `done` is true only if no tasks remain after `postProcess`.
 * @throws Rethrows planner/post-processing failures and throws when the response cannot be
 *   parsed; in every failure case the turn is ended with status "error" first.
 */
export async function verifyWave(objective, pendingTasks, lastWave, remainingBudget, cwd, plannerModel, concurrency, onLog, transcriptName = "verify") {
    const pendingList = pendingTasks.length > 0
        ? pendingTasks.map((t, i) => ` ${i + 1}. ${t.prompt.slice(0, 200)}`).join("\n")
        : "(none — every task from the original plan has been attempted)";
    const prompt = `You are the verifier + fix gate between waves of a fixed-plan execution.

Objective: ${objective}

## What just happened
${renderLastWave(lastWave)}

## Remaining plan (pending tasks, in order)
${pendingList}

## Your job

1. Run the project's build and smoke checks. Use the tools you have (Bash, Read, Grep, Edit, Write).
2. For any regression the last wave introduced, make the fix directly. Don't delegate a fix to the next wave if you can do it in two edits.
3. Compose the next batch of pending tasks to dispatch — pick tasks with non-overlapping file scopes so ${concurrency} can run in parallel.
4. If the plan is complete AND the build passes AND one verify task has confirmed the app runs, set done=true.

## Output

Respond with ONLY a JSON object (no markdown fences):
{"done":boolean,"reasoning":"...","statusUpdate":"REQUIRED","estimatedSessionsRemaining":N,"verifiedCount":N,"retryCount":N,"tasks":[{"prompt":"...","type":"execute","postcondition":"..."}]}

Remaining budget: ${remainingBudget} agent sessions. Include retries inside tasks[] (same format) if a pending step needs a second attempt with corrected context.`;
    onLog("Verifying last wave…", "status");
    const turn = createTurn("steer", `Verify wave`, `verify-${lastWave?.wave ?? 0}`, plannerModel);
    beginTurn(turn);
    // Guarantee the turn is ended exactly once, including when the query or post-processing throws.
    let turnClosed = false;
    const closeTurn = (status) => {
        if (turnClosed)
            return;
        turnClosed = true;
        endTurn(turn, status);
    };
    try {
        const resultText = await runPlannerQuery(prompt, {
            cwd, model: plannerModel, outputFormat: VERIFY_SCHEMA,
            transcriptName, turnId: turn.id, maxTurns: 80,
        }, onLog);
        const parsed = attemptJsonParse(resultText);
        if (!parsed) {
            throw new Error(`Could not parse verifier response (${resultText.length} chars): ${resultText.slice(0, 120)}`);
        }
        const isDone = parsed.done === true;
        const statusUpdate = parsed.statusUpdate || undefined;
        const estRaw = parsed.estimatedSessionsRemaining;
        const estimatedSessionsRemaining = typeof estRaw === "number" && estRaw >= 0 ? Math.round(estRaw) : undefined;
        // Model output is untrusted: tolerate a non-array `tasks` value and null/primitive entries
        // instead of crashing with a TypeError.
        const rawTasks = Array.isArray(parsed.tasks) ? parsed.tasks : [];
        let tasks = rawTasks.map((t, i) => {
            const spec = t !== null && typeof t === "object" ? t : {};
            // NOTE(review): VERIFY_SCHEMA also allows a per-task `model`, which is not carried over here — confirm that is intended.
            return {
                id: String(i),
                prompt: typeof t === "string" ? t : spec.prompt,
                ...(spec.noWorktree && { noWorktree: true }),
                ...(spec.type && { type: spec.type }),
                ...(typeof spec.postcondition === "string" && spec.postcondition.trim() && { postcondition: spec.postcondition.trim() }),
            };
        });
        tasks = postProcess(tasks, remainingBudget, onLog);
        // No tasks and not done means the verifier produced nothing actionable: record the turn as an error.
        closeTurn(tasks.length === 0 && !isDone ? "error" : "done");
        if (isDone) {
            return {
                done: true, tasks: [], reasoning: parsed.reasoning || "Plan complete and verified",
                statusUpdate, estimatedSessionsRemaining: estimatedSessionsRemaining ?? 0,
            };
        }
        return {
            done: tasks.length === 0, tasks,
            reasoning: parsed.reasoning || "", statusUpdate, estimatedSessionsRemaining,
        };
    }
    catch (err) {
        closeTurn("error");
        throw err;
    }
}
|
package/dist/providers/index.js
CHANGED
|
@@ -11,7 +11,7 @@ import { DEFAULT_MODEL } from "../core/models.js";
|
|
|
11
11
|
import { isCursorProxyProvider, resolveCursorAgentToken, cachedAgentPaths, } from "./cursor-env.js";
|
|
12
12
|
import { preflightCursorProxyViaHttp } from "./cursor-proxy.js";
|
|
13
13
|
import { pickCursorModel } from "./cursor-picker.js";
|
|
14
|
-
import { sdkQueryRateLimiter } from "../core/rate-limiter.js";
|
|
14
|
+
import { sdkQueryRateLimiter, acquireSdkQueryRateLimit } from "../core/rate-limiter.js";
|
|
15
15
|
// Re-export Cursor utilities so callers can keep a single import point.
|
|
16
16
|
export { PROXY_DEFAULT_URL, isCursorProxyProvider, bundledComposerProxyShellCommand, readCursorProxyLogTail, warnMacCursorAgentShellPatchIfNeeded, hasCursorAgentToken, getCursorAgentToken, } from "./cursor-env.js";
|
|
17
17
|
export { healthCheckCursorProxy, ensureCursorProxyRunning } from "./cursor-proxy.js";
|
|
@@ -243,7 +243,7 @@ export async function preflightProvider(p, cwd, timeoutMs = 20_000, opts) {
|
|
|
243
243
|
let pq;
|
|
244
244
|
const rl = sdkQueryRateLimiter;
|
|
245
245
|
try {
|
|
246
|
-
await
|
|
246
|
+
await acquireSdkQueryRateLimit();
|
|
247
247
|
pq = query({
|
|
248
248
|
prompt: "Reply with exactly the word ok and nothing else.",
|
|
249
249
|
options: {
|