claude-overnight 1.19.0 → 1.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_version.d.ts +1 -0
- package/dist/_version.js +2 -0
- package/dist/cursor-models.js +5 -7
- package/dist/index.js +23 -12
- package/dist/models.d.ts +20 -24
- package/dist/models.js +86 -54
- package/dist/planner-query.d.ts +0 -2
- package/dist/planner-query.js +1 -4
- package/dist/planner.d.ts +1 -1
- package/dist/planner.js +20 -43
- package/dist/providers.d.ts +26 -1
- package/dist/providers.js +458 -93
- package/dist/run.js +46 -85
- package/dist/steering.js +4 -3
- package/dist/swarm.d.ts +2 -0
- package/dist/swarm.js +13 -1
- package/dist/types.d.ts +1 -1
- package/package.json +2 -2
- package/plugins/claude-overnight/.claude-plugin/plugin.json +1 -1
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare const VERSION = "1.23.0";
|
package/dist/_version.js
ADDED
package/dist/cursor-models.js
CHANGED
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
// The `priority` models always appear at the top of the picker in this order.
|
|
12
12
|
// `known` models appear after them. Anything the proxy returns dynamically
|
|
13
13
|
// that isn't in this list goes into a "more..." sub-menu.
|
|
14
|
-
import {
|
|
14
|
+
import { modelDisplayName, formatContextWindow } from "./models.js";
|
|
15
15
|
export const CURSOR_PRIORITY_MODELS = [
|
|
16
16
|
{ id: "composer-2", label: "composer-2", hint: "Cursor Composer 2 — latest, strongest Cursor model" },
|
|
17
17
|
{ id: "composer-2-fast", label: "composer-2-fast", hint: "Cursor Composer 2 Fast — faster, cheaper variant" },
|
|
@@ -34,12 +34,10 @@ export function cursorModelHint(modelId) {
|
|
|
34
34
|
}
|
|
35
35
|
if (m.startsWith("composer"))
|
|
36
36
|
return "Cursor Composer model";
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
if (
|
|
40
|
-
return
|
|
41
|
-
if (m.includes("haiku"))
|
|
42
|
-
return CURSOR_MODEL_HINTS.haiku;
|
|
37
|
+
// For any model in the shared capability catalog (e.g. Claude variants through Cursor), use the shared display name
|
|
38
|
+
const displayName = modelDisplayName(modelId);
|
|
39
|
+
if (displayName !== modelId)
|
|
40
|
+
return `${displayName} via Cursor · ${formatContextWindow(modelId)} context`;
|
|
43
41
|
if (m.startsWith("gpt-5"))
|
|
44
42
|
return "GPT model via Cursor";
|
|
45
43
|
if (m.startsWith("gemini"))
|
package/dist/index.js
CHANGED
|
@@ -2,16 +2,14 @@
|
|
|
2
2
|
import { readFileSync, existsSync, readdirSync, mkdirSync } from "fs";
|
|
3
3
|
import { resolve, dirname, join } from "path";
|
|
4
4
|
import { fileURLToPath } from "url";
|
|
5
|
-
import { createRequire } from "module";
|
|
6
5
|
import chalk from "chalk";
|
|
7
|
-
|
|
8
|
-
const VERSION = pkg.version;
|
|
6
|
+
import { VERSION } from "./_version.js";
|
|
9
7
|
import { query } from "@anthropic-ai/claude-agent-sdk";
|
|
10
8
|
import { Swarm } from "./swarm.js";
|
|
11
9
|
import { planTasks, refinePlan, identifyThemes, buildThinkingTasks, orchestrate, salvageFromFile } from "./planner.js";
|
|
12
|
-
import {
|
|
13
|
-
import {
|
|
14
|
-
import { pickModel, loadProviders, preflightProvider, buildEnvResolver, healthCheckCursorProxy, PROXY_DEFAULT_URL, isCursorProxyProvider } from "./providers.js";
|
|
10
|
+
import { modelDisplayName, formatContextWindow, DEFAULT_MODEL } from "./models.js";
|
|
11
|
+
import { setPlannerEnvResolver } from "./planner-query.js";
|
|
12
|
+
import { pickModel, loadProviders, preflightProvider, buildEnvResolver, healthCheckCursorProxy, PROXY_DEFAULT_URL, isCursorProxyProvider, ensureCursorProxyRunning } from "./providers.js";
|
|
15
13
|
import { RunDisplay } from "./ui.js";
|
|
16
14
|
import { renderSummary } from "./render.js";
|
|
17
15
|
import { executeRun } from "./run.js";
|
|
@@ -69,7 +67,6 @@ async function promptResumeOverrides(state, cliFlags, argv, noTTY, runDir) {
|
|
|
69
67
|
const modelsPromise = fetchModels(20_000).catch(() => []);
|
|
70
68
|
// ── Interactive review ──
|
|
71
69
|
const fmtSummary = () => {
|
|
72
|
-
const tier = detectModelTier(state.workerModel);
|
|
73
70
|
const remaining = Math.max(1, state.remaining);
|
|
74
71
|
const capStr = state.usageCap != null ? `${Math.round(state.usageCap * 100)}%` : "unlimited";
|
|
75
72
|
const extraStr = state.allowExtraUsage
|
|
@@ -78,7 +75,7 @@ async function promptResumeOverrides(state, cliFlags, argv, noTTY, runDir) {
|
|
|
78
75
|
console.log();
|
|
79
76
|
console.log(` ${chalk.dim("Resume settings")}`);
|
|
80
77
|
console.log(` ${chalk.dim("─".repeat(40))}`);
|
|
81
|
-
console.log(` ${chalk.dim("model ")}${chalk.white(state.workerModel)} ${chalk.dim(`(${
|
|
78
|
+
console.log(` ${chalk.dim("model ")}${chalk.white(state.workerModel)} ${chalk.dim(`(${formatContextWindow(state.workerModel)} context)`)}`);
|
|
82
79
|
console.log(` ${chalk.dim("remaining ")}${chalk.white(String(remaining))} ${chalk.dim("sessions")}`);
|
|
83
80
|
console.log(` ${chalk.dim("concur ")}${chalk.white(String(state.concurrency))}`);
|
|
84
81
|
console.log(` ${chalk.dim("usage cap ")}${chalk.white(capStr)}`);
|
|
@@ -627,11 +624,11 @@ async function main() {
|
|
|
627
624
|
}
|
|
628
625
|
const parts = [];
|
|
629
626
|
if (fastModel)
|
|
630
|
-
parts.push(`${
|
|
627
|
+
parts.push(`${modelDisplayName(plannerModel)} → ${modelDisplayName(workerModel)} + ${modelDisplayName(fastModel)}`);
|
|
631
628
|
else if (workerModel !== plannerModel)
|
|
632
|
-
parts.push(`${
|
|
629
|
+
parts.push(`${modelDisplayName(workerModel)} → ${modelDisplayName(plannerModel)}`);
|
|
633
630
|
else
|
|
634
|
-
parts.push(
|
|
631
|
+
parts.push(modelDisplayName(workerModel));
|
|
635
632
|
parts.push(`budget ${budget}`, `${concurrency}×`);
|
|
636
633
|
if (budget > 2)
|
|
637
634
|
parts.push("flex");
|
|
@@ -751,18 +748,31 @@ async function main() {
|
|
|
751
748
|
["fast", fastProvider],
|
|
752
749
|
];
|
|
753
750
|
const pending = [];
|
|
751
|
+
const cursorProxies = [];
|
|
754
752
|
for (const [role, p] of all) {
|
|
755
753
|
if (p && !seen.has(p.id)) {
|
|
756
754
|
seen.add(p.id);
|
|
757
755
|
pending.push([role, p]);
|
|
756
|
+
if (isCursorProxyProvider(p))
|
|
757
|
+
cursorProxies.push(p);
|
|
758
758
|
}
|
|
759
759
|
}
|
|
760
|
+
// Auto-start cursor proxy before pinging
|
|
761
|
+
if (cursorProxies.length > 0) {
|
|
762
|
+
await ensureCursorProxyRunning();
|
|
763
|
+
}
|
|
760
764
|
process.stdout.write(` ${chalk.dim(`◆ Pinging ${pending.map(([r, p]) => `${r} (${p.displayName})`).join(", ")}...`)}\n`);
|
|
761
765
|
const results = await Promise.all(pending.map(async ([role, p]) => ({ role, provider: p, result: await preflightProvider(p, cwd) })));
|
|
762
766
|
for (const { role, provider, result } of results) {
|
|
763
767
|
if (!result.ok) {
|
|
764
768
|
console.error(chalk.red(` ✗ ${role} preflight failed: ${chalk.dim(result.error)}`));
|
|
765
|
-
|
|
769
|
+
if (isCursorProxyProvider(provider)) {
|
|
770
|
+
console.error(chalk.yellow(` The proxy at ${PROXY_DEFAULT_URL} may have crashed. Start it: npx cursor-api-proxy`));
|
|
771
|
+
}
|
|
772
|
+
else {
|
|
773
|
+
console.error(chalk.red(` Fix the provider at ~/.claude/claude-overnight/providers.json and retry.`));
|
|
774
|
+
}
|
|
775
|
+
console.error("");
|
|
766
776
|
process.exit(1);
|
|
767
777
|
}
|
|
768
778
|
console.log(` ${chalk.green(`✓ ${role} ready`)} ${chalk.dim(`· ${provider.displayName} · ${provider.model}`)}`);
|
|
@@ -899,6 +909,7 @@ async function main() {
|
|
|
899
909
|
tasks: thinkingTasks, concurrency, cwd, model: plannerModel, permissionMode,
|
|
900
910
|
useWorktrees: false, mergeStrategy: "yolo", agentTimeoutMs, usageCap, allowExtraUsage, extraUsageBudget,
|
|
901
911
|
envForModel,
|
|
912
|
+
cursorProxy: [plannerProvider, workerProvider, fastProvider].some(p => p && isCursorProxyProvider(p)),
|
|
902
913
|
});
|
|
903
914
|
const thinkRunInfo = { accIn: 0, accOut: 0, accCost: 0, accCompleted: 0, accFailed: 0, sessionsBudget: budget ?? 10, waveNum: -1, remaining: budget ?? 10, model: plannerModel, startedAt: Date.now() };
|
|
904
915
|
const thinkDisplay = new RunDisplay(thinkRunInfo, { remaining: 0, usageCap, concurrency, paused: false, dirty: false });
|
package/dist/models.d.ts
CHANGED
|
@@ -1,27 +1,23 @@
|
|
|
1
|
-
export
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
export interface TierDetectionRule {
|
|
7
|
-
match: (model: string) => boolean;
|
|
8
|
-
tier: ModelTier;
|
|
1
|
+
export interface ModelCapability {
|
|
2
|
+
contextWindow: number;
|
|
3
|
+
contextConstraint: "tight" | "moderate" | "relaxed";
|
|
4
|
+
/** Human-readable label for UI display. Falls back to the model key if absent. */
|
|
5
|
+
displayName?: string;
|
|
9
6
|
}
|
|
10
|
-
export declare const
|
|
11
|
-
export declare function detectModelTier(model: string): ModelTier;
|
|
12
|
-
export declare const MODEL_CAPABILITY_DESCRIPTIONS: Record<ModelTier, string>;
|
|
13
|
-
export declare const UNKNOWN_MODEL_CAPABILITIES: Record<string, string>;
|
|
14
|
-
export declare function modelCapabilityBlock(model: string): string;
|
|
7
|
+
export declare const MODEL_CAPABILITIES: Record<string, ModelCapability>;
|
|
15
8
|
export declare const DEFAULT_MODEL = "claude-sonnet-4-6";
|
|
16
9
|
export declare const FALLBACK_MODEL = "claude-opus-4-6";
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
10
|
+
/**
|
|
11
|
+
* Find capability info for a model string. Tries: lowercased exact match →
|
|
12
|
+
* exact match as given → first catalog key contained in the lowercased name. Falls back to "unknown" entry.
|
|
13
|
+
*/
|
|
14
|
+
export declare function getModelCapability(model: string): ModelCapability;
|
|
15
|
+
/** Human-readable model name for display (e.g. in run labels). */
|
|
16
|
+
export declare function modelDisplayName(model: string): string;
|
|
17
|
+
/**
|
|
18
|
+
* Context constraint instruction injected into planner prompts.
|
|
19
|
+
* Tells the planner how to scope tasks based on the worker model's context.
|
|
20
|
+
*/
|
|
21
|
+
export declare function contextConstraintNote(model: string): string;
|
|
22
|
+
/** Format context window for display (e.g. "256K"). */
|
|
23
|
+
export declare function formatContextWindow(model: string): string;
|
package/dist/models.js
CHANGED
|
@@ -1,60 +1,92 @@
|
|
|
1
|
-
// ── Model
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
export
|
|
1
|
+
// ── Model capability catalog ──
|
|
2
|
+
//
|
|
3
|
+
// Single source of truth for model capabilities. Update this when new models
|
|
4
|
+
// arrive (which happens basically daily). Each entry describes what the model
|
|
5
|
+
// can handle in terms of context and task scoping.
|
|
6
|
+
//
|
|
7
|
+
// contextConstraint:
|
|
8
|
+
// "tight" — small context window. Model is lazy and error-prone on big
|
|
9
|
+
// tasks. Needs surgical, hyper-specific instructions.
|
|
10
|
+
// "moderate" — decent context. Can handle focused missions but may lose
|
|
11
|
+
// thread on sprawling codebases.
|
|
12
|
+
// "relaxed" — large context. Can read most of the codebase at once,
|
|
13
|
+
// reliably own multi-file features with autonomy.
|
|
14
|
+
export const MODEL_CAPABILITIES = {
|
|
15
|
+
// ── Anthropic Claude 4.5 / 4.6 ──
|
|
16
|
+
"claude-sonnet-4-6": { contextWindow: 256_000, contextConstraint: "relaxed", displayName: "Sonnet 4.6" },
|
|
17
|
+
"claude-sonnet-4-5": { contextWindow: 256_000, contextConstraint: "relaxed", displayName: "Sonnet 4.5" },
|
|
18
|
+
"claude-opus-4-6": { contextWindow: 200_000, contextConstraint: "relaxed", displayName: "Opus 4.6" },
|
|
19
|
+
"claude-opus-4-5": { contextWindow: 200_000, contextConstraint: "relaxed", displayName: "Opus 4.5" },
|
|
20
|
+
"claude-opus-4-20250514": { contextWindow: 200_000, contextConstraint: "relaxed", displayName: "Opus 4" },
|
|
21
|
+
"claude-haiku-4-5": { contextWindow: 200_000, contextConstraint: "moderate", displayName: "Haiku 4.5" },
|
|
22
|
+
"claude-haiku-4-5-20251001": { contextWindow: 200_000, contextConstraint: "moderate", displayName: "Haiku 4.5" },
|
|
23
|
+
// ── Cursor models ──
|
|
24
|
+
"auto": { contextWindow: 256_000, contextConstraint: "relaxed", displayName: "Cursor Auto" },
|
|
25
|
+
"composer-2": { contextWindow: 200_000, contextConstraint: "relaxed", displayName: "Composer 2" },
|
|
26
|
+
"composer-2-fast": { contextWindow: 128_000, contextConstraint: "moderate", displayName: "Composer 2 Fast" },
|
|
27
|
+
"composer": { contextWindow: 128_000, contextConstraint: "moderate", displayName: "Composer" },
|
|
28
|
+
// ── Qwen (via DashScope / custom provider) ──
|
|
29
|
+
"qwen3.6-plus": { contextWindow: 131_072, contextConstraint: "moderate", displayName: "Qwen 3.6 Plus" },
|
|
30
|
+
"qwen3-coder": { contextWindow: 262_144, contextConstraint: "relaxed", displayName: "Qwen 3 Coder" },
|
|
31
|
+
"qwen-max": { contextWindow: 32_768, contextConstraint: "tight", displayName: "Qwen Max" },
|
|
32
|
+
// ── Fallback for unknown models ──
|
|
33
|
+
"unknown": { contextWindow: 128_000, contextConstraint: "moderate" },
|
|
34
|
+
};
|
|
35
|
+
// ── Default / fallback models ──
|
|
36
|
+
export const DEFAULT_MODEL = "claude-sonnet-4-6";
|
|
37
|
+
export const FALLBACK_MODEL = "claude-opus-4-6"; // used for planner + worker recovery
|
|
38
|
+
// ── Lookup ──
|
|
39
|
+
/**
|
|
40
|
+
* Find capability info for a model string. Tries: lowercased exact match →
|
|
41
|
+
* exact match as given → first catalog key contained in the lowercased name. Falls back to "unknown" entry.
|
|
42
|
+
*/
|
|
43
|
+
export function getModelCapability(model) {
|
|
15
44
|
const m = model.toLowerCase();
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
45
|
+
if (MODEL_CAPABILITIES[m])
|
|
46
|
+
return MODEL_CAPABILITIES[m];
|
|
47
|
+
if (MODEL_CAPABILITIES[model])
|
|
48
|
+
return MODEL_CAPABILITIES[model];
|
|
49
|
+
for (const [key, cap] of Object.entries(MODEL_CAPABILITIES)) {
|
|
50
|
+
if (key !== "unknown" && m.includes(key))
|
|
51
|
+
return cap;
|
|
19
52
|
}
|
|
20
|
-
return
|
|
53
|
+
return MODEL_CAPABILITIES.unknown;
|
|
21
54
|
}
|
|
22
|
-
|
|
23
|
-
export
|
|
24
|
-
opus: "Each agent runs Claude Opus with 1M context -- a powerhouse. It can own entire epics, do deep codebase research, make architectural decisions, implement complex multi-file systems end-to-end, use browser tools for analysis, and deliver expert-level work. These agents can work for 30+ minutes on the most complex tasks. Do NOT waste them on trivial edits -- give them ownership and autonomy.",
|
|
25
|
-
sonnet: "Each agent runs Claude Sonnet -- capable of substantial implementation, refactoring, testing, and design work. Can work autonomously for 10-20 minutes on complex tasks. Give agents meaningful scope -- not just single-line edits.",
|
|
26
|
-
haiku: "Each agent runs Claude Haiku -- fast and efficient, best for focused, well-specified tasks. Be explicit about files, functions, and expected changes. Keep tasks scoped to a clear, concrete deliverable.",
|
|
27
|
-
unknown: "", // handled by UNKNOWN_MODEL_CAPABILITIES below
|
|
28
|
-
};
|
|
29
|
-
export const UNKNOWN_MODEL_CAPABILITIES = {
|
|
30
|
-
composer: "Each agent runs a Cursor Composer model with full codebase access. Capable of focused implementation work. Be explicit about files, functions, and expected changes.",
|
|
31
|
-
"gpt-5": "Each agent runs a GPT model via Cursor with full codebase access. Capable of focused implementation work. Be explicit about files, functions, and expected changes.",
|
|
32
|
-
gemini: "Each agent runs a Gemini model via Cursor with full codebase access. Be explicit about files, functions, and expected changes.",
|
|
33
|
-
grok: "Each agent runs a Grok model via Cursor with full codebase access. Be explicit about files, functions, and expected changes.",
|
|
34
|
-
};
|
|
35
|
-
export function modelCapabilityBlock(model) {
|
|
36
|
-
const tier = detectModelTier(model);
|
|
37
|
-
const cap = MODEL_CAPABILITY_DESCRIPTIONS[tier];
|
|
38
|
-
if (cap)
|
|
39
|
-
return cap;
|
|
55
|
+
/** Human-readable model name for display (e.g. in run labels). */
|
|
56
|
+
export function modelDisplayName(model) {
|
|
40
57
|
const m = model.toLowerCase();
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
58
|
+
// Exact match
|
|
59
|
+
if (MODEL_CAPABILITIES[m]?.displayName)
|
|
60
|
+
return MODEL_CAPABILITIES[m].displayName;
|
|
61
|
+
if (MODEL_CAPABILITIES[model]?.displayName)
|
|
62
|
+
return MODEL_CAPABILITIES[model].displayName;
|
|
63
|
+
// Substring match
|
|
64
|
+
for (const [key, cap] of Object.entries(MODEL_CAPABILITIES)) {
|
|
65
|
+
if (key !== "unknown" && m.includes(key) && cap.displayName)
|
|
66
|
+
return cap.displayName;
|
|
44
67
|
}
|
|
45
|
-
return
|
|
68
|
+
return model;
|
|
69
|
+
}
|
|
70
|
+
/**
|
|
71
|
+
* Context constraint instruction injected into planner prompts.
|
|
72
|
+
* Tells the planner how to scope tasks based on the worker model's context.
|
|
73
|
+
*/
|
|
74
|
+
export function contextConstraintNote(model) {
|
|
75
|
+
const cap = getModelCapability(model);
|
|
76
|
+
const ctx = Math.round(cap.contextWindow / 1000);
|
|
77
|
+
switch (cap.contextConstraint) {
|
|
78
|
+
case "tight":
|
|
79
|
+
return `Worker agents have a TIGHT context window (~${ctx}K tokens). They are prone losing thread on large tasks. Be hyper-specific: name exact files, functions, and changes. One narrow deliverable per task. No ambiguity.`;
|
|
80
|
+
case "moderate":
|
|
81
|
+
return `Worker agents have a moderate context window (~${ctx}K tokens). They can handle focused missions but may struggle with sprawling codebases. Be specific about files and expected outcomes. Scope tasks to clear, concrete deliverables.`;
|
|
82
|
+
case "relaxed":
|
|
83
|
+
return `Worker agents have a large context window (~${ctx}K tokens). They can read most of the codebase at once and reliably own multi-file features. Give them missions with full autonomy — "Design and implement X" not "edit line 42 of Y.ts".`;
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
/** Format context window for display (e.g. "256K"). */
|
|
87
|
+
export function formatContextWindow(model) {
|
|
88
|
+
const cap = getModelCapability(model);
|
|
89
|
+
if (cap.contextWindow >= 1_000_000)
|
|
90
|
+
return `${(cap.contextWindow / 1_000_000).toFixed(1)}M`;
|
|
91
|
+
return `${Math.round(cap.contextWindow / 1000)}K`;
|
|
46
92
|
}
|
|
47
|
-
// ── Default / fallback models ──
|
|
48
|
-
export const DEFAULT_MODEL = "claude-sonnet-4-6";
|
|
49
|
-
export const FALLBACK_MODEL = "claude-opus-4-6"; // used for planner + worker recovery
|
|
50
|
-
// ── Planner thresholds (opus-tuned vs default) ──
|
|
51
|
-
export const PLANNER_THRESHOLDS = {
|
|
52
|
-
opus: { small: 5, medium: 30 },
|
|
53
|
-
default: { small: 15, medium: 50 },
|
|
54
|
-
};
|
|
55
|
-
// ── Cursor model hints ──
|
|
56
|
-
export const CURSOR_MODEL_HINTS = {
|
|
57
|
-
opus: "Opus-tier model via Cursor",
|
|
58
|
-
sonnet: "Sonnet-tier model via Cursor",
|
|
59
|
-
haiku: "Haiku-tier model via Cursor (fast)",
|
|
60
|
-
};
|
package/dist/planner-query.d.ts
CHANGED
|
@@ -1,6 +1,4 @@
|
|
|
1
1
|
import type { Task, PermMode, RateLimitWindow } from "./types.js";
|
|
2
|
-
import { detectModelTier, modelCapabilityBlock } from "./models.js";
|
|
3
|
-
export { detectModelTier, modelCapabilityBlock };
|
|
4
2
|
/**
|
|
5
3
|
* Logging callback used by planner/steering queries.
|
|
6
4
|
* `kind` distinguishes ephemeral status updates (heartbeat ticker) from
|
package/dist/planner-query.js
CHANGED
|
@@ -1,9 +1,6 @@
|
|
|
1
1
|
import { query } from "@anthropic-ai/claude-agent-sdk";
|
|
2
2
|
import { readFileSync } from "fs";
|
|
3
3
|
import { NudgeError } from "./types.js";
|
|
4
|
-
import { detectModelTier, modelCapabilityBlock } from "./models.js";
|
|
5
|
-
// Re-export for consumers that import from planner-query (steering.ts, index.ts).
|
|
6
|
-
export { detectModelTier, modelCapabilityBlock };
|
|
7
4
|
// ── Shared env resolver (set once at run start, used by every planner query) ──
|
|
8
5
|
//
|
|
9
6
|
// Swarm and planner calls share a model→env map so a custom provider configured
|
|
@@ -25,7 +22,7 @@ let _plannerRateLimitInfo = {
|
|
|
25
22
|
utilization: 0, status: "", isUsingOverage: false, windows: new Map(), costUsd: 0,
|
|
26
23
|
};
|
|
27
24
|
export function getPlannerRateLimitInfo() { return _plannerRateLimitInfo; }
|
|
28
|
-
// ── Proactive
|
|
25
|
+
// ── Proactive rate-limit gate ──
|
|
29
26
|
/**
|
|
30
27
|
* Proactive rate-limit gate. Called before each planner/steering query to
|
|
31
28
|
* prevent hammering the API when we're already near a limit.
|
package/dist/planner.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import type { Task, PermMode } from "./types.js";
|
|
2
2
|
export declare function salvageFromFile(outFile: string | undefined, budget: number | undefined, onLog: (text: string, kind?: "status" | "event") => void, why: string): Task[] | null;
|
|
3
|
-
export declare const DESIGN_THINKING = "\nHOW TO THINK ABOUT EVERY TASK:\n\nStart from the user's job. What is someone hiring this product to do? \"I need to send money abroad cheaply\" -- not \"I need a currency conversion API.\" Every decision -- what to build, how fast it
|
|
3
|
+
export declare const DESIGN_THINKING = "\nHOW TO THINK ABOUT EVERY TASK:\n\nStart from the user's job. What is someone hiring this product to do? \"I need to send money abroad cheaply\" -- not \"I need a currency conversion API.\" Every decision -- what to build, how fast it needs to respond, what happens on error -- flows from the job.\n\nThe experience IS the product. A 200ms server response is not a \"performance metric\" -- it's the difference between an app that feels alive and one that feels broken. A loading state is not \"polish\" -- it's the user knowing the app heard them. An error message is not \"error handling\" -- it's the app being honest. There is no line between backend and UX. The server, the API, the database query, the render -- they're all one experience the user either trusts or doesn't.\n\nBuild the core, verify it works, learn, iterate. Don't plan 20 features and build them all. Build the ONE thing that matters most, run it, see if it actually works from a user's chair. What you learn from seeing it run will change what you build next. Each wave should make what exists better before adding what doesn't exist yet.\n\nConsistency is what makes complex things feel simple. One design system, rigid rules, no exceptions. This is how Revolut ships a super-app with 30+ features that doesn't feel like chaos.\n";
|
|
4
4
|
export declare function planTasks(objective: string, cwd: string, plannerModel: string, workerModel: string, permissionMode: PermMode, budget: number | undefined, concurrency: number, onLog: (text: string) => void, flexNote?: string, outFile?: string): Promise<Task[]>;
|
|
5
5
|
export declare function identifyThemes(objective: string, count: number, cwd: string, model: string, permissionMode: PermMode, onLog?: (text: string) => void): Promise<string[]>;
|
|
6
6
|
export declare function buildThinkingTasks(objective: string, themes: string[], designDir: string, plannerModel: string, previousKnowledge?: string): Task[];
|
package/dist/planner.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { readFileSync } from "fs";
|
|
2
2
|
import { runPlannerQuery, extractTaskJson, attemptJsonParse, postProcess } from "./planner-query.js";
|
|
3
|
-
import {
|
|
3
|
+
import { contextConstraintNote } from "./models.js";
|
|
4
4
|
// Resilience: if the planner query throws but the agent already wrote valid
|
|
5
5
|
// tasks to `outFile` (via its Write tool), salvage them instead of discarding
|
|
6
6
|
// expensive work. Returns salvaged tasks on success, null if nothing usable on
|
|
@@ -29,7 +29,7 @@ export function salvageFromFile(outFile, budget, onLog, why) {
|
|
|
29
29
|
export const DESIGN_THINKING = `
|
|
30
30
|
HOW TO THINK ABOUT EVERY TASK:
|
|
31
31
|
|
|
32
|
-
Start from the user's job. What is someone hiring this product to do? "I need to send money abroad cheaply" -- not "I need a currency conversion API." Every decision -- what to build, how fast it
|
|
32
|
+
Start from the user's job. What is someone hiring this product to do? "I need to send money abroad cheaply" -- not "I need a currency conversion API." Every decision -- what to build, how fast it needs to respond, what happens on error -- flows from the job.
|
|
33
33
|
|
|
34
34
|
The experience IS the product. A 200ms server response is not a "performance metric" -- it's the difference between an app that feels alive and one that feels broken. A loading state is not "polish" -- it's the user knowing the app heard them. An error message is not "error handling" -- it's the app being honest. There is no line between backend and UX. The server, the API, the database query, the render -- they're all one experience the user either trusts or doesn't.
|
|
35
35
|
|
|
@@ -54,53 +54,30 @@ const THEMES_SCHEMA = {
|
|
|
54
54
|
required: ["themes"],
|
|
55
55
|
},
|
|
56
56
|
};
|
|
57
|
-
// ── Budget
|
|
57
|
+
// ── Budget breakpoints for prompt strategy ──
|
|
58
|
+
const BUDGET_FOCUSED = 10; // ≤ this → surgical, file-specific tasks
|
|
59
|
+
const BUDGET_SUBSTANTIAL = 30; // ≤ this → mission-level, autonomous agents
|
|
60
|
+
// ── Context-aware prompt strategy ──
|
|
58
61
|
function plannerPrompt(objective, workerModel, budget, concurrency, flexNote) {
|
|
59
|
-
const b = budget ??
|
|
60
|
-
const
|
|
61
|
-
const capability = modelCapabilityBlock(workerModel);
|
|
62
|
+
const b = budget ?? BUDGET_FOCUSED;
|
|
63
|
+
const constraint = contextConstraintNote(workerModel);
|
|
62
64
|
const concLine = concurrency
|
|
63
65
|
? `\n- ${concurrency} agents run in parallel -- tasks that run concurrently must touch DIFFERENT files to avoid merge conflicts`
|
|
64
66
|
: "";
|
|
65
67
|
const flexLine = flexNote ? `\n\n${flexNote}` : "";
|
|
66
|
-
if (
|
|
68
|
+
if (b <= BUDGET_FOCUSED) {
|
|
67
69
|
return `You are a task coordinator for a parallel agent system. Analyze this codebase and break the following objective into independent tasks.
|
|
68
70
|
|
|
69
71
|
Objective: ${objective}
|
|
70
72
|
|
|
71
|
-
|
|
73
|
+
${constraint}
|
|
72
74
|
|
|
73
75
|
Requirements:
|
|
74
76
|
- Target exactly ~${b} tasks
|
|
75
77
|
- Each task MUST be independent -- no task depends on another
|
|
76
78
|
- Each task should target specific files/areas to avoid merge conflicts
|
|
77
79
|
- Be specific: mention exact file paths, function names, what to change
|
|
78
|
-
- Keep tasks focused: one concrete change per task
|
|
79
|
-
|
|
80
|
-
Respond with ONLY a JSON object (no markdown fences):
|
|
81
|
-
{
|
|
82
|
-
"tasks": [
|
|
83
|
-
{ "prompt": "In src/foo.ts, refactor the bar() function to..." },
|
|
84
|
-
{ "prompt": "Add unit tests for the baz module in test/baz.test.ts..." }
|
|
85
|
-
]
|
|
86
|
-
}`;
|
|
87
|
-
}
|
|
88
|
-
const thresholds = tier === MODEL_TIER_OPUS ? PLANNER_THRESHOLDS.opus : PLANNER_THRESHOLDS.default;
|
|
89
|
-
const smallThreshold = thresholds.small;
|
|
90
|
-
const mediumThreshold = thresholds.medium;
|
|
91
|
-
if (b <= smallThreshold) {
|
|
92
|
-
return `You are a task coordinator for a parallel agent system. Analyze this codebase and break the following objective into independent tasks.
|
|
93
|
-
|
|
94
|
-
Objective: ${objective}
|
|
95
|
-
|
|
96
|
-
AGENT CAPABILITY: ${capability}
|
|
97
|
-
|
|
98
|
-
Requirements:
|
|
99
|
-
- Each task MUST be independent -- no task depends on another
|
|
100
|
-
- Each task should target specific files/areas to avoid merge conflicts
|
|
101
|
-
- Be specific: mention exact file paths, function names, what to change
|
|
102
|
-
- Keep tasks focused: one logical change per task
|
|
103
|
-
- Target exactly ~${b} tasks${concLine}${flexLine}
|
|
80
|
+
- Keep tasks focused: one concrete change per task${concLine}${flexLine}
|
|
104
81
|
|
|
105
82
|
Respond with ONLY a JSON object (no markdown fences):
|
|
106
83
|
{
|
|
@@ -110,18 +87,18 @@ Respond with ONLY a JSON object (no markdown fences):
|
|
|
110
87
|
]
|
|
111
88
|
}`;
|
|
112
89
|
}
|
|
113
|
-
if (b <=
|
|
90
|
+
if (b <= BUDGET_SUBSTANTIAL) {
|
|
114
91
|
return `You are a task coordinator for a parallel agent system with ${b} agent sessions available.
|
|
115
92
|
|
|
116
93
|
Objective: ${objective}
|
|
117
94
|
|
|
118
|
-
|
|
95
|
+
${constraint}
|
|
119
96
|
|
|
120
97
|
Do NOT over-specify. Give each agent a MISSION, not step-by-step instructions. Let agents make their own decisions about implementation details.
|
|
121
98
|
|
|
122
99
|
Requirements:
|
|
123
100
|
- Target exactly ~${b} tasks
|
|
124
|
-
- Each task should be a substantial piece of work
|
|
101
|
+
- Each task should be a substantial piece of work
|
|
125
102
|
- Each task MUST be independent -- no task depends on another
|
|
126
103
|
- Tasks that run concurrently must touch DIFFERENT files/areas to avoid merge conflicts
|
|
127
104
|
- Give agents scope and autonomy: "Design and implement X" not "In file Y, add function Z"
|
|
@@ -140,7 +117,7 @@ Respond with ONLY a JSON object (no markdown fences):
|
|
|
140
117
|
|
|
141
118
|
Objective: ${objective}
|
|
142
119
|
|
|
143
|
-
|
|
120
|
+
${constraint}
|
|
144
121
|
|
|
145
122
|
With ${b} sessions, you should think BIG:
|
|
146
123
|
- Full feature implementations spanning multiple files
|
|
@@ -156,7 +133,7 @@ With ${b} sessions, you should think BIG:
|
|
|
156
133
|
|
|
157
134
|
Requirements:
|
|
158
135
|
- Target exactly ~${b} tasks
|
|
159
|
-
- Each task should be substantial:
|
|
136
|
+
- Each task should be substantial: significant autonomous agent work
|
|
160
137
|
- Each task MUST be independent -- no task depends on another
|
|
161
138
|
- Tasks that run concurrently must target DIFFERENT files/areas to avoid merge conflicts
|
|
162
139
|
- Give agents missions with full autonomy: "Own the entire X subsystem" not "edit line 42 of Y.ts"
|
|
@@ -245,7 +222,7 @@ Be thorough -- your findings drive the execution plan.`,
|
|
|
245
222
|
}));
|
|
246
223
|
}
|
|
247
224
|
export async function orchestrate(objective, designDocs, cwd, plannerModel, workerModel, permissionMode, budget, concurrency, onLog, flexNote, outFile) {
|
|
248
|
-
const
|
|
225
|
+
const constraint = contextConstraintNote(workerModel);
|
|
249
226
|
const flexLine = flexNote ? `\n\n${flexNote}` : "";
|
|
250
227
|
const fileInstruction = outFile ? `\n\nAFTER generating the JSON, also write it to ${outFile} using the Write tool.` : "";
|
|
251
228
|
const prompt = `You are a tech lead planning a sprint based on your team's codebase research.
|
|
@@ -256,7 +233,7 @@ Your architects explored the codebase and found:
|
|
|
256
233
|
|
|
257
234
|
${designDocs}
|
|
258
235
|
|
|
259
|
-
|
|
236
|
+
${constraint}
|
|
260
237
|
${DESIGN_THINKING}
|
|
261
238
|
Create exactly ~${budget} concrete execution tasks based on these findings.
|
|
262
239
|
|
|
@@ -298,7 +275,7 @@ Respond with ONLY a JSON object (no markdown fences):
|
|
|
298
275
|
export async function refinePlan(objective, previousTasks, feedback, cwd, plannerModel, workerModel, permissionMode, budget, concurrency, onLog) {
|
|
299
276
|
onLog("Refining plan...");
|
|
300
277
|
const prev = previousTasks.map((t, i) => `${i + 1}. ${t.prompt}`).join("\n");
|
|
301
|
-
const
|
|
278
|
+
const constraint = contextConstraintNote(workerModel);
|
|
302
279
|
const b = budget ?? 10;
|
|
303
280
|
const scaleNote = b > 50 ? `This is a LARGE budget (${b} sessions). Think big -- missions, not micro-tasks.`
|
|
304
281
|
: b > 15 ? `Each of the ${b} sessions is a capable AI agent. Give substantial missions, not trivial edits.`
|
|
@@ -312,7 +289,7 @@ ${prev}
|
|
|
312
289
|
|
|
313
290
|
The user wants changes: ${feedback}
|
|
314
291
|
|
|
315
|
-
|
|
292
|
+
${constraint}
|
|
316
293
|
|
|
317
294
|
${scaleNote} ${concurrency} agents run in parallel. Update the plan accordingly. Keep tasks independent and targeting different files/areas.
|
|
318
295
|
|
package/dist/providers.d.ts
CHANGED
|
@@ -58,6 +58,7 @@ export declare const PROXY_DEFAULT_URL = "http://127.0.0.1:8765";
|
|
|
58
58
|
export declare function isCursorProxyProvider(p: ProviderConfig): boolean;
|
|
59
59
|
/**
|
|
60
60
|
* Health check: GET /health on the proxy. Returns true if proxy is reachable.
|
|
61
|
+
* Passes the stored API key so the /health endpoint doesn't return 401.
|
|
61
62
|
*/
|
|
62
63
|
export declare function healthCheckCursorProxy(baseUrl?: string): Promise<boolean>;
|
|
63
64
|
/**
|
|
@@ -66,8 +67,32 @@ export declare function healthCheckCursorProxy(baseUrl?: string): Promise<boolea
|
|
|
66
67
|
*/
|
|
67
68
|
export declare function fetchCursorModels(baseUrl?: string): Promise<string[]>;
|
|
68
69
|
/**
|
|
69
|
-
*
|
|
70
|
+
* Auto-start the cursor-api-proxy as a detached background process.
|
|
71
|
+
*
|
|
72
|
+
* When the proxy is started, we also configure it to use system Node.js
|
|
73
|
+
* for spawning the cursor-agent subprocess. The agent's bundled Node.js
|
|
74
|
+
* segfaults with --list-models on macOS (exit 139), so we resolve the
|
|
75
|
+
* system `node` binary and the agent's index.js, patch the proxy's env.js
|
|
76
|
+
* to respect CURSOR_AGENT_NODE/SCRIPT on Unix, and pass those env vars.
|
|
77
|
+
*
|
|
78
|
+
* Handles:
|
|
79
|
+
* - Proxy already running and verified → returns true immediately
|
|
80
|
+
* - Something on the port but not our proxy → warns, skips spawn
|
|
81
|
+
* - Port is bound but nothing responds on it → returns true (something bound it)
|
|
82
|
+
* - Proxy not running → spawns `npx cursor-api-proxy` detached, waits for health
|
|
83
|
+
* - Spawn fails (not installed) → returns false, caller falls back to manual instructions
|
|
84
|
+
*
|
|
85
|
+
* When `forceRestart` is true and a stale process is on the port, it will be
|
|
86
|
+
* killed and the proxy restarted.
|
|
87
|
+
*
|
|
88
|
+
* Returns true when the proxy is reachable at PROXY_DEFAULT_URL.
|
|
89
|
+
*/
|
|
90
|
+
export declare function ensureCursorProxyRunning(baseUrl?: string, forceRestart?: boolean): Promise<boolean>;
|
|
91
|
+
/**
|
|
92
|
+
* Full install + configure flow for cursor-api-proxy.
|
|
70
93
|
* Walks through CLI install, API key config, and proxy start.
|
|
94
|
+
* Only needed when the quick auto-start (`ensureCursorProxyRunning`) fails —
|
|
95
|
+
* e.g. npx can't find the package or the user has no API key yet.
|
|
71
96
|
* Returns true when proxy is running and healthy.
|
|
72
97
|
*/
|
|
73
98
|
export declare function setupCursorProxy(): Promise<boolean>;
|