claude-overnight 1.19.0 → 1.19.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
+ export declare const VERSION = "1.19.1";
@@ -0,0 +1,2 @@
1
+ // Auto-generated by build — do not edit manually.
2
+ export const VERSION = "1.19.1";
@@ -11,7 +11,7 @@
11
11
  // The `priority` models always appear at the top of the picker in this order.
12
12
  // `known` models appear after them. Anything the proxy returns dynamically
13
13
  // that isn't in this list goes into a "more..." sub-menu.
14
- import { CURSOR_MODEL_HINTS } from "./models.js";
14
+ import { modelDisplayName, formatContextWindow } from "./models.js";
15
15
  export const CURSOR_PRIORITY_MODELS = [
16
16
  { id: "composer-2", label: "composer-2", hint: "Cursor Composer 2 — latest, strongest Cursor model" },
17
17
  { id: "composer-2-fast", label: "composer-2-fast", hint: "Cursor Composer 2 Fast — faster, cheaper variant" },
@@ -34,12 +34,10 @@ export function cursorModelHint(modelId) {
34
34
  }
35
35
  if (m.startsWith("composer"))
36
36
  return "Cursor Composer model";
37
- if (m.includes("opus"))
38
- return CURSOR_MODEL_HINTS.opus;
39
- if (m.includes("sonnet"))
40
- return CURSOR_MODEL_HINTS.sonnet;
41
- if (m.includes("haiku"))
42
- return CURSOR_MODEL_HINTS.haiku;
37
+ // For Claude variants through Cursor, use the shared display name
38
+ const displayName = modelDisplayName(modelId);
39
+ if (displayName !== modelId)
40
+ return `${displayName} via Cursor · ${formatContextWindow(modelId)} context`;
43
41
  if (m.startsWith("gpt-5"))
44
42
  return "GPT model via Cursor";
45
43
  if (m.startsWith("gemini"))
package/dist/index.js CHANGED
@@ -2,16 +2,14 @@
2
2
  import { readFileSync, existsSync, readdirSync, mkdirSync } from "fs";
3
3
  import { resolve, dirname, join } from "path";
4
4
  import { fileURLToPath } from "url";
5
- import { createRequire } from "module";
6
5
  import chalk from "chalk";
7
- const pkg = createRequire(import.meta.url)("../package.json");
8
- const VERSION = pkg.version;
6
+ import { VERSION } from "./_version.js";
9
7
  import { query } from "@anthropic-ai/claude-agent-sdk";
10
8
  import { Swarm } from "./swarm.js";
11
9
  import { planTasks, refinePlan, identifyThemes, buildThinkingTasks, orchestrate, salvageFromFile } from "./planner.js";
12
- import { detectModelTier, setPlannerEnvResolver } from "./planner-query.js";
13
- import { DEFAULT_MODEL } from "./models.js";
14
- import { pickModel, loadProviders, preflightProvider, buildEnvResolver, healthCheckCursorProxy, PROXY_DEFAULT_URL, isCursorProxyProvider } from "./providers.js";
10
+ import { modelDisplayName, formatContextWindow, DEFAULT_MODEL } from "./models.js";
11
+ import { setPlannerEnvResolver } from "./planner-query.js";
12
+ import { pickModel, loadProviders, preflightProvider, buildEnvResolver, healthCheckCursorProxy, PROXY_DEFAULT_URL, isCursorProxyProvider, ensureCursorProxyRunning } from "./providers.js";
15
13
  import { RunDisplay } from "./ui.js";
16
14
  import { renderSummary } from "./render.js";
17
15
  import { executeRun } from "./run.js";
@@ -69,7 +67,6 @@ async function promptResumeOverrides(state, cliFlags, argv, noTTY, runDir) {
69
67
  const modelsPromise = fetchModels(20_000).catch(() => []);
70
68
  // ── Interactive review ──
71
69
  const fmtSummary = () => {
72
- const tier = detectModelTier(state.workerModel);
73
70
  const remaining = Math.max(1, state.remaining);
74
71
  const capStr = state.usageCap != null ? `${Math.round(state.usageCap * 100)}%` : "unlimited";
75
72
  const extraStr = state.allowExtraUsage
@@ -78,7 +75,7 @@ async function promptResumeOverrides(state, cliFlags, argv, noTTY, runDir) {
78
75
  console.log();
79
76
  console.log(` ${chalk.dim("Resume settings")}`);
80
77
  console.log(` ${chalk.dim("─".repeat(40))}`);
81
- console.log(` ${chalk.dim("model ")}${chalk.white(state.workerModel)} ${chalk.dim(`(${tier})`)}`);
78
+ console.log(` ${chalk.dim("model ")}${chalk.white(state.workerModel)} ${chalk.dim(`(${formatContextWindow(state.workerModel)} context)`)}`);
82
79
  console.log(` ${chalk.dim("remaining ")}${chalk.white(String(remaining))} ${chalk.dim("sessions")}`);
83
80
  console.log(` ${chalk.dim("concur ")}${chalk.white(String(state.concurrency))}`);
84
81
  console.log(` ${chalk.dim("usage cap ")}${chalk.white(capStr)}`);
@@ -627,11 +624,11 @@ async function main() {
627
624
  }
628
625
  const parts = [];
629
626
  if (fastModel)
630
- parts.push(`${detectModelTier(plannerModel)} → ${detectModelTier(workerModel)} + ${detectModelTier(fastModel)}`);
627
+ parts.push(`${modelDisplayName(plannerModel)} → ${modelDisplayName(workerModel)} + ${modelDisplayName(fastModel)}`);
631
628
  else if (workerModel !== plannerModel)
632
- parts.push(`${detectModelTier(workerModel)} → ${detectModelTier(plannerModel)}`);
629
+ parts.push(`${modelDisplayName(workerModel)} → ${modelDisplayName(plannerModel)}`);
633
630
  else
634
- parts.push(detectModelTier(workerModel));
631
+ parts.push(modelDisplayName(workerModel));
635
632
  parts.push(`budget ${budget}`, `${concurrency}×`);
636
633
  if (budget > 2)
637
634
  parts.push("flex");
@@ -751,18 +748,31 @@ async function main() {
751
748
  ["fast", fastProvider],
752
749
  ];
753
750
  const pending = [];
751
+ const cursorProxies = [];
754
752
  for (const [role, p] of all) {
755
753
  if (p && !seen.has(p.id)) {
756
754
  seen.add(p.id);
757
755
  pending.push([role, p]);
756
+ if (isCursorProxyProvider(p))
757
+ cursorProxies.push(p);
758
758
  }
759
759
  }
760
+ // Auto-start cursor proxy before pinging
761
+ if (cursorProxies.length > 0) {
762
+ await ensureCursorProxyRunning();
763
+ }
760
764
  process.stdout.write(` ${chalk.dim(`◆ Pinging ${pending.map(([r, p]) => `${r} (${p.displayName})`).join(", ")}...`)}\n`);
761
765
  const results = await Promise.all(pending.map(async ([role, p]) => ({ role, provider: p, result: await preflightProvider(p, cwd) })));
762
766
  for (const { role, provider, result } of results) {
763
767
  if (!result.ok) {
764
768
  console.error(chalk.red(` ✗ ${role} preflight failed: ${chalk.dim(result.error)}`));
765
- console.error(chalk.red(`\n Fix the provider at ~/.claude/claude-overnight/providers.json and retry.\n`));
769
+ if (isCursorProxyProvider(provider)) {
770
+ console.error(chalk.yellow(` The proxy at ${PROXY_DEFAULT_URL} may have crashed. Start it: npx cursor-api-proxy`));
771
+ }
772
+ else {
773
+ console.error(chalk.red(` Fix the provider at ~/.claude/claude-overnight/providers.json and retry.`));
774
+ }
775
+ console.error("");
766
776
  process.exit(1);
767
777
  }
768
778
  console.log(` ${chalk.green(`✓ ${role} ready`)} ${chalk.dim(`· ${provider.displayName} · ${provider.model}`)}`);
@@ -899,6 +909,7 @@ async function main() {
899
909
  tasks: thinkingTasks, concurrency, cwd, model: plannerModel, permissionMode,
900
910
  useWorktrees: false, mergeStrategy: "yolo", agentTimeoutMs, usageCap, allowExtraUsage, extraUsageBudget,
901
911
  envForModel,
912
+ cursorProxy: [plannerProvider, workerProvider, fastProvider].some(p => p && isCursorProxyProvider(p)),
902
913
  });
903
914
  const thinkRunInfo = { accIn: 0, accOut: 0, accCost: 0, accCompleted: 0, accFailed: 0, sessionsBudget: budget ?? 10, waveNum: -1, remaining: budget ?? 10, model: plannerModel, startedAt: Date.now() };
904
915
  const thinkDisplay = new RunDisplay(thinkRunInfo, { remaining: 0, usageCap, concurrency, paused: false, dirty: false });
package/dist/models.d.ts CHANGED
@@ -1,27 +1,23 @@
1
- export declare const MODEL_TIER_OPUS = "opus";
2
- export declare const MODEL_TIER_SONNET = "sonnet";
3
- export declare const MODEL_TIER_HAIKU = "haiku";
4
- export declare const MODEL_TIER_UNKNOWN = "unknown";
5
- export type ModelTier = typeof MODEL_TIER_OPUS | typeof MODEL_TIER_SONNET | typeof MODEL_TIER_HAIKU | typeof MODEL_TIER_UNKNOWN;
6
- export interface TierDetectionRule {
7
- match: (model: string) => boolean;
8
- tier: ModelTier;
1
+ export interface ModelCapability {
2
+ contextWindow: number;
3
+ contextConstraint: "tight" | "moderate" | "relaxed";
4
+ /** Human-readable label for UI display. Falls back to the model key if absent. */
5
+ displayName?: string;
9
6
  }
10
- export declare const TIER_DETECTION_RULES: TierDetectionRule[];
11
- export declare function detectModelTier(model: string): ModelTier;
12
- export declare const MODEL_CAPABILITY_DESCRIPTIONS: Record<ModelTier, string>;
13
- export declare const UNKNOWN_MODEL_CAPABILITIES: Record<string, string>;
14
- export declare function modelCapabilityBlock(model: string): string;
7
+ export declare const MODEL_CAPABILITIES: Record<string, ModelCapability>;
15
8
  export declare const DEFAULT_MODEL = "claude-sonnet-4-6";
16
9
  export declare const FALLBACK_MODEL = "claude-opus-4-6";
17
- export declare const PLANNER_THRESHOLDS: {
18
- opus: {
19
- small: number;
20
- medium: number;
21
- };
22
- default: {
23
- small: number;
24
- medium: number;
25
- };
26
- };
27
- export declare const CURSOR_MODEL_HINTS: Record<string, string>;
10
+ /**
11
+ * Find capability info for a model string. Tries: lowercase exact match →
12
+ * exact match → substring match. Falls back to the "unknown" entry.
13
+ */
14
+ export declare function getModelCapability(model: string): ModelCapability;
15
+ /** Human-readable model name for display (e.g. in run labels). */
16
+ export declare function modelDisplayName(model: string): string;
17
+ /**
18
+ * Context constraint instruction injected into planner prompts.
19
+ * Tells the planner how to scope tasks based on the worker model's context.
20
+ */
21
+ export declare function contextConstraintNote(model: string): string;
22
+ /** Format context window for display (e.g. "256K"). */
23
+ export declare function formatContextWindow(model: string): string;
package/dist/models.js CHANGED
@@ -1,60 +1,92 @@
1
- // ── Model tier constants ──
2
- export const MODEL_TIER_OPUS = "opus";
3
- export const MODEL_TIER_SONNET = "sonnet";
4
- export const MODEL_TIER_HAIKU = "haiku";
5
- export const MODEL_TIER_UNKNOWN = "unknown";
6
- export const TIER_DETECTION_RULES = [
7
- { match: m => m === "default" || m.includes("opus"), tier: MODEL_TIER_OPUS },
8
- { match: m => m.includes("sonnet"), tier: MODEL_TIER_SONNET },
9
- { match: m => m.includes("haiku"), tier: MODEL_TIER_HAIKU },
10
- { match: m => m === "auto", tier: MODEL_TIER_UNKNOWN },
11
- { match: m => m.startsWith("composer"), tier: MODEL_TIER_SONNET },
12
- { match: m => m.startsWith("gpt-5") || m.startsWith("gemini") || m.startsWith("grok"), tier: MODEL_TIER_SONNET },
13
- ];
14
- export function detectModelTier(model) {
1
+ // ── Model capability catalog ──
2
+ //
3
+ // Single source of truth for model capabilities. Update this when new models
4
+ // arrive (which happens basically daily). Each entry describes what the model
5
+ // can handle in terms of context and task scoping.
6
+ //
7
+ // contextConstraint:
8
+ // "tight" — small context window. Model is lazy and error-prone on big
9
+ // tasks. Needs surgical, hyper-specific instructions.
10
+ // "moderate" — decent context. Can handle focused missions but may lose
11
+ // thread on sprawling codebases.
12
+ // "relaxed" — large context. Can read most of the codebase at once,
13
+ // reliably own multi-file features with autonomy.
14
+ export const MODEL_CAPABILITIES = {
15
+ // ── Anthropic Claude 4.5 / 4.6 ──
16
+ "claude-sonnet-4-6": { contextWindow: 256_000, contextConstraint: "relaxed", displayName: "Sonnet 4.6" },
17
+ "claude-sonnet-4-5": { contextWindow: 256_000, contextConstraint: "relaxed", displayName: "Sonnet 4.5" },
18
+ "claude-opus-4-6": { contextWindow: 200_000, contextConstraint: "relaxed", displayName: "Opus 4.6" },
19
+ "claude-opus-4-5": { contextWindow: 200_000, contextConstraint: "relaxed", displayName: "Opus 4.5" },
20
+ "claude-opus-4-20250514": { contextWindow: 200_000, contextConstraint: "relaxed", displayName: "Opus 4" },
21
+ "claude-haiku-4-5": { contextWindow: 200_000, contextConstraint: "moderate", displayName: "Haiku 4.5" },
22
+ "claude-haiku-4-5-20251001": { contextWindow: 200_000, contextConstraint: "moderate", displayName: "Haiku 4.5" },
23
+ // ── Cursor models ──
24
+ "auto": { contextWindow: 256_000, contextConstraint: "relaxed", displayName: "Cursor Auto" },
25
+ "composer-2": { contextWindow: 200_000, contextConstraint: "relaxed", displayName: "Composer 2" },
26
+ "composer-2-fast": { contextWindow: 128_000, contextConstraint: "moderate", displayName: "Composer 2 Fast" },
27
+ "composer": { contextWindow: 128_000, contextConstraint: "moderate", displayName: "Composer" },
28
+ // ── Qwen (via DashScope / custom provider) ──
29
+ "qwen3.6-plus": { contextWindow: 131_072, contextConstraint: "moderate", displayName: "Qwen 3.6 Plus" },
30
+ "qwen3-coder": { contextWindow: 262_144, contextConstraint: "relaxed", displayName: "Qwen 3 Coder" },
31
+ "qwen-max": { contextWindow: 32_768, contextConstraint: "tight", displayName: "Qwen Max" },
32
+ // ── Fallback for unknown models ──
33
+ "unknown": { contextWindow: 128_000, contextConstraint: "moderate" },
34
+ };
35
+ // ── Default / fallback models ──
36
+ export const DEFAULT_MODEL = "claude-sonnet-4-6";
37
+ export const FALLBACK_MODEL = "claude-opus-4-6"; // used for planner + worker recovery
38
+ // ── Lookup ──
39
+ /**
40
+ * Find capability info for a model string. Tries: lowercase exact match →
41
+ * exact match → substring match. Falls back to the "unknown" entry.
42
+ */
43
+ export function getModelCapability(model) {
15
44
  const m = model.toLowerCase();
16
- for (const rule of TIER_DETECTION_RULES) {
17
- if (rule.match(m))
18
- return rule.tier;
45
+ if (MODEL_CAPABILITIES[m])
46
+ return MODEL_CAPABILITIES[m];
47
+ if (MODEL_CAPABILITIES[model])
48
+ return MODEL_CAPABILITIES[model];
49
+ for (const [key, cap] of Object.entries(MODEL_CAPABILITIES)) {
50
+ if (key !== "unknown" && m.includes(key))
51
+ return cap;
19
52
  }
20
- return MODEL_TIER_UNKNOWN;
53
+ return MODEL_CAPABILITIES.unknown;
21
54
  }
22
- // ── Capability descriptions ──
23
- export const MODEL_CAPABILITY_DESCRIPTIONS = {
24
- opus: "Each agent runs Claude Opus with 1M context -- a powerhouse. It can own entire epics, do deep codebase research, make architectural decisions, implement complex multi-file systems end-to-end, use browser tools for analysis, and deliver expert-level work. These agents can work for 30+ minutes on the most complex tasks. Do NOT waste them on trivial edits -- give them ownership and autonomy.",
25
- sonnet: "Each agent runs Claude Sonnet -- capable of substantial implementation, refactoring, testing, and design work. Can work autonomously for 10-20 minutes on complex tasks. Give agents meaningful scope -- not just single-line edits.",
26
- haiku: "Each agent runs Claude Haiku -- fast and efficient, best for focused, well-specified tasks. Be explicit about files, functions, and expected changes. Keep tasks scoped to a clear, concrete deliverable.",
27
- unknown: "", // handled by UNKNOWN_MODEL_CAPABILITIES below
28
- };
29
- export const UNKNOWN_MODEL_CAPABILITIES = {
30
- composer: "Each agent runs a Cursor Composer model with full codebase access. Capable of focused implementation work. Be explicit about files, functions, and expected changes.",
31
- "gpt-5": "Each agent runs a GPT model via Cursor with full codebase access. Capable of focused implementation work. Be explicit about files, functions, and expected changes.",
32
- gemini: "Each agent runs a Gemini model via Cursor with full codebase access. Be explicit about files, functions, and expected changes.",
33
- grok: "Each agent runs a Grok model via Cursor with full codebase access. Be explicit about files, functions, and expected changes.",
34
- };
35
- export function modelCapabilityBlock(model) {
36
- const tier = detectModelTier(model);
37
- const cap = MODEL_CAPABILITY_DESCRIPTIONS[tier];
38
- if (cap)
39
- return cap;
55
+ /** Human-readable model name for display (e.g. in run labels). */
56
+ export function modelDisplayName(model) {
40
57
  const m = model.toLowerCase();
41
- for (const [prefix, desc] of Object.entries(UNKNOWN_MODEL_CAPABILITIES)) {
42
- if (m.startsWith(prefix))
43
- return desc;
58
+ // Exact match
59
+ if (MODEL_CAPABILITIES[m]?.displayName)
60
+ return MODEL_CAPABILITIES[m].displayName;
61
+ if (MODEL_CAPABILITIES[model]?.displayName)
62
+ return MODEL_CAPABILITIES[model].displayName;
63
+ // Substring match
64
+ for (const [key, cap] of Object.entries(MODEL_CAPABILITIES)) {
65
+ if (key !== "unknown" && m.includes(key) && cap.displayName)
66
+ return cap.displayName;
44
67
  }
45
- return `Each agent has full codebase access and can work autonomously.`;
68
+ return model;
69
+ }
70
+ /**
71
+ * Context constraint instruction injected into planner prompts.
72
+ * Tells the planner how to scope tasks based on the worker model's context.
73
+ */
74
+ export function contextConstraintNote(model) {
75
+ const cap = getModelCapability(model);
76
+ const ctx = Math.round(cap.contextWindow / 1000);
77
+ switch (cap.contextConstraint) {
78
+ case "tight":
79
+ return `Worker agents have a TIGHT context window (~${ctx}K tokens). They are prone to losing thread on large tasks. Be hyper-specific: name exact files, functions, and changes. One narrow deliverable per task. No ambiguity.`;
80
+ case "moderate":
81
+ return `Worker agents have a moderate context window (~${ctx}K tokens). They can handle focused missions but may struggle with sprawling codebases. Be specific about files and expected outcomes. Scope tasks to clear, concrete deliverables.`;
82
+ case "relaxed":
83
+ return `Worker agents have a large context window (~${ctx}K tokens). They can read most of the codebase at once and reliably own multi-file features. Give them missions with full autonomy — "Design and implement X" not "edit line 42 of Y.ts".`;
84
+ }
85
+ }
86
+ /** Format context window for display (e.g. "256K"). */
87
+ export function formatContextWindow(model) {
88
+ const cap = getModelCapability(model);
89
+ if (cap.contextWindow >= 1_000_000)
90
+ return `${(cap.contextWindow / 1_000_000).toFixed(1)}M`;
91
+ return `${Math.round(cap.contextWindow / 1000)}K`;
46
92
  }
47
- // ── Default / fallback models ──
48
- export const DEFAULT_MODEL = "claude-sonnet-4-6";
49
- export const FALLBACK_MODEL = "claude-opus-4-6"; // used for planner + worker recovery
50
- // ── Planner thresholds (opus-tuned vs default) ──
51
- export const PLANNER_THRESHOLDS = {
52
- opus: { small: 5, medium: 30 },
53
- default: { small: 15, medium: 50 },
54
- };
55
- // ── Cursor model hints ──
56
- export const CURSOR_MODEL_HINTS = {
57
- opus: "Opus-tier model via Cursor",
58
- sonnet: "Sonnet-tier model via Cursor",
59
- haiku: "Haiku-tier model via Cursor (fast)",
60
- };
@@ -1,6 +1,4 @@
1
1
  import type { Task, PermMode, RateLimitWindow } from "./types.js";
2
- import { detectModelTier, modelCapabilityBlock } from "./models.js";
3
- export { detectModelTier, modelCapabilityBlock };
4
2
  /**
5
3
  * Logging callback used by planner/steering queries.
6
4
  * `kind` distinguishes ephemeral status updates (heartbeat ticker) from
@@ -1,9 +1,6 @@
1
1
  import { query } from "@anthropic-ai/claude-agent-sdk";
2
2
  import { readFileSync } from "fs";
3
3
  import { NudgeError } from "./types.js";
4
- import { detectModelTier, modelCapabilityBlock } from "./models.js";
5
- // Re-export for consumers that import from planner-query (steering.ts, index.ts).
6
- export { detectModelTier, modelCapabilityBlock };
7
4
  // ── Shared env resolver (set once at run start, used by every planner query) ──
8
5
  //
9
6
  // Swarm and planner calls share a model→env map so a custom provider configured
@@ -25,7 +22,7 @@ let _plannerRateLimitInfo = {
25
22
  utilization: 0, status: "", isUsingOverage: false, windows: new Map(), costUsd: 0,
26
23
  };
27
24
  export function getPlannerRateLimitInfo() { return _plannerRateLimitInfo; }
28
- // ── Proactive throttle: wait before making API calls when utilization is high ──
25
+ // ── Proactive rate-limit gate ──
29
26
  /**
30
27
  * Proactive rate-limit gate. Called before each planner/steering query to
31
28
  * prevent hammering the API when we're already near a limit.
package/dist/planner.d.ts CHANGED
@@ -1,6 +1,6 @@
1
1
  import type { Task, PermMode } from "./types.js";
2
2
  export declare function salvageFromFile(outFile: string | undefined, budget: number | undefined, onLog: (text: string, kind?: "status" | "event") => void, why: string): Task[] | null;
3
- export declare const DESIGN_THINKING = "\nHOW TO THINK ABOUT EVERY TASK:\n\nStart from the user's job. What is someone hiring this product to do? \"I need to send money abroad cheaply\" -- not \"I need a currency conversion API.\" Every decision -- what to build, how fast it responds, what happens on error -- flows from the job.\n\nThe experience IS the product. A 200ms server response is not a \"performance metric\" -- it's the difference between an app that feels alive and one that feels broken. A loading state is not \"polish\" -- it's the user knowing the app heard them. An error message is not \"error handling\" -- it's the app being honest. There is no line between backend and UX. The server, the API, the database query, the render -- they're all one experience the user either trusts or doesn't.\n\nBuild the core, verify it works, learn, iterate. Don't plan 20 features and build them all. Build the ONE thing that matters most, run it, see if it actually works from a user's chair. What you learn from seeing it run will change what you build next. Each wave should make what exists better before adding what doesn't exist yet.\n\nConsistency is what makes complex things feel simple. One design system, rigid rules, no exceptions. This is how Revolut ships a super-app with 30+ features that doesn't feel like chaos.\n";
3
+ export declare const DESIGN_THINKING = "\nHOW TO THINK ABOUT EVERY TASK:\n\nStart from the user's job. What is someone hiring this product to do? \"I need to send money abroad cheaply\" -- not \"I need a currency conversion API.\" Every decision -- what to build, how fast it needs to respond, what happens on error -- flows from the job.\n\nThe experience IS the product. A 200ms server response is not a \"performance metric\" -- it's the difference between an app that feels alive and one that feels broken. A loading state is not \"polish\" -- it's the user knowing the app heard them. An error message is not \"error handling\" -- it's the app being honest. There is no line between backend and UX. The server, the API, the database query, the render -- they're all one experience the user either trusts or doesn't.\n\nBuild the core, verify it works, learn, iterate. Don't plan 20 features and build them all. Build the ONE thing that matters most, run it, see if it actually works from a user's chair. What you learn from seeing it run will change what you build next. Each wave should make what exists better before adding what doesn't exist yet.\n\nConsistency is what makes complex things feel simple. One design system, rigid rules, no exceptions. This is how Revolut ships a super-app with 30+ features that doesn't feel like chaos.\n";
4
4
  export declare function planTasks(objective: string, cwd: string, plannerModel: string, workerModel: string, permissionMode: PermMode, budget: number | undefined, concurrency: number, onLog: (text: string) => void, flexNote?: string, outFile?: string): Promise<Task[]>;
5
5
  export declare function identifyThemes(objective: string, count: number, cwd: string, model: string, permissionMode: PermMode, onLog?: (text: string) => void): Promise<string[]>;
6
6
  export declare function buildThinkingTasks(objective: string, themes: string[], designDir: string, plannerModel: string, previousKnowledge?: string): Task[];
package/dist/planner.js CHANGED
@@ -1,6 +1,6 @@
1
1
  import { readFileSync } from "fs";
2
2
  import { runPlannerQuery, extractTaskJson, attemptJsonParse, postProcess } from "./planner-query.js";
3
- import { detectModelTier, modelCapabilityBlock, MODEL_TIER_HAIKU, MODEL_TIER_OPUS, PLANNER_THRESHOLDS } from "./models.js";
3
+ import { contextConstraintNote } from "./models.js";
4
4
  // Resilience: if the planner query throws but the agent already wrote valid
5
5
  // tasks to `outFile` (via its Write tool), salvage them instead of discarding
6
6
  // expensive work. Returns salvaged tasks on success, null if nothing usable on
@@ -29,7 +29,7 @@ export function salvageFromFile(outFile, budget, onLog, why) {
29
29
  export const DESIGN_THINKING = `
30
30
  HOW TO THINK ABOUT EVERY TASK:
31
31
 
32
- Start from the user's job. What is someone hiring this product to do? "I need to send money abroad cheaply" -- not "I need a currency conversion API." Every decision -- what to build, how fast it responds, what happens on error -- flows from the job.
32
+ Start from the user's job. What is someone hiring this product to do? "I need to send money abroad cheaply" -- not "I need a currency conversion API." Every decision -- what to build, how fast it needs to respond, what happens on error -- flows from the job.
33
33
 
34
34
  The experience IS the product. A 200ms server response is not a "performance metric" -- it's the difference between an app that feels alive and one that feels broken. A loading state is not "polish" -- it's the user knowing the app heard them. An error message is not "error handling" -- it's the app being honest. There is no line between backend and UX. The server, the API, the database query, the render -- they're all one experience the user either trusts or doesn't.
35
35
 
@@ -54,53 +54,30 @@ const THEMES_SCHEMA = {
54
54
  required: ["themes"],
55
55
  },
56
56
  };
57
- // ── Budget + model aware prompt strategy ──
57
+ // ── Budget breakpoints for prompt strategy ──
58
+ const BUDGET_FOCUSED = 10; // ≤ this → surgical, file-specific tasks
59
+ const BUDGET_SUBSTANTIAL = 30; // ≤ this → mission-level, autonomous agents
60
+ // ── Context-aware prompt strategy ──
58
61
  function plannerPrompt(objective, workerModel, budget, concurrency, flexNote) {
59
- const b = budget ?? 10;
60
- const tier = detectModelTier(workerModel);
61
- const capability = modelCapabilityBlock(workerModel);
62
+ const b = budget ?? BUDGET_FOCUSED;
63
+ const constraint = contextConstraintNote(workerModel);
62
64
  const concLine = concurrency
63
65
  ? `\n- ${concurrency} agents run in parallel -- tasks that run concurrently must touch DIFFERENT files to avoid merge conflicts`
64
66
  : "";
65
67
  const flexLine = flexNote ? `\n\n${flexNote}` : "";
66
- if (tier === MODEL_TIER_HAIKU) {
68
+ if (b <= BUDGET_FOCUSED) {
67
69
  return `You are a task coordinator for a parallel agent system. Analyze this codebase and break the following objective into independent tasks.
68
70
 
69
71
  Objective: ${objective}
70
72
 
71
- AGENT CAPABILITY: ${capability}
73
+ ${constraint}
72
74
 
73
75
  Requirements:
74
76
  - Target exactly ~${b} tasks
75
77
  - Each task MUST be independent -- no task depends on another
76
78
  - Each task should target specific files/areas to avoid merge conflicts
77
79
  - Be specific: mention exact file paths, function names, what to change
78
- - Keep tasks focused: one concrete change per task -- Haiku agents work best with clear, scoped instructions${concLine}${flexLine}
79
-
80
- Respond with ONLY a JSON object (no markdown fences):
81
- {
82
- "tasks": [
83
- { "prompt": "In src/foo.ts, refactor the bar() function to..." },
84
- { "prompt": "Add unit tests for the baz module in test/baz.test.ts..." }
85
- ]
86
- }`;
87
- }
88
- const thresholds = tier === MODEL_TIER_OPUS ? PLANNER_THRESHOLDS.opus : PLANNER_THRESHOLDS.default;
89
- const smallThreshold = thresholds.small;
90
- const mediumThreshold = thresholds.medium;
91
- if (b <= smallThreshold) {
92
- return `You are a task coordinator for a parallel agent system. Analyze this codebase and break the following objective into independent tasks.
93
-
94
- Objective: ${objective}
95
-
96
- AGENT CAPABILITY: ${capability}
97
-
98
- Requirements:
99
- - Each task MUST be independent -- no task depends on another
100
- - Each task should target specific files/areas to avoid merge conflicts
101
- - Be specific: mention exact file paths, function names, what to change
102
- - Keep tasks focused: one logical change per task
103
- - Target exactly ~${b} tasks${concLine}${flexLine}
80
+ - Keep tasks focused: one concrete change per task${concLine}${flexLine}
104
81
 
105
82
  Respond with ONLY a JSON object (no markdown fences):
106
83
  {
@@ -110,18 +87,18 @@ Respond with ONLY a JSON object (no markdown fences):
110
87
  ]
111
88
  }`;
112
89
  }
113
- if (b <= mediumThreshold) {
90
+ if (b <= BUDGET_SUBSTANTIAL) {
114
91
  return `You are a task coordinator for a parallel agent system with ${b} agent sessions available.
115
92
 
116
93
  Objective: ${objective}
117
94
 
118
- AGENT CAPABILITY: ${capability}
95
+ ${constraint}
119
96
 
120
97
  Do NOT over-specify. Give each agent a MISSION, not step-by-step instructions. Let agents make their own decisions about implementation details.
121
98
 
122
99
  Requirements:
123
100
  - Target exactly ~${b} tasks
124
- - Each task should be a substantial piece of work (5-30 minutes of agent time)
101
+ - Each task should be a substantial piece of work
125
102
  - Each task MUST be independent -- no task depends on another
126
103
  - Tasks that run concurrently must touch DIFFERENT files/areas to avoid merge conflicts
127
104
  - Give agents scope and autonomy: "Design and implement X" not "In file Y, add function Z"
@@ -140,7 +117,7 @@ Respond with ONLY a JSON object (no markdown fences):
140
117
 
141
118
  Objective: ${objective}
142
119
 
143
- AGENT CAPABILITY: ${capability}
120
+ ${constraint}
144
121
 
145
122
  With ${b} sessions, you should think BIG:
146
123
  - Full feature implementations spanning multiple files
@@ -156,7 +133,7 @@ With ${b} sessions, you should think BIG:
156
133
 
157
134
  Requirements:
158
135
  - Target exactly ~${b} tasks
159
- - Each task should be substantial: 10-30 minutes of autonomous agent work
136
+ - Each task should be substantial: significant autonomous agent work
160
137
  - Each task MUST be independent -- no task depends on another
161
138
  - Tasks that run concurrently must target DIFFERENT files/areas to avoid merge conflicts
162
139
  - Give agents missions with full autonomy: "Own the entire X subsystem" not "edit line 42 of Y.ts"
@@ -245,7 +222,7 @@ Be thorough -- your findings drive the execution plan.`,
245
222
  }));
246
223
  }
247
224
  export async function orchestrate(objective, designDocs, cwd, plannerModel, workerModel, permissionMode, budget, concurrency, onLog, flexNote, outFile) {
248
- const capability = modelCapabilityBlock(workerModel);
225
+ const constraint = contextConstraintNote(workerModel);
249
226
  const flexLine = flexNote ? `\n\n${flexNote}` : "";
250
227
  const fileInstruction = outFile ? `\n\nAFTER generating the JSON, also write it to ${outFile} using the Write tool.` : "";
251
228
  const prompt = `You are a tech lead planning a sprint based on your team's codebase research.
@@ -256,7 +233,7 @@ Your architects explored the codebase and found:
256
233
 
257
234
  ${designDocs}
258
235
 
259
- AGENT CAPABILITY: ${capability}
236
+ ${constraint}
260
237
  ${DESIGN_THINKING}
261
238
  Create exactly ~${budget} concrete execution tasks based on these findings.
262
239
 
@@ -298,7 +275,7 @@ Respond with ONLY a JSON object (no markdown fences):
298
275
  export async function refinePlan(objective, previousTasks, feedback, cwd, plannerModel, workerModel, permissionMode, budget, concurrency, onLog) {
299
276
  onLog("Refining plan...");
300
277
  const prev = previousTasks.map((t, i) => `${i + 1}. ${t.prompt}`).join("\n");
301
- const capability = modelCapabilityBlock(workerModel);
278
+ const constraint = contextConstraintNote(workerModel);
302
279
  const b = budget ?? 10;
303
280
  const scaleNote = b > 50 ? `This is a LARGE budget (${b} sessions). Think big -- missions, not micro-tasks.`
304
281
  : b > 15 ? `Each of the ${b} sessions is a capable AI agent. Give substantial missions, not trivial edits.`
@@ -312,7 +289,7 @@ ${prev}
312
289
 
313
290
  The user wants changes: ${feedback}
314
291
 
315
- AGENT CAPABILITY: ${capability}
292
+ ${constraint}
316
293
 
317
294
  ${scaleNote} ${concurrency} agents run in parallel. Update the plan accordingly. Keep tasks independent and targeting different files/areas.
318
295
 
@@ -66,8 +66,29 @@ export declare function healthCheckCursorProxy(baseUrl?: string): Promise<boolea
66
66
  */
67
67
  export declare function fetchCursorModels(baseUrl?: string): Promise<string[]>;
68
68
  /**
69
- * Interactive setup guide for cursor-api-proxy.
69
+ * Auto-start the cursor-api-proxy as a detached background process.
70
+ *
71
+ * When the proxy is started, we also configure it to use system Node.js
72
+ * for spawning the cursor-agent subprocess. The agent's bundled Node.js
73
+ * segfaults with --list-models on macOS (exit 139), so we resolve the
74
+ * system `node` binary and the agent's index.js, patch the proxy's env.js
75
+ * to respect CURSOR_AGENT_NODE/SCRIPT on Unix, and pass those env vars.
76
+ *
77
+ * Handles:
78
+ * - Proxy already running and verified → returns true immediately
79
+ * - Something on the port but not our proxy → warns, skips spawn
80
+ * - Port in use by nothing responsive → returns true (something bound it)
81
+ * - Proxy not running → spawns `npx cursor-api-proxy` detached, waits for health
82
+ * - Spawn fails (not installed) → returns false, caller falls back to manual instructions
83
+ *
84
+ * Returns true when the proxy is reachable at PROXY_DEFAULT_URL.
85
+ */
86
+ export declare function ensureCursorProxyRunning(baseUrl?: string): Promise<boolean>;
87
+ /**
88
+ * Full install + configure flow for cursor-api-proxy.
70
89
  * Walks through CLI install, API key config, and proxy start.
90
+ * Only needed when the quick auto-start (`ensureCursorProxyRunning`) fails —
91
+ * e.g. npx can't find the package or the user has no API key yet.
71
92
  * Returns true when proxy is running and healthy.
72
93
  */
73
94
  export declare function setupCursorProxy(): Promise<boolean>;