npm - claude-overnight - Versions diffs - 1.19.0 → 1.23.0 - Mend

claude-overnight 1.19.0 → 1.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19)

package/dist/_version.d.ts +1 -0
package/dist/_version.js +2 -0
package/dist/cursor-models.js +5 -7
package/dist/index.js +23 -12
package/dist/models.d.ts +20 -24
package/dist/models.js +86 -54
package/dist/planner-query.d.ts +0 -2
package/dist/planner-query.js +1 -4
package/dist/planner.d.ts +1 -1
package/dist/planner.js +20 -43
package/dist/providers.d.ts +26 -1
package/dist/providers.js +458 -93
package/dist/run.js +46 -85
package/dist/steering.js +4 -3
package/dist/swarm.d.ts +2 -0
package/dist/swarm.js +13 -1
package/dist/types.d.ts +1 -1
package/package.json +2 -2
package/plugins/claude-overnight/.claude-plugin/plugin.json +1 -1

package/dist/_version.d.ts ADDED Viewed

	@@ -0,0 +1 @@
1	+ export declare const VERSION = "1.23.0";

package/dist/_version.js ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ // Auto-generated by build — do not edit manually.
2	+ export const VERSION = "1.23.0";

package/dist/cursor-models.js CHANGED Viewed

@@ -11,7 +11,7 @@
 // The `priority` models always appear at the top of the picker in this order.
 // `known` models appear after them. Anything the proxy returns dynamically
 // that isn't in this list goes into a "more..." sub-menu.
-import { CURSOR_MODEL_HINTS } from "./models.js";
+import { modelDisplayName, formatContextWindow } from "./models.js";
 export const CURSOR_PRIORITY_MODELS = [
     { id: "composer-2", label: "composer-2", hint: "Cursor Composer 2 — latest, strongest Cursor model" },
     { id: "composer-2-fast", label: "composer-2-fast", hint: "Cursor Composer 2 Fast — faster, cheaper variant" },
@@ -34,12 +34,10 @@ export function cursorModelHint(modelId) {
     }
     if (m.startsWith("composer"))
         return "Cursor Composer model";
-    if (m.includes("opus"))
-        return CURSOR_MODEL_HINTS.opus;
-    if (m.includes("sonnet"))
-        return CURSOR_MODEL_HINTS.sonnet;
-    if (m.includes("haiku"))
-        return CURSOR_MODEL_HINTS.haiku;
+    // For Claude variants through Cursor, use the shared display name
+    const displayName = modelDisplayName(modelId);
+    if (displayName !== modelId)
+        return `${displayName} via Cursor · ${formatContextWindow(modelId)} context`;
     if (m.startsWith("gpt-5"))
         return "GPT model via Cursor";
     if (m.startsWith("gemini"))

package/dist/index.js CHANGED Viewed

@@ -2,16 +2,14 @@
 import { readFileSync, existsSync, readdirSync, mkdirSync } from "fs";
 import { resolve, dirname, join } from "path";
 import { fileURLToPath } from "url";
-import { createRequire } from "module";
 import chalk from "chalk";
-const pkg = createRequire(import.meta.url)("../package.json");
-const VERSION = pkg.version;
+import { VERSION } from "./_version.js";
 import { query } from "@anthropic-ai/claude-agent-sdk";
 import { Swarm } from "./swarm.js";
 import { planTasks, refinePlan, identifyThemes, buildThinkingTasks, orchestrate, salvageFromFile } from "./planner.js";
-import { detectModelTier, setPlannerEnvResolver } from "./planner-query.js";
-import { DEFAULT_MODEL } from "./models.js";
-import { pickModel, loadProviders, preflightProvider, buildEnvResolver, healthCheckCursorProxy, PROXY_DEFAULT_URL, isCursorProxyProvider } from "./providers.js";
+import { modelDisplayName, formatContextWindow, DEFAULT_MODEL } from "./models.js";
+import { setPlannerEnvResolver } from "./planner-query.js";
+import { pickModel, loadProviders, preflightProvider, buildEnvResolver, healthCheckCursorProxy, PROXY_DEFAULT_URL, isCursorProxyProvider, ensureCursorProxyRunning } from "./providers.js";
 import { RunDisplay } from "./ui.js";
 import { renderSummary } from "./render.js";
 import { executeRun } from "./run.js";
@@ -69,7 +67,6 @@ async function promptResumeOverrides(state, cliFlags, argv, noTTY, runDir) {
     const modelsPromise = fetchModels(20_000).catch(() => []);
     // ── Interactive review ──
     const fmtSummary = () => {
-        const tier = detectModelTier(state.workerModel);
         const remaining = Math.max(1, state.remaining);
         const capStr = state.usageCap != null ? `${Math.round(state.usageCap * 100)}%` : "unlimited";
         const extraStr = state.allowExtraUsage
@@ -78,7 +75,7 @@ async function promptResumeOverrides(state, cliFlags, argv, noTTY, runDir) {
         console.log();
         console.log(`  ${chalk.dim("Resume settings")}`);
         console.log(`  ${chalk.dim("─".repeat(40))}`);
-        console.log(`  ${chalk.dim("model      ")}${chalk.white(state.workerModel)} ${chalk.dim(`(${tier})`)}`);
+        console.log(`  ${chalk.dim("model      ")}${chalk.white(state.workerModel)} ${chalk.dim(`(${formatContextWindow(state.workerModel)} context)`)}`);
         console.log(`  ${chalk.dim("remaining  ")}${chalk.white(String(remaining))} ${chalk.dim("sessions")}`);
         console.log(`  ${chalk.dim("concur     ")}${chalk.white(String(state.concurrency))}`);
         console.log(`  ${chalk.dim("usage cap  ")}${chalk.white(capStr)}`);
@@ -627,11 +624,11 @@ async function main() {
         }
         const parts = [];
         if (fastModel)
-            parts.push(`${detectModelTier(plannerModel)} → ${detectModelTier(workerModel)} + ${detectModelTier(fastModel)}`);
+            parts.push(`${modelDisplayName(plannerModel)} → ${modelDisplayName(workerModel)} + ${modelDisplayName(fastModel)}`);
         else if (workerModel !== plannerModel)
-            parts.push(`${detectModelTier(workerModel)} → ${detectModelTier(plannerModel)}`);
+            parts.push(`${modelDisplayName(workerModel)} → ${modelDisplayName(plannerModel)}`);
         else
-            parts.push(detectModelTier(workerModel));
+            parts.push(modelDisplayName(workerModel));
         parts.push(`budget ${budget}`, `${concurrency}×`);
         if (budget > 2)
             parts.push("flex");
@@ -751,18 +748,31 @@ async function main() {
             ["fast", fastProvider],
         ];
         const pending = [];
+        const cursorProxies = [];
         for (const [role, p] of all) {
             if (p && !seen.has(p.id)) {
                 seen.add(p.id);
                 pending.push([role, p]);
+                if (isCursorProxyProvider(p))
+                    cursorProxies.push(p);
             }
         }
+        // Auto-start cursor proxy before pinging
+        if (cursorProxies.length > 0) {
+            await ensureCursorProxyRunning();
+        }
         process.stdout.write(`  ${chalk.dim(`◆ Pinging ${pending.map(([r, p]) => `${r} (${p.displayName})`).join(", ")}...`)}\n`);
         const results = await Promise.all(pending.map(async ([role, p]) => ({ role, provider: p, result: await preflightProvider(p, cwd) })));
         for (const { role, provider, result } of results) {
             if (!result.ok) {
                 console.error(chalk.red(`  ✗ ${role} preflight failed: ${chalk.dim(result.error)}`));
-                console.error(chalk.red(`\n  Fix the provider at ~/.claude/claude-overnight/providers.json and retry.\n`));
+                if (isCursorProxyProvider(provider)) {
+                    console.error(chalk.yellow(`  The proxy at ${PROXY_DEFAULT_URL} may have crashed. Start it: npx cursor-api-proxy`));
+                }
+                else {
+                    console.error(chalk.red(`  Fix the provider at ~/.claude/claude-overnight/providers.json and retry.`));
+                }
+                console.error("");
                 process.exit(1);
             }
             console.log(`  ${chalk.green(`✓ ${role} ready`)} ${chalk.dim(`· ${provider.displayName} · ${provider.model}`)}`);
@@ -899,6 +909,7 @@ async function main() {
                         tasks: thinkingTasks, concurrency, cwd, model: plannerModel, permissionMode,
                         useWorktrees: false, mergeStrategy: "yolo", agentTimeoutMs, usageCap, allowExtraUsage, extraUsageBudget,
                         envForModel,
+                        cursorProxy: [plannerProvider, workerProvider, fastProvider].some(p => p && isCursorProxyProvider(p)),
                     });
                     const thinkRunInfo = { accIn: 0, accOut: 0, accCost: 0, accCompleted: 0, accFailed: 0, sessionsBudget: budget ?? 10, waveNum: -1, remaining: budget ?? 10, model: plannerModel, startedAt: Date.now() };
                     const thinkDisplay = new RunDisplay(thinkRunInfo, { remaining: 0, usageCap, concurrency, paused: false, dirty: false });

package/dist/models.d.ts CHANGED Viewed

@@ -1,27 +1,23 @@
-export declare const MODEL_TIER_OPUS = "opus";
-export declare const MODEL_TIER_SONNET = "sonnet";
-export declare const MODEL_TIER_HAIKU = "haiku";
-export declare const MODEL_TIER_UNKNOWN = "unknown";
-export type ModelTier = typeof MODEL_TIER_OPUS | typeof MODEL_TIER_SONNET | typeof MODEL_TIER_HAIKU | typeof MODEL_TIER_UNKNOWN;
-export interface TierDetectionRule {
-    match: (model: string) => boolean;
-    tier: ModelTier;
+export interface ModelCapability {
+    contextWindow: number;
+    contextConstraint: "tight" | "moderate" | "relaxed";
+    /** Human-readable label for UI display. Falls back to the model key if absent. */
+    displayName?: string;
 }
-export declare const TIER_DETECTION_RULES: TierDetectionRule[];
-export declare function detectModelTier(model: string): ModelTier;
-export declare const MODEL_CAPABILITY_DESCRIPTIONS: Record<ModelTier, string>;
-export declare const UNKNOWN_MODEL_CAPABILITIES: Record<string, string>;
-export declare function modelCapabilityBlock(model: string): string;
+export declare const MODEL_CAPABILITIES: Record<string, ModelCapability>;
 export declare const DEFAULT_MODEL = "claude-sonnet-4-6";
 export declare const FALLBACK_MODEL = "claude-opus-4-6";
-export declare const PLANNER_THRESHOLDS: {
-    opus: {
-        small: number;
-        medium: number;
-    };
-    default: {
-        small: number;
-        medium: number;
-    };
-};
-export declare const CURSOR_MODEL_HINTS: Record<string, string>;
+/**
+ * Find capability info for a model string. Tries: lowercase exact match →
+ * exact match as given → substring match. Falls back to the "unknown" entry.
+ */
+export declare function getModelCapability(model: string): ModelCapability;
+/** Human-readable model name for display (e.g. in run labels). */
+export declare function modelDisplayName(model: string): string;
+/**
+ * Context constraint instruction injected into planner prompts.
+ * Tells the planner how to scope tasks based on the worker model's context.
+ */
+export declare function contextConstraintNote(model: string): string;
+/** Format context window for display (e.g. "256K"). */
+export declare function formatContextWindow(model: string): string;

package/dist/models.js CHANGED Viewed

@@ -1,60 +1,92 @@
-// ── Model tier constants ──
-export const MODEL_TIER_OPUS = "opus";
-export const MODEL_TIER_SONNET = "sonnet";
-export const MODEL_TIER_HAIKU = "haiku";
-export const MODEL_TIER_UNKNOWN = "unknown";
-export const TIER_DETECTION_RULES = [
-    { match: m => m === "default" || m.includes("opus"), tier: MODEL_TIER_OPUS },
-    { match: m => m.includes("sonnet"), tier: MODEL_TIER_SONNET },
-    { match: m => m.includes("haiku"), tier: MODEL_TIER_HAIKU },
-    { match: m => m === "auto", tier: MODEL_TIER_UNKNOWN },
-    { match: m => m.startsWith("composer"), tier: MODEL_TIER_SONNET },
-    { match: m => m.startsWith("gpt-5") || m.startsWith("gemini") || m.startsWith("grok"), tier: MODEL_TIER_SONNET },
-];
-export function detectModelTier(model) {
+// ── Model capability catalog ──
+//
+// Single source of truth for model capabilities. Update this when new models
+// arrive (which happens basically daily). Each entry describes what the model
+// can handle in terms of context and task scoping.
+//
+// contextConstraint:
+//   "tight"    — small context window. Model is lazy and error-prone on big
+//                tasks. Needs surgical, hyper-specific instructions.
+//   "moderate" — decent context. Can handle focused missions but may lose
+//                thread on sprawling codebases.
+//   "relaxed"  — large context. Can read most of the codebase at once,
+//                reliably own multi-file features with autonomy.
+export const MODEL_CAPABILITIES = {
+    // ── Anthropic Claude 4.5 / 4.6 ──
+    "claude-sonnet-4-6": { contextWindow: 256_000, contextConstraint: "relaxed", displayName: "Sonnet 4.6" },
+    "claude-sonnet-4-5": { contextWindow: 256_000, contextConstraint: "relaxed", displayName: "Sonnet 4.5" },
+    "claude-opus-4-6": { contextWindow: 200_000, contextConstraint: "relaxed", displayName: "Opus 4.6" },
+    "claude-opus-4-5": { contextWindow: 200_000, contextConstraint: "relaxed", displayName: "Opus 4.5" },
+    "claude-opus-4-20250514": { contextWindow: 200_000, contextConstraint: "relaxed", displayName: "Opus 4" },
+    "claude-haiku-4-5": { contextWindow: 200_000, contextConstraint: "moderate", displayName: "Haiku 4.5" },
+    "claude-haiku-4-5-20251001": { contextWindow: 200_000, contextConstraint: "moderate", displayName: "Haiku 4.5" },
+    // ── Cursor models ──
+    "auto": { contextWindow: 256_000, contextConstraint: "relaxed", displayName: "Cursor Auto" },
+    "composer-2": { contextWindow: 200_000, contextConstraint: "relaxed", displayName: "Composer 2" },
+    "composer-2-fast": { contextWindow: 128_000, contextConstraint: "moderate", displayName: "Composer 2 Fast" },
+    "composer": { contextWindow: 128_000, contextConstraint: "moderate", displayName: "Composer" },
+    // ── Qwen (via DashScope / custom provider) ──
+    "qwen3.6-plus": { contextWindow: 131_072, contextConstraint: "moderate", displayName: "Qwen 3.6 Plus" },
+    "qwen3-coder": { contextWindow: 262_144, contextConstraint: "relaxed", displayName: "Qwen 3 Coder" },
+    "qwen-max": { contextWindow: 32_768, contextConstraint: "tight", displayName: "Qwen Max" },
+    // ── Fallback for unknown models ──
+    "unknown": { contextWindow: 128_000, contextConstraint: "moderate" },
+};
+// ── Default / fallback models ──
+export const DEFAULT_MODEL = "claude-sonnet-4-6";
+export const FALLBACK_MODEL = "claude-opus-4-6"; // used for planner + worker recovery
+// ── Lookup ──
+/**
+ * Find capability info for a model string. Tries: lowercase exact match →
+ * exact match as given → substring match. Falls back to the "unknown" entry.
+ */
+export function getModelCapability(model) {
     const m = model.toLowerCase();
-    for (const rule of TIER_DETECTION_RULES) {
-        if (rule.match(m))
-            return rule.tier;
+    if (MODEL_CAPABILITIES[m])
+        return MODEL_CAPABILITIES[m];
+    if (MODEL_CAPABILITIES[model])
+        return MODEL_CAPABILITIES[model];
+    for (const [key, cap] of Object.entries(MODEL_CAPABILITIES)) {
+        if (key !== "unknown" && m.includes(key))
+            return cap;
     }
-    return MODEL_TIER_UNKNOWN;
+    return MODEL_CAPABILITIES.unknown;
 }
-// ── Capability descriptions ──
-export const MODEL_CAPABILITY_DESCRIPTIONS = {
-    opus: "Each agent runs Claude Opus with 1M context  -- a powerhouse. It can own entire epics, do deep codebase research, make architectural decisions, implement complex multi-file systems end-to-end, use browser tools for analysis, and deliver expert-level work. These agents can work for 30+ minutes on the most complex tasks. Do NOT waste them on trivial edits  -- give them ownership and autonomy.",
-    sonnet: "Each agent runs Claude Sonnet  -- capable of substantial implementation, refactoring, testing, and design work. Can work autonomously for 10-20 minutes on complex tasks. Give agents meaningful scope  -- not just single-line edits.",
-    haiku: "Each agent runs Claude Haiku  -- fast and efficient, best for focused, well-specified tasks. Be explicit about files, functions, and expected changes. Keep tasks scoped to a clear, concrete deliverable.",
-    unknown: "", // handled by UNKNOWN_MODEL_CAPABILITIES below
-};
-export const UNKNOWN_MODEL_CAPABILITIES = {
-    composer: "Each agent runs a Cursor Composer model with full codebase access. Capable of focused implementation work. Be explicit about files, functions, and expected changes.",
-    "gpt-5": "Each agent runs a GPT model via Cursor with full codebase access. Capable of focused implementation work. Be explicit about files, functions, and expected changes.",
-    gemini: "Each agent runs a Gemini model via Cursor with full codebase access. Be explicit about files, functions, and expected changes.",
-    grok: "Each agent runs a Grok model via Cursor with full codebase access. Be explicit about files, functions, and expected changes.",
-};
-export function modelCapabilityBlock(model) {
-    const tier = detectModelTier(model);
-    const cap = MODEL_CAPABILITY_DESCRIPTIONS[tier];
-    if (cap)
-        return cap;
+/** Human-readable model name for display (e.g. in run labels). */
+export function modelDisplayName(model) {
     const m = model.toLowerCase();
-    for (const [prefix, desc] of Object.entries(UNKNOWN_MODEL_CAPABILITIES)) {
-        if (m.startsWith(prefix))
-            return desc;
+    // Exact match
+    if (MODEL_CAPABILITIES[m]?.displayName)
+        return MODEL_CAPABILITIES[m].displayName;
+    if (MODEL_CAPABILITIES[model]?.displayName)
+        return MODEL_CAPABILITIES[model].displayName;
+    // Substring match
+    for (const [key, cap] of Object.entries(MODEL_CAPABILITIES)) {
+        if (key !== "unknown" && m.includes(key) && cap.displayName)
+            return cap.displayName;
     }
-    return `Each agent has full codebase access and can work autonomously.`;
+    return model;
+}
+/**
+ * Context constraint instruction injected into planner prompts.
+ * Tells the planner how to scope tasks based on the worker model's context.
+ */
+export function contextConstraintNote(model) {
+    const cap = getModelCapability(model);
+    const ctx = Math.round(cap.contextWindow / 1000);
+    switch (cap.contextConstraint) {
+        case "tight":
+            return `Worker agents have a TIGHT context window (~${ctx}K tokens). They are prone losing thread on large tasks. Be hyper-specific: name exact files, functions, and changes. One narrow deliverable per task. No ambiguity.`;
+        case "moderate":
+            return `Worker agents have a moderate context window (~${ctx}K tokens). They can handle focused missions but may struggle with sprawling codebases. Be specific about files and expected outcomes. Scope tasks to clear, concrete deliverables.`;
+        case "relaxed":
+            return `Worker agents have a large context window (~${ctx}K tokens). They can read most of the codebase at once and reliably own multi-file features. Give them missions with full autonomy — "Design and implement X" not "edit line 42 of Y.ts".`;
+    }
+}
+/** Format context window for display (e.g. "256K"). */
+export function formatContextWindow(model) {
+    const cap = getModelCapability(model);
+    if (cap.contextWindow >= 1_000_000)
+        return `${(cap.contextWindow / 1_000_000).toFixed(1)}M`;
+    return `${Math.round(cap.contextWindow / 1000)}K`;
 }
-// ── Default / fallback models ──
-export const DEFAULT_MODEL = "claude-sonnet-4-6";
-export const FALLBACK_MODEL = "claude-opus-4-6"; // used for planner + worker recovery
-// ── Planner thresholds (opus-tuned vs default) ──
-export const PLANNER_THRESHOLDS = {
-    opus: { small: 5, medium: 30 },
-    default: { small: 15, medium: 50 },
-};
-// ── Cursor model hints ──
-export const CURSOR_MODEL_HINTS = {
-    opus: "Opus-tier model via Cursor",
-    sonnet: "Sonnet-tier model via Cursor",
-    haiku: "Haiku-tier model via Cursor (fast)",
-};

package/dist/planner-query.d.ts CHANGED Viewed

@@ -1,6 +1,4 @@
 import type { Task, PermMode, RateLimitWindow } from "./types.js";
-import { detectModelTier, modelCapabilityBlock } from "./models.js";
-export { detectModelTier, modelCapabilityBlock };
 /**
  * Logging callback used by planner/steering queries.
  * `kind` distinguishes ephemeral status updates (heartbeat ticker) from

package/dist/planner-query.js CHANGED Viewed

@@ -1,9 +1,6 @@
 import { query } from "@anthropic-ai/claude-agent-sdk";
 import { readFileSync } from "fs";
 import { NudgeError } from "./types.js";
-import { detectModelTier, modelCapabilityBlock } from "./models.js";
-// Re-export for consumers that import from planner-query (steering.ts, index.ts).
-export { detectModelTier, modelCapabilityBlock };
 // ── Shared env resolver (set once at run start, used by every planner query) ──
 //
 // Swarm and planner calls share a model→env map so a custom provider configured
@@ -25,7 +22,7 @@ let _plannerRateLimitInfo = {
     utilization: 0, status: "", isUsingOverage: false, windows: new Map(), costUsd: 0,
 };
 export function getPlannerRateLimitInfo() { return _plannerRateLimitInfo; }
-// ── Proactive throttle: wait before making API calls when utilization is high ──
+// ── Proactive rate-limit gate ──
 /**
  * Proactive rate-limit gate. Called before each planner/steering query to
  * prevent hammering the API when we're already near a limit.

package/dist/planner.d.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 import type { Task, PermMode } from "./types.js";
 export declare function salvageFromFile(outFile: string | undefined, budget: number | undefined, onLog: (text: string, kind?: "status" | "event") => void, why: string): Task[] | null;
-export declare const DESIGN_THINKING = "\nHOW TO THINK ABOUT EVERY TASK:\n\nStart from the user's job. What is someone hiring this product to do? \"I need to send money abroad cheaply\"  -- not \"I need a currency conversion API.\" Every decision  -- what to build, how fast it responds, what happens on error  -- flows from the job.\n\nThe experience IS the product. A 200ms server response is not a \"performance metric\"  -- it's the difference between an app that feels alive and one that feels broken. A loading state is not \"polish\"  -- it's the user knowing the app heard them. An error message is not \"error handling\"  -- it's the app being honest. There is no line between backend and UX. The server, the API, the database query, the render  -- they're all one experience the user either trusts or doesn't.\n\nBuild the core, verify it works, learn, iterate. Don't plan 20 features and build them all. Build the ONE thing that matters most, run it, see if it actually works from a user's chair. What you learn from seeing it run will change what you build next. Each wave should make what exists better before adding what doesn't exist yet.\n\nConsistency is what makes complex things feel simple. One design system, rigid rules, no exceptions. This is how Revolut ships a super-app with 30+ features that doesn't feel like chaos.\n";
+export declare const DESIGN_THINKING = "\nHOW TO THINK ABOUT EVERY TASK:\n\nStart from the user's job. What is someone hiring this product to do? \"I need to send money abroad cheaply\"  -- not \"I need a currency conversion API.\" Every decision  -- what to build, how fast it needs to respond, what happens on error  -- flows from the job.\n\nThe experience IS the product. A 200ms server response is not a \"performance metric\"  -- it's the difference between an app that feels alive and one that feels broken. A loading state is not \"polish\"  -- it's the user knowing the app heard them. An error message is not \"error handling\"  -- it's the app being honest. There is no line between backend and UX. The server, the API, the database query, the render  -- they're all one experience the user either trusts or doesn't.\n\nBuild the core, verify it works, learn, iterate. Don't plan 20 features and build them all. Build the ONE thing that matters most, run it, see if it actually works from a user's chair. What you learn from seeing it run will change what you build next. Each wave should make what exists better before adding what doesn't exist yet.\n\nConsistency is what makes complex things feel simple. One design system, rigid rules, no exceptions. This is how Revolut ships a super-app with 30+ features that doesn't feel like chaos.\n";
 export declare function planTasks(objective: string, cwd: string, plannerModel: string, workerModel: string, permissionMode: PermMode, budget: number | undefined, concurrency: number, onLog: (text: string) => void, flexNote?: string, outFile?: string): Promise<Task[]>;
 export declare function identifyThemes(objective: string, count: number, cwd: string, model: string, permissionMode: PermMode, onLog?: (text: string) => void): Promise<string[]>;
 export declare function buildThinkingTasks(objective: string, themes: string[], designDir: string, plannerModel: string, previousKnowledge?: string): Task[];

package/dist/planner.js CHANGED Viewed

@@ -1,6 +1,6 @@
 import { readFileSync } from "fs";
 import { runPlannerQuery, extractTaskJson, attemptJsonParse, postProcess } from "./planner-query.js";
-import { detectModelTier, modelCapabilityBlock, MODEL_TIER_HAIKU, MODEL_TIER_OPUS, PLANNER_THRESHOLDS } from "./models.js";
+import { contextConstraintNote } from "./models.js";
 // Resilience: if the planner query throws but the agent already wrote valid
 // tasks to `outFile` (via its Write tool), salvage them instead of discarding
 // expensive work. Returns salvaged tasks on success, null if nothing usable on
@@ -29,7 +29,7 @@ export function salvageFromFile(outFile, budget, onLog, why) {
 export const DESIGN_THINKING = `
 HOW TO THINK ABOUT EVERY TASK:
-Start from the user's job. What is someone hiring this product to do? "I need to send money abroad cheaply"  -- not "I need a currency conversion API." Every decision  -- what to build, how fast it responds, what happens on error  -- flows from the job.
+Start from the user's job. What is someone hiring this product to do? "I need to send money abroad cheaply"  -- not "I need a currency conversion API." Every decision  -- what to build, how fast it needs to respond, what happens on error  -- flows from the job.
 The experience IS the product. A 200ms server response is not a "performance metric"  -- it's the difference between an app that feels alive and one that feels broken. A loading state is not "polish"  -- it's the user knowing the app heard them. An error message is not "error handling"  -- it's the app being honest. There is no line between backend and UX. The server, the API, the database query, the render  -- they're all one experience the user either trusts or doesn't.
@@ -54,53 +54,30 @@ const THEMES_SCHEMA = {
         required: ["themes"],
     },
 };
-// ── Budget + model aware prompt strategy ──
+// ── Budget breakpoints for prompt strategy ──
+const BUDGET_FOCUSED = 10; // ≤ this → surgical, file-specific tasks
+const BUDGET_SUBSTANTIAL = 30; // ≤ this → mission-level, autonomous agents
+// ── Context-aware prompt strategy ──
 function plannerPrompt(objective, workerModel, budget, concurrency, flexNote) {
-    const b = budget ?? 10;
-    const tier = detectModelTier(workerModel);
-    const capability = modelCapabilityBlock(workerModel);
+    const b = budget ?? BUDGET_FOCUSED;
+    const constraint = contextConstraintNote(workerModel);
     const concLine = concurrency
         ? `\n- ${concurrency} agents run in parallel  -- tasks that run concurrently must touch DIFFERENT files to avoid merge conflicts`
         : "";
     const flexLine = flexNote ? `\n\n${flexNote}` : "";
-    if (tier === MODEL_TIER_HAIKU) {
+    if (b <= BUDGET_FOCUSED) {
         return `You are a task coordinator for a parallel agent system. Analyze this codebase and break the following objective into independent tasks.
 Objective: ${objective}
-AGENT CAPABILITY: ${capability}
+${constraint}
 Requirements:
 - Target exactly ~${b} tasks
 - Each task MUST be independent  -- no task depends on another
 - Each task should target specific files/areas to avoid merge conflicts
 - Be specific: mention exact file paths, function names, what to change
-- Keep tasks focused: one concrete change per task  -- Haiku agents work best with clear, scoped instructions${concLine}${flexLine}
-Respond with ONLY a JSON object (no markdown fences):
-{
-  "tasks": [
-    { "prompt": "In src/foo.ts, refactor the bar() function to..." },
-    { "prompt": "Add unit tests for the baz module in test/baz.test.ts..." }
-  ]
-}`;
-    }
-    const thresholds = tier === MODEL_TIER_OPUS ? PLANNER_THRESHOLDS.opus : PLANNER_THRESHOLDS.default;
-    const smallThreshold = thresholds.small;
-    const mediumThreshold = thresholds.medium;
-    if (b <= smallThreshold) {
-        return `You are a task coordinator for a parallel agent system. Analyze this codebase and break the following objective into independent tasks.
-Objective: ${objective}
-AGENT CAPABILITY: ${capability}
-Requirements:
-- Each task MUST be independent  -- no task depends on another
-- Each task should target specific files/areas to avoid merge conflicts
-- Be specific: mention exact file paths, function names, what to change
-- Keep tasks focused: one logical change per task
-- Target exactly ~${b} tasks${concLine}${flexLine}
+- Keep tasks focused: one concrete change per task${concLine}${flexLine}
 Respond with ONLY a JSON object (no markdown fences):
 {
@@ -110,18 +87,18 @@ Respond with ONLY a JSON object (no markdown fences):
   ]
 }`;
     }
-    if (b <= mediumThreshold) {
+    if (b <= BUDGET_SUBSTANTIAL) {
         return `You are a task coordinator for a parallel agent system with ${b} agent sessions available.
 Objective: ${objective}
-AGENT CAPABILITY: ${capability}
+${constraint}
 Do NOT over-specify. Give each agent a MISSION, not step-by-step instructions. Let agents make their own decisions about implementation details.
 Requirements:
 - Target exactly ~${b} tasks
-- Each task should be a substantial piece of work (5-30 minutes of agent time)
+- Each task should be a substantial piece of work
 - Each task MUST be independent  -- no task depends on another
 - Tasks that run concurrently must touch DIFFERENT files/areas to avoid merge conflicts
 - Give agents scope and autonomy: "Design and implement X" not "In file Y, add function Z"
@@ -140,7 +117,7 @@ Respond with ONLY a JSON object (no markdown fences):
 Objective: ${objective}
-AGENT CAPABILITY: ${capability}
+${constraint}
 With ${b} sessions, you should think BIG:
 - Full feature implementations spanning multiple files
@@ -156,7 +133,7 @@ With ${b} sessions, you should think BIG:
 Requirements:
 - Target exactly ~${b} tasks
-- Each task should be substantial: 10-30 minutes of autonomous agent work
+- Each task should be substantial: significant autonomous agent work
 - Each task MUST be independent  -- no task depends on another
 - Tasks that run concurrently must target DIFFERENT files/areas to avoid merge conflicts
 - Give agents missions with full autonomy: "Own the entire X subsystem" not "edit line 42 of Y.ts"
@@ -245,7 +222,7 @@ Be thorough  -- your findings drive the execution plan.`,
     }));
 }
 export async function orchestrate(objective, designDocs, cwd, plannerModel, workerModel, permissionMode, budget, concurrency, onLog, flexNote, outFile) {
-    const capability = modelCapabilityBlock(workerModel);
+    const constraint = contextConstraintNote(workerModel);
     const flexLine = flexNote ? `\n\n${flexNote}` : "";
     const fileInstruction = outFile ? `\n\nAFTER generating the JSON, also write it to ${outFile} using the Write tool.` : "";
     const prompt = `You are a tech lead planning a sprint based on your team's codebase research.
@@ -256,7 +233,7 @@ Your architects explored the codebase and found:
 ${designDocs}
-AGENT CAPABILITY: ${capability}
+${constraint}
 ${DESIGN_THINKING}
 Create exactly ~${budget} concrete execution tasks based on these findings.
@@ -298,7 +275,7 @@ Respond with ONLY a JSON object (no markdown fences):
 export async function refinePlan(objective, previousTasks, feedback, cwd, plannerModel, workerModel, permissionMode, budget, concurrency, onLog) {
     onLog("Refining plan...");
     const prev = previousTasks.map((t, i) => `${i + 1}. ${t.prompt}`).join("\n");
-    const capability = modelCapabilityBlock(workerModel);
+    const constraint = contextConstraintNote(workerModel);
     const b = budget ?? 10;
     const scaleNote = b > 50 ? `This is a LARGE budget (${b} sessions). Think big  -- missions, not micro-tasks.`
         : b > 15 ? `Each of the ${b} sessions is a capable AI agent. Give substantial missions, not trivial edits.`
@@ -312,7 +289,7 @@ ${prev}
 The user wants changes: ${feedback}
-AGENT CAPABILITY: ${capability}
+${constraint}
 ${scaleNote} ${concurrency} agents run in parallel. Update the plan accordingly. Keep tasks independent and targeting different files/areas.

package/dist/providers.d.ts CHANGED Viewed

@@ -58,6 +58,7 @@ export declare const PROXY_DEFAULT_URL = "http://127.0.0.1:8765";
 export declare function isCursorProxyProvider(p: ProviderConfig): boolean;
 /**
  * Health check: GET /health on the proxy. Returns true if proxy is reachable.
+ * Passes the stored API key so the /health endpoint doesn't return 401.
  */
 export declare function healthCheckCursorProxy(baseUrl?: string): Promise<boolean>;
 /**
@@ -66,8 +67,32 @@ export declare function healthCheckCursorProxy(baseUrl?: string): Promise<boolea
  */
 export declare function fetchCursorModels(baseUrl?: string): Promise<string[]>;
 /**
- * Interactive setup guide for cursor-api-proxy.
+ * Auto-start the cursor-api-proxy as a detached background process.
+ *
+ * When the proxy is started, we also configure it to use system Node.js
+ * for spawning the cursor-agent subprocess. The agent's bundled Node.js
+ * segfaults with --list-models on macOS (exit 139), so we resolve the
+ * system `node` binary and the agent's index.js, patch the proxy's env.js
+ * to respect CURSOR_AGENT_NODE/SCRIPT on Unix, and pass those env vars.
+ *
+ * Handles:
+ *  - Proxy already running and verified → returns true immediately
+ *  - Something on the port but not our proxy → warns, skips spawn
+ *  - Port in use by nothing responsive → returns true (something bound it)
+ *  - Proxy not running → spawns `npx cursor-api-proxy` detached, waits for health
+ *  - Spawn fails (not installed) → returns false, caller falls back to manual instructions
+ *
+ * When `forceRestart` is true and a stale process is on the port, it will be
+ * killed and the proxy restarted.
+ *
+ * Returns true when the proxy is reachable at PROXY_DEFAULT_URL.
+ */
+export declare function ensureCursorProxyRunning(baseUrl?: string, forceRestart?: boolean): Promise<boolean>;
+/**
+ * Full install + configure flow for cursor-api-proxy.
  * Walks through CLI install, API key config, and proxy start.
+ * Only needed when the quick auto-start (`ensureCursorProxyRunning`) fails —
+ * e.g. npx can't find the package or the user has no API key yet.
  * Returns true when proxy is running and healthy.
  */
 export declare function setupCursorProxy(): Promise<boolean>;