npm - claude-overnight - Versions diffs - 0.1.2 → 0.3.2 - Mend

claude-overnight 0.1.2 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/dist/planner.js CHANGED Viewed

@@ -1,17 +1,76 @@
 import { query } from "@anthropic-ai/claude-agent-sdk";
 const INACTIVITY_MS = 5 * 60 * 1000;
-function plannerPrompt(objective, budget, concurrency) {
-    const budgetLine = budget ? `\n- Target exactly ~${budget} tasks (this is the user's agent budget)` : "\n- Aim for 3-15 tasks depending on scope";
-    const concLine = concurrency ? `\n- ${concurrency} agents will run in parallel — design tasks so parallel agents touch DIFFERENT files to avoid merge conflicts` : "";
-    return `You are a task coordinator for a parallel agent system. Analyze this codebase and break the following objective into independent tasks.
+export function detectModelTier(model) { // Classify a model identifier as "opus", "sonnet", "haiku" or "unknown" by substring match.
+    const m = model.toLowerCase(); // case-insensitive match; NOTE(review): throws if model is undefined or null — confirm callers always pass a string
+    if (m.includes("opus")) // families are tested in this order; the first hit wins
+        return "opus";
+    if (m.includes("sonnet"))
+        return "sonnet";
+    if (m.includes("haiku"))
+        return "haiku";
+    return "unknown"; // no known family name appears in the identifier
+}
+function modelCapabilityBlock(model) { // Build the capability text that prompts insert to describe what each worker agent can handle.
+    switch (detectModelTier(model)) { // tier is derived from the model name by detectModelTier
+        case "opus": // wording pushes the planner toward large, autonomous, long-running tasks
+            return `Each agent runs Claude Opus with 1M context — a powerhouse. It can own entire epics, do deep codebase research, make architectural decisions, implement complex multi-file systems end-to-end, use browser tools for analysis, and deliver expert-level work. These agents can work for 30+ minutes on the most complex tasks. Do NOT waste them on trivial edits — give them ownership and autonomy.`;
+        case "sonnet": // wording pushes the planner toward meaningful but moderate scope
+            return `Each agent runs Claude Sonnet — capable of substantial implementation, refactoring, testing, and design work. Can work autonomously for 10-20 minutes on complex tasks. Give agents meaningful scope — not just single-line edits.`;
+        case "haiku": // wording pushes the planner toward focused, explicitly specified tasks
+            return `Each agent runs Claude Haiku — fast and efficient, best for focused, well-specified tasks. Be explicit about files, functions, and expected changes. Keep each task scoped to a clear, concrete deliverable.`;
+        default: // "unknown" tier: generic statement with no scope or timing guidance
+            return `Each agent has full codebase access and can work autonomously.`;
+    }
+}
+// ── Budget + model aware prompt strategy ──
+function plannerPrompt(objective, workerModel, budget, concurrency, flexNote) {
+    const b = budget ?? 10;
+    const tier = detectModelTier(workerModel);
+    const capability = modelCapabilityBlock(workerModel);
+    const concLine = concurrency
+        ? `\n- ${concurrency} agents run in parallel — tasks that run concurrently must touch DIFFERENT files to avoid merge conflicts`
+        : "";
+    const flexLine = flexNote ? `\n\n${flexNote}` : "";
+    // Haiku always gets specific guided tasks regardless of budget
+    if (tier === "haiku") {
+        return `You are a task coordinator for a parallel agent system. Analyze this codebase and break the following objective into independent tasks.
+Objective: ${objective}
+AGENT CAPABILITY: ${capability}
+Requirements:
+- Target exactly ~${b} tasks
+- Each task MUST be independent — no task depends on another
+- Each task should target specific files/areas to avoid merge conflicts
+- Be specific: mention exact file paths, function names, what to change
+- Keep tasks focused: one concrete change per task — Haiku agents work best with clear, scoped instructions${concLine}${flexLine}
+Respond with ONLY a JSON object (no markdown fences):
+{
+  "tasks": [
+    { "prompt": "In src/foo.ts, refactor the bar() function to..." },
+    { "prompt": "Add unit tests for the baz module in test/baz.test.ts..." }
+  ]
+}`;
+    }
+    // Opus gets ambitious missions even at moderate budgets
+    const smallThreshold = tier === "opus" ? 5 : 15;
+    const mediumThreshold = tier === "opus" ? 30 : 50;
+    // Small budget: specific tasks
+    if (b <= smallThreshold) {
+        return `You are a task coordinator for a parallel agent system. Analyze this codebase and break the following objective into independent tasks.
 Objective: ${objective}
+AGENT CAPABILITY: ${capability}
 Requirements:
 - Each task MUST be independent — no task depends on another
 - Each task should target specific files/areas to avoid merge conflicts
 - Be specific: mention exact file paths, function names, what to change
-- Keep tasks focused: one logical change per task${budgetLine}${concLine}
+- Keep tasks focused: one logical change per task
+- Target exactly ~${b} tasks${concLine}${flexLine}
 Respond with ONLY a JSON object (no markdown fences):
 {
@@ -19,10 +78,77 @@ Respond with ONLY a JSON object (no markdown fences):
     { "prompt": "In src/foo.ts, refactor the bar() function to..." },
     { "prompt": "Add unit tests for the baz module in test/baz.test.ts..." }
   ]
+}`;
+    }
+    // Medium budget: substantial missions with autonomy
+    if (b <= mediumThreshold) {
+        return `You are a task coordinator for a parallel agent system with ${b} agent sessions available.
+Objective: ${objective}
+AGENT CAPABILITY: ${capability}
+Do NOT over-specify. Give each agent a MISSION, not step-by-step instructions. Let agents make their own decisions about implementation details.
+Requirements:
+- Target exactly ~${b} tasks
+- Each task should be a substantial piece of work (5-30 minutes of agent time)
+- Each task MUST be independent — no task depends on another
+- Tasks that run concurrently must touch DIFFERENT files/areas to avoid merge conflicts
+- Give agents scope and autonomy: "Design and implement X" not "In file Y, add function Z"
+- Include research/exploration tasks, design tasks, implementation tasks, testing tasks, and polish tasks
+- Think in terms of workstreams: architecture, features, tests, docs, UX, performance, etc.${concLine}${flexLine}
+Respond with ONLY a JSON object (no markdown fences):
+{
+  "tasks": [
+    { "prompt": "Design and implement the complete user favorites system: database schema, API routes, client hooks, and error handling. Research existing patterns in the codebase first." },
+    { "prompt": "Audit all existing API routes for consistency, error handling, and input validation. Fix any issues found." }
+  ]
+}`;
+    }
+    // Large budget: ambitious multi-workstream decomposition
+    return `You are a task coordinator for a parallel agent system with ${b} agent sessions available. This is a LARGE budget — equivalent to months of professional engineering work.
+Objective: ${objective}
+AGENT CAPABILITY: ${capability}
+With ${b} sessions, you should think BIG:
+- Full feature implementations spanning multiple files
+- Deep refactoring of entire subsystems
+- Comprehensive test suites for each module
+- UX audits and polishing passes
+- Performance optimization investigations
+- Security audits and hardening
+- Documentation and code quality passes
+- Multiple iterations of the same area (implement, then separately review/improve)
+- Edge case handling, error recovery, accessibility
+- Integration testing across features
+Requirements:
+- Target exactly ~${b} tasks
+- Each task should be substantial: 10-30 minutes of autonomous agent work
+- Each task MUST be independent — no task depends on another
+- Tasks that run concurrently must target DIFFERENT files/areas to avoid merge conflicts
+- Give agents missions with full autonomy: "Own the entire X subsystem" not "edit line 42 of Y.ts"
+- Cover ALL aspects: architecture, implementation, testing, UX, performance, security, polish
+- It's OK to have multiple tasks for the same area if they target different concerns (e.g. one implements, another writes tests, another does a UX polish pass)
+- Organize by workstreams: core features, supporting infrastructure, quality, polish
+- Think about what a team of ${b} senior engineers could accomplish in parallel${concLine}${flexLine}
+Respond with ONLY a JSON object (no markdown fences):
+{
+  "tasks": [
+    { "prompt": "Own the complete implementation of [feature X]: research the codebase for patterns, design the architecture, implement the database layer, API routes, and client hooks. Make it production-ready." },
+    { "prompt": "Comprehensive test suite for [module Y]: unit tests, integration tests, edge cases, error scenarios. Aim for high coverage and meaningful assertions." },
+    { "prompt": "UX audit and polish pass on [area Z]: review all user-facing flows, improve error messages, loading states, empty states, and micro-interactions." }
+  ]
 }`;
 }
 async function runPlannerQuery(prompt, opts, onLog) {
     let resultText = "";
+    const startedAt = Date.now();
     const pq = query({
         prompt,
         options: {
@@ -36,6 +162,17 @@ async function runPlannerQuery(prompt, opts, onLog) {
             includePartialMessages: true,
         },
     });
+    // Progress ticker — show elapsed time so it doesn't look frozen
+    let lastLogText = "";
+    let toolCount = 0;
+    const ticker = setInterval(() => {
+        const elapsed = Math.round((Date.now() - startedAt) / 1000);
+        const m = Math.floor(elapsed / 60);
+        const s = elapsed % 60;
+        const timeStr = m > 0 ? `${m}m ${s}s` : `${s}s`;
+        const extra = lastLogText ? ` — ${lastLogText}` : "";
+        onLog(`${timeStr} elapsed, ${toolCount} tool calls${extra}`);
+    }, 3000);
     let lastActivity = Date.now();
     let timer;
     const watchdog = new Promise((_, reject) => {
@@ -55,8 +192,21 @@ async function runPlannerQuery(prompt, opts, onLog) {
             lastActivity = Date.now();
             if (msg.type === "stream_event") {
                 const ev = msg.event;
-                if (ev?.type === "content_block_start" && ev.content_block?.type === "tool_use")
+                if (ev?.type === "content_block_start" && ev.content_block?.type === "tool_use") {
+                    toolCount++;
+                    lastLogText = ev.content_block.name;
                     onLog(ev.content_block.name);
+                }
+                // Stream text snippets so the user sees the planner is thinking
+                if (ev?.type === "content_block_delta") {
+                    const delta = ev.delta;
+                    if (delta?.type === "text_delta" && delta.text) {
+                        const snippet = delta.text.trim();
+                        if (snippet.length > 3) {
+                            lastLogText = snippet.slice(0, 60);
+                        }
+                    }
+                }
             }
             if (msg.type === "result") {
                 if (msg.subtype === "success")
@@ -71,10 +221,11 @@ async function runPlannerQuery(prompt, opts, onLog) {
     }
     finally {
         clearTimeout(timer);
+        clearInterval(ticker);
     }
     return resultText;
 }
-function postProcess(raw, onLog) {
+function postProcess(raw, budget, onLog) {
     let tasks = raw;
     // Filter garbage (< 3 words)
     const before = tasks.length;
@@ -92,7 +243,7 @@ function postProcess(raw, onLog) {
                 continue;
             const setB = new Set(tasks[j].prompt.toLowerCase().split(/\s+/));
             const shared = [...setA].filter((w) => setB.has(w)).length;
-            const overlap = shared / Math.min(setA.size, setB.size);
+            const overlap = shared / Math.max(setA.size, setB.size);
             if (overlap > 0.8) {
                 const drop = setA.size >= setB.size ? j : i;
                 dominated.add(drop);
@@ -105,48 +256,44 @@ function postProcess(raw, onLog) {
         tasks = tasks.filter((_, i) => !dominated.has(i));
         onLog(`Deduplicated to ${tasks.length} tasks`);
     }
-    // Warn on compound tasks
-    for (const t of tasks) {
-        const parts = t.prompt.split(/\s+and\s+/i);
-        if (parts.length >= 2 && parts.every((p) => p.trim().split(/\s+/).length >= 3)) {
-            onLog(`Task ${t.id} looks compound — consider splitting`);
+    // Warn on file overlap (only for small budgets where tasks are file-specific)
+    if ((budget ?? 10) <= 15) {
+        const fileRe = /(?:^|\s)((?:[\w.-]+\/)+[\w.-]+\.\w+)/g;
+        const pathToTasks = new Map();
+        for (const t of tasks) {
+            for (const m of t.prompt.matchAll(fileRe)) {
+                const ids = pathToTasks.get(m[1]);
+                if (ids)
+                    ids.push(t.id);
+                else
+                    pathToTasks.set(m[1], [t.id]);
+            }
         }
-    }
-    // Warn on file overlap
-    const fileRe = /(?:^|\s)((?:[\w.-]+\/)+[\w.-]+\.\w+)/g;
-    const pathToTasks = new Map();
-    for (const t of tasks) {
-        for (const m of t.prompt.matchAll(fileRe)) {
-            const ids = pathToTasks.get(m[1]);
-            if (ids)
-                ids.push(t.id);
-            else
-                pathToTasks.set(m[1], [t.id]);
+        for (const [path, ids] of pathToTasks) {
+            if (ids.length > 1)
+                onLog(`Overlap risk: ${path} in tasks ${ids.join(", ")}`);
         }
     }
-    for (const [path, ids] of pathToTasks) {
-        if (ids.length > 1)
-            onLog(`Overlap risk: ${path} in tasks ${ids.join(", ")}`);
-    }
-    // Cap and sort (tests last)
-    if (tasks.length > 30) {
-        onLog(`Truncating ${tasks.length} → 30`);
-        tasks = tasks.slice(0, 30);
+    // Cap at budget (with generous headroom) — no arbitrary 30 limit
+    const cap = budget ? Math.ceil(budget * 1.2) : 30;
+    if (tasks.length > cap) {
+        onLog(`Truncating ${tasks.length} → ${cap}`);
+        tasks = tasks.slice(0, cap);
     }
     tasks.sort((a, b) => Number(/\btest/i.test(a.prompt)) - Number(/\btest/i.test(b.prompt)));
     // Re-index
     tasks = tasks.map((t, i) => ({ ...t, id: String(i) }));
     return tasks;
 }
-export async function planTasks(objective, cwd, model, permissionMode, budget, concurrency, onLog) {
+export async function planTasks(objective, cwd, plannerModel, workerModel, permissionMode, budget, concurrency, onLog, flexNote) {
     onLog("Analyzing codebase...");
-    const resultText = await runPlannerQuery(plannerPrompt(objective, budget, concurrency), { cwd, model, permissionMode }, onLog);
+    const resultText = await runPlannerQuery(plannerPrompt(objective, workerModel, budget, concurrency, flexNote), { cwd, model: plannerModel, permissionMode }, onLog);
     const parsed = await extractTaskJson(resultText, async () => {
         onLog("Retrying for valid JSON...");
         let retryText = "";
         for await (const msg of query({
             prompt: `Your previous response did not contain valid JSON. Output ONLY a JSON object:\n{"tasks":[{"prompt":"..."}]}`,
-            options: { cwd, model, permissionMode, ...(permissionMode === "bypassPermissions" && { allowDangerouslySkipPermissions: true }), persistSession: false },
+            options: { cwd, model: plannerModel, permissionMode, ...(permissionMode === "bypassPermissions" && { allowDangerouslySkipPermissions: true }), persistSession: false },
         })) {
             if (msg.type === "result" && msg.subtype === "success")
                 retryText = msg.result || "";
@@ -157,16 +304,22 @@ export async function planTasks(objective, cwd, model, permissionMode, budget, c
         id: String(i),
         prompt: typeof t === "string" ? t : t.prompt,
     }));
-    tasks = postProcess(tasks, onLog);
+    tasks = postProcess(tasks, budget, onLog);
     if (tasks.length === 0)
         throw new Error("Planner generated 0 tasks");
     onLog(`${tasks.length} tasks`);
     return tasks;
 }
-export async function refinePlan(objective, previousTasks, feedback, cwd, model, permissionMode, budget, concurrency, onLog) {
+export async function refinePlan(objective, previousTasks, feedback, cwd, plannerModel, workerModel, permissionMode, budget, concurrency, onLog) {
     onLog("Refining plan...");
     const prev = previousTasks.map((t, i) => `${i + 1}. ${t.prompt}`).join("\n");
-    const budgetLine = budget ? `Target ~${budget} tasks.` : "";
+    const capability = modelCapabilityBlock(workerModel);
+    const b = budget ?? 10;
+    const scaleNote = b > 50
+        ? `This is a LARGE budget (${b} sessions). Think big — missions, not micro-tasks.`
+        : b > 15
+            ? `Each of the ${b} sessions is a capable AI agent. Give substantial missions, not trivial edits.`
+            : `Target ~${b} tasks.`;
     const prompt = `You are a task coordinator. You previously planned these tasks for the objective:
 Objective: ${objective}
@@ -176,17 +329,19 @@ ${prev}
 The user wants changes: ${feedback}
-${budgetLine} ${concurrency} agents run in parallel. Update the plan accordingly. Keep tasks independent and targeting different files.
+AGENT CAPABILITY: ${capability}
+${scaleNote} ${concurrency} agents run in parallel. Update the plan accordingly. Keep tasks independent and targeting different files/areas.
 Respond with ONLY a JSON object (no markdown):
 {"tasks":[{"prompt":"..."}]}`;
-    const resultText = await runPlannerQuery(prompt, { cwd, model, permissionMode }, onLog);
+    const resultText = await runPlannerQuery(prompt, { cwd, model: plannerModel, permissionMode }, onLog);
     const parsed = await extractTaskJson(resultText, async () => {
         onLog("Retrying...");
         let retryText = "";
         for await (const msg of query({
             prompt: `Output ONLY a JSON object:\n{"tasks":[{"prompt":"..."}]}`,
-            options: { cwd, model, permissionMode, ...(permissionMode === "bypassPermissions" && { allowDangerouslySkipPermissions: true }), persistSession: false },
+            options: { cwd, model: plannerModel, permissionMode, ...(permissionMode === "bypassPermissions" && { allowDangerouslySkipPermissions: true }), persistSession: false },
         })) {
             if (msg.type === "result" && msg.subtype === "success")
                 retryText = msg.result || "";
@@ -197,7 +352,7 @@ Respond with ONLY a JSON object (no markdown):
         id: String(i),
         prompt: typeof t === "string" ? t : t.prompt,
     }));
-    tasks = postProcess(tasks, onLog);
+    tasks = postProcess(tasks, budget, onLog);
     if (tasks.length === 0)
         throw new Error("Refinement produced 0 tasks");
     onLog(`${tasks.length} tasks`);
@@ -219,50 +374,104 @@ function extractOutermostBraces(text) {
     }
     return null;
 }
-/** Try multiple strategies to parse task JSON, with one retry callback. */
-async function extractTaskJson(raw, retry) {
-    const attempt = (text) => {
+/** Try multiple strategies to parse JSON from LLM output. */
+function attemptJsonParse(text) {
+    try {
+        const obj = JSON.parse(text);
+        if (typeof obj === "object" && obj !== null)
+            return obj;
+    }
+    catch { }
+    const braces = extractOutermostBraces(text);
+    if (braces) {
         try {
-            const obj = JSON.parse(text);
-            if (obj?.tasks)
+            const obj = JSON.parse(braces);
+            if (typeof obj === "object" && obj !== null)
                 return obj;
         }
         catch { }
-        const braces = extractOutermostBraces(text);
-        if (braces) {
-            try {
-                const obj = JSON.parse(braces);
-                if (obj?.tasks)
-                    return obj;
-            }
-            catch { }
+    }
+    const stripped = text.replace(/```json?\s*/g, "").replace(/```/g, "").trim();
+    if (stripped !== text) {
+        try {
+            const obj = JSON.parse(stripped);
+            if (typeof obj === "object" && obj !== null)
+                return obj;
         }
-        const stripped = text.replace(/```json?\s*/g, "").replace(/```/g, "").trim();
-        if (stripped !== text) {
+        catch { }
+        const b2 = extractOutermostBraces(stripped);
+        if (b2) {
             try {
-                const obj = JSON.parse(stripped);
-                if (obj?.tasks)
-                    return obj;
+                return JSON.parse(b2);
             }
             catch { }
-            const b2 = extractOutermostBraces(stripped);
-            if (b2) {
-                try {
-                    const obj = JSON.parse(b2);
-                    if (obj?.tasks)
-                        return obj;
-                }
-                catch { }
-            }
         }
-        return null;
-    };
-    const first = attempt(raw);
-    if (first)
+    }
+    return null;
+}
+/** Extract task JSON with validation and one retry. */
+async function extractTaskJson(raw, retry) {
+    const first = attemptJsonParse(raw);
+    if (first?.tasks)
         return first;
     const retryText = await retry();
-    const second = attempt(retryText);
-    if (second)
+    const second = attemptJsonParse(retryText);
+    if (second?.tasks)
         return second;
     throw new Error("Planner did not return valid task JSON after retry");
 }
+// ── Wave steering ──
+export async function steerWave(objective, history, remainingBudget, cwd, plannerModel, workerModel, permissionMode, concurrency, onLog) { // Ask the planner model whether the objective is met and, if not, for the next wave's tasks; resolves to { done, tasks, reasoning }.
+    const capability = modelCapabilityBlock(workerModel); // capability text depends on the worker model, not the planner model
+    const historyText = history.map(w => { // render each prior wave as a headed bullet list for the prompt
+        const lines = w.tasks.map(t => {
+            const files = t.filesChanged ? ` (${t.filesChanged} files)` : ""; // suffix omitted when filesChanged is falsy (including 0)
+            const err = t.error ? ` — ${t.error}` : "";
+            return `  - [${t.status}] ${t.prompt.slice(0, 120)}${files}${err}`; // only the first 120 chars of each task prompt are shown
+        }).join("\n");
+        return `Wave ${w.wave + 1}:\n${lines}`; // w.wave is displayed 1-based (presumably stored 0-based — confirm)
+    }).join("\n\n");
+    const prompt = `You are steering an autonomous multi-wave agent system. Read the codebase to understand current state, then decide what's next.
+Objective: ${objective}
+Work completed so far:
+${historyText}
+Remaining budget: ${remainingBudget} agent sessions. ${concurrency} agents run in parallel — tasks must touch DIFFERENT files.
+${capability}
+Read the codebase. Then decide:
+- Is the objective fully met? → {"done": true, "reasoning": "..."}
+- More work needed? Plan the next wave → {"done": false, "reasoning": "what needs doing and why", "tasks": [{"prompt": "..."}]}
+Think like a tech lead between sprints: what shipped, what's missing, what needs polish, what should be scrapped and redone, what's over-engineered. Less is more — don't add work for the sake of filling budget.
+Respond with ONLY a JSON object (no markdown fences).`;
+    onLog("Reading codebase...");
+    const resultText = await runPlannerQuery(prompt, { cwd, model: plannerModel, permissionMode }, onLog);
+    const parsed = await (async () => { // parse the reply; on failure, retry once with a bare format reminder
+        const first = attemptJsonParse(resultText);
+        if (first) // any parsed object is accepted here — there is no check for "done" or "tasks" keys
+            return first;
+        onLog("Retrying...");
+        let retryText = "";
+        for await (const msg of query({ // NOTE(review): separate query() call whose prompt carries no objective or history — confirm the model has enough context to answer
+            prompt: `Output ONLY a JSON object: {"done":true/false,"reasoning":"...","tasks":[{"prompt":"..."}]}`,
+            options: { cwd, model: plannerModel, permissionMode, ...(permissionMode === "bypassPermissions" && { allowDangerouslySkipPermissions: true }), persistSession: false },
+        })) {
+            if (msg.type === "result" && msg.subtype === "success") // keep only the text of a successful result message
+                retryText = msg.result || "";
+        }
+        return attemptJsonParse(retryText) ?? { done: true, reasoning: "Could not parse steering response" }; // if the retry is unparseable too, fall back to done: true
+    })();
+    if (parsed.done) { // truthy done: return with no tasks; any tasks in the reply are ignored
+        return { done: true, tasks: [], reasoning: parsed.reasoning || "Objective complete" };
+    }
+    let tasks = (parsed.tasks || []).map((t, i) => ({ // a missing tasks array is treated as empty
+        id: String(i),
+        prompt: typeof t === "string" ? t : t.prompt, // entries may be plain strings or { prompt } objects
+    }));
+    tasks = postProcess(tasks, remainingBudget, onLog); // the remaining budget is passed as postProcess's budget argument
+    return { done: tasks.length === 0, tasks, reasoning: parsed.reasoning || "" }; // an empty task list after post-processing is reported as done
+}

package/dist/swarm.d.ts CHANGED Viewed

@@ -10,6 +10,8 @@ export interface SwarmConfig {
     agentTimeoutMs?: number;
     maxRetries?: number;
     mergeStrategy?: MergeStrategy;
+    /** Stop dispatching new tasks when rate-limit utilization reaches this fraction (0-1). */
+    usageCap?: number;
 }
 export interface MergeResult {
     branch: string;
@@ -35,10 +37,11 @@ export declare class Swarm {
     totalOutputTokens: number;
     phase: SwarmPhase;
     aborted: boolean;
+    cappedOut: boolean;
     mergeResults: MergeResult[];
     rateLimitUtilization: number;
     rateLimitStatus: string;
-    private rateLimitResetsAt?;
+    rateLimitResetsAt?: number;
     private queue;
     private config;
     private nextId;
@@ -47,11 +50,14 @@ export declare class Swarm {
     private cleanedUp;
     logFile?: string;
     readonly model: string | undefined;
+    readonly usageCap: number | undefined;
     constructor(config: SwarmConfig);
     get active(): number;
     get pending(): number;
     run(): Promise<void>;
     abort(): void;
+    /** Monotonic counter so non-TTY consumers can detect log trimming. */
+    logSequence: number;
     log(agentId: number, text: string): void;
     private worker;
     private throttle;