npm - @workermill/agent - Versions diffs - 0.8.5 → 0.8.9 - Mend

@workermill/agent 0.8.5 → 0.8.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39)

package/dist/cli.js +31 -64
package/dist/index.js +23 -123
package/package.json +3 -2
package/dist/ai-sdk-generate.d.ts +0 -33
package/dist/ai-sdk-generate.js +0 -160
package/dist/api.d.ts +0 -13
package/dist/api.js +0 -29
package/dist/cli.d.ts +0 -8
package/dist/commands/logs.d.ts +0 -9
package/dist/commands/logs.js +0 -52
package/dist/commands/pull.d.ts +0 -4
package/dist/commands/pull.js +0 -35
package/dist/commands/setup.d.ts +0 -11
package/dist/commands/setup.js +0 -396
package/dist/commands/start.d.ts +0 -11
package/dist/commands/start.js +0 -152
package/dist/commands/status.d.ts +0 -6
package/dist/commands/status.js +0 -86
package/dist/commands/stop.d.ts +0 -6
package/dist/commands/stop.js +0 -61
package/dist/commands/update.d.ts +0 -1
package/dist/commands/update.js +0 -20
package/dist/config.d.ts +0 -77
package/dist/config.js +0 -284
package/dist/index.d.ts +0 -14
package/dist/plan-validator.d.ts +0 -104
package/dist/plan-validator.js +0 -436
package/dist/planner.d.ts +0 -40
package/dist/planner.js +0 -792
package/dist/poller.d.ts +0 -20
package/dist/poller.js +0 -346
package/dist/providers.d.ts +0 -18
package/dist/providers.js +0 -118
package/dist/spawner.d.ts +0 -110
package/dist/spawner.js +0 -523
package/dist/updater.d.ts +0 -8
package/dist/updater.js +0 -40
package/dist/version.d.ts +0 -1
package/dist/version.js +0 -4

package/dist/plan-validator.js DELETED Viewed

@@ -1,436 +0,0 @@
-/**
- * Plan Validator for Remote Agent
- *
- * Validates execution plans locally before posting to the cloud API.
- * Implements the same guardrails as the server-side planning pipeline:
- *   1. File cap: max 15 targetFiles per story (MAX_TARGET_FILES; prevents scope explosion)
- *   2. Critic validation: LLM scores the plan, rejects below threshold
- *
- * This ensures remote agent plans get the same quality gates as cloud plans,
- * even though the planning prompt runs locally via Claude CLI.
- */
-import { spawn } from "child_process";
-import chalk from "chalk";
-import { generateText } from "./providers.js";
-import { api } from "./api.js";
-// ============================================================================
-// CONSTANTS
-// ============================================================================
-const MAX_TARGET_FILES = 15;
-const AUTO_APPROVAL_THRESHOLD = 85;
-// ============================================================================
-// PLAN PARSING
-// ============================================================================
-/**
- * Parse execution plan JSON from raw Claude CLI output.
- * Mirrors server-side parseExecutionPlan() in planning-agent-local.ts.
- */
-export function parseExecutionPlan(output) {
-    // Strategy 1: Find ```json ... ``` block using bracket-matching instead of regex.
-    // The lazy regex ([\s\S]*?) fails when JSON string values contain ``` (e.g., code
-    // blocks in story descriptions from PRDs with CI/CD YAML examples).
-    const jsonFenceStart = output.indexOf("```json");
-    if (jsonFenceStart !== -1) {
-        // Find the opening { after ```json
-        const searchFrom = jsonFenceStart + 7; // length of "```json"
-        const braceStart = output.indexOf("{", searchFrom);
-        if (braceStart !== -1) {
-            const extracted = extractBalancedJson(output, braceStart);
-            if (extracted) {
-                return JSON.parse(extracted);
-            }
-        }
-    }
-    // Strategy 2: Find raw JSON with "stories" key using bracket-matching
-    const storiesIdx = output.indexOf('"stories"');
-    if (storiesIdx !== -1) {
-        // Walk backwards to find the opening {
-        const before = output.substring(0, storiesIdx);
-        const braceStart = before.lastIndexOf("{");
-        if (braceStart !== -1) {
-            const extracted = extractBalancedJson(output, braceStart);
-            if (extracted) {
-                return JSON.parse(extracted);
-            }
-        }
-    }
-    throw new Error("Could not find JSON execution plan in output");
-}
-/**
- * Extract a balanced JSON object from a string starting at the given position.
- * Properly handles nested braces, strings with escaped characters, and code
- * blocks embedded in JSON string values (which contain triple backticks).
- */
-function extractBalancedJson(text, start) {
-    let depth = 0;
-    let inString = false;
-    let escape = false;
-    for (let i = start; i < text.length; i++) {
-        const ch = text[i];
-        if (escape) {
-            escape = false;
-            continue;
-        }
-        if (ch === "\\") {
-            if (inString)
-                escape = true;
-            continue;
-        }
-        if (ch === '"') {
-            inString = !inString;
-            continue;
-        }
-        if (inString)
-            continue;
-        if (ch === "{")
-            depth++;
-        else if (ch === "}") {
-            depth--;
-            if (depth === 0) {
-                return text.substring(start, i + 1);
-            }
-        }
-    }
-    return null; // Unbalanced
-}
-// ============================================================================
-// FILE CAP
-// ============================================================================
-/**
- * Apply file cap to all stories. Truncates targetFiles > MAX_TARGET_FILES.
- * Returns details about truncated stories for logging.
- */
-export function applyFileCap(plan) {
-    let truncatedCount = 0;
-    const details = [];
-    for (const story of plan.stories) {
-        if (!story.targetFiles || !Array.isArray(story.targetFiles)) {
-            story.targetFiles = [];
-        }
-        else if (story.targetFiles.length > MAX_TARGET_FILES) {
-            const dropped = story.targetFiles.slice(MAX_TARGET_FILES);
-            details.push(`${story.id}: ${story.targetFiles.length} files → ${MAX_TARGET_FILES} (dropped: ${dropped.join(", ")})`);
-            story.targetFiles = story.targetFiles.slice(0, MAX_TARGET_FILES);
-            truncatedCount++;
-        }
-    }
-    return { truncatedCount, details };
-}
-// ============================================================================
-// STORY CAP
-// ============================================================================
-/**
- * Apply story cap to the plan. Truncates stories beyond maxStories.
- * Returns details about dropped stories for logging.
- */
-export function applyStoryCap(plan, maxStories) {
-    if (plan.stories.length <= maxStories) {
-        return { droppedCount: 0, details: [] };
-    }
-    const droppedCount = plan.stories.length - maxStories;
-    const dropped = plan.stories.slice(maxStories);
-    const details = dropped.map((s) => `${s.id}: "${s.title}" (${s.persona})`);
-    plan.stories = plan.stories.slice(0, maxStories);
-    // Fix dependencies that reference dropped stories
-    const validIds = new Set(plan.stories.map((s) => s.id));
-    for (const story of plan.stories) {
-        story.dependencies = story.dependencies.filter((dep) => validIds.has(dep));
-    }
-    return { droppedCount, details };
-}
-// ============================================================================
-// FILE OVERLAP VALIDATION
-// ============================================================================
-/**
- * Resolve file overlaps by assigning each shared file to exactly one story.
- * When multiple stories list the same targetFile, the first story keeps it
- * and it's removed from subsequent stories. This prevents parallel merge
- * conflicts during consolidation — same auto-fix pattern as applyFileCap.
- *
- * Returns details about resolved overlaps for logging.
- */
-export function resolveFileOverlaps(plan) {
-    const fileOwner = new Map(); // file → first story that claims it
-    let resolvedCount = 0;
-    const details = [];
-    for (const story of plan.stories) {
-        if (!story.targetFiles || story.targetFiles.length === 0)
-            continue;
-        const kept = [];
-        const removed = [];
-        for (const file of story.targetFiles) {
-            const owner = fileOwner.get(file);
-            if (owner) {
-                // File already claimed by an earlier story — remove from this one
-                removed.push(file);
-            }
-            else {
-                fileOwner.set(file, story.id);
-                kept.push(file);
-            }
-        }
-        if (removed.length > 0) {
-            story.targetFiles = kept;
-            resolvedCount += removed.length;
-            details.push(`${story.id}: removed ${removed.join(", ")} (owned by ${removed.map((f) => fileOwner.get(f)).join(", ")})`);
-        }
-    }
-    return { resolvedCount, details };
-}
-// ============================================================================
-// PLAN SERIALIZATION
-// ============================================================================
-/**
- * Re-serialize plan as a JSON code block for posting to the API.
- * The server-side parseExecutionPlan() expects ```json ... ``` blocks.
- */
-export function serializePlan(plan) {
-    return "```json\n" + JSON.stringify(plan, null, 2) + "\n```";
-}
-// ============================================================================
-// CRITIC
-// ============================================================================
-/**
- * Critic prompt — identical to server-side critic-agent.ts CRITIC_PROMPT.
- */
-const CRITIC_PROMPT = `You are a Senior Architect reviewing an execution plan. Your job is to ensure the plan is appropriately sized for the task.
-Review this execution plan against the PRD:
-## PRD (Product Requirements Document)
-{{PRD}}
-## PROPOSED EXECUTION PLAN
-{{PLAN}}
-## Review Guidelines
-**IMPORTANT: Match plan size to task complexity**
-- Simple tasks (typos, config changes, single-file fixes) = 1 step is CORRECT
-- Medium tasks (2-4 files, small features) = 2-3 steps is appropriate
-- Complex tasks (new systems, security) = 3-5 steps is appropriate
-**Do NOT penalize:**
-- Single-step plans for genuinely simple tasks
-- Using one persona when only one skill is needed
-**DO check for:**
-1. **Missing Requirements** - Does the plan cover what the PRD asks for?
-2. **Scope Clarity** - Is each story's description a brief file scope label (1 line)? Stories should NOT rewrite ticket requirements.
-3. **Security Issues** - Only for tasks involving auth, user data, or external input
-4. **Unrealistic Scope** - Any step targeting >5 files MUST score below 85 (auto-rejection threshold). Each step should modify at most 5 files. If a step needs more, split it into multiple steps first.
-5. **Missing Operational Steps** - If the PRD requires deployment, provisioning, migrations, or running commands, does the plan include operational steps? Writing code is not the same as deploying it.
-6. **Overlapping File Scope** - If two or more steps share the same targetFiles, this causes parallel merge conflicts. Steps MUST NOT overlap on targetFiles. Deduct 10 points per shared file across steps.
-7. **Serialization Bottleneck** - If more than half the stories depend on a single story that targets >5 files, the plan has a bottleneck. Deduct 15 points — split the foundation or allow more parallel work.
-8. **Requirement Rewriting** - If any story description contains implementation details, acceptance criteria, or rewritten requirements from the PRD, deduct 15 points per offending story. Story descriptions must be ONE-LINE file scope labels (e.g., "Database layer — migrations and entity definitions"). The original ticket is the spec.
-## Scoring Guide
-- **90-100**: Plan matches task complexity, requirements covered
-- **75-89**: Minor gaps but fundamentally sound
-- **50-74**: Significant issues or wrong-sized for the task
-- **0-49**: Fundamentally flawed
-## Output Format
-Respond with ONLY a JSON object (no markdown, no explanation):
-{"approved": boolean, "score": number, "risks": ["risk1", "risk2"], "suggestions": ["suggestion1", "suggestion2"], "storyFeedback": [{"storyId": "step-0", "feedback": "specific feedback", "suggestedChanges": ["change1"]}]}
-Rules:
-- approved = true if score >= 85 AND plan is right-sized for task
-- risks = specific issues (empty array if none)
-- suggestions = actionable improvements (empty array if none)
-- storyFeedback = per-step feedback (optional, only for steps that need changes)`;
-/**
- * Build the critic prompt with PRD and plan substituted.
- */
-export function buildCriticPrompt(prd, plan) {
-    const planJson = JSON.stringify(plan, null, 2);
-    return CRITIC_PROMPT.replace("{{PRD}}", prd).replace("{{PLAN}}", planJson);
-}
-/**
- * Parse critic JSON response from raw critic output (Claude CLI or provider API).
- */
-export function parseCriticResponse(text) {
-    let jsonText = text.trim();
-    // Handle markdown code blocks
-    if (jsonText.includes("```")) {
-        const match = jsonText.match(/```(?:json)?\s*([\s\S]*?)```/);
-        if (match)
-            jsonText = match[1].trim();
-    }
-    // Find JSON object if preceded by reasoning text
-    const jsonStart = jsonText.indexOf("{");
-    if (jsonStart > 0) {
-        jsonText = jsonText.substring(jsonStart);
-    }
-    const result = JSON.parse(jsonText);
-    return {
-        approved: result.approved,
-        score: Math.max(0, Math.min(100, Math.round(result.score))),
-        risks: result.risks || [],
-        suggestions: result.suggestions,
-        storyFeedback: Array.isArray(result.storyFeedback)
-            ? result.storyFeedback
-            : undefined,
-    };
-}
-/**
- * Run the critic via Claude CLI (lightweight — no tools, just reasoning).
- * Returns the raw text output.
- */
-export function runCriticCli(claudePath, model, prompt, env, taskId) {
-    return new Promise((resolve, reject) => {
-        const proc = spawn(claudePath, [
-            "--print",
-            "--model",
-            model,
-            "--permission-mode",
-            "bypassPermissions",
-        ], {
-            env,
-            stdio: ["pipe", "pipe", "pipe"],
-        });
-        proc.stdin.write(prompt);
-        proc.stdin.end();
-        let stdout = "";
-        let stderr = "";
-        proc.stdout.on("data", (data) => {
-            const chunk = data.toString();
-            stdout += chunk;
-            // Stream critic reasoning to dashboard in real-time
-            const lines = chunk.split("\n").filter((l) => l.trim());
-            for (const line of lines) {
-                const trimmed = line.trim().length > 200
-                    ? line.trim().substring(0, 200) + "…"
-                    : line.trim();
-                if (trimmed) {
-                    if (taskId) {
-                        postLog(taskId, `${PREFIX} [critic] ${trimmed}`, "output");
-                    }
-                    console.log(`${ts()} ${chalk.dim("🔍")} ${chalk.dim(trimmed)}`);
-                }
-            }
-        });
-        proc.stderr.on("data", (data) => {
-            stderr += data.toString();
-        });
-        const timeout = setTimeout(() => {
-            proc.kill("SIGTERM");
-            reject(new Error("Critic CLI timed out after 20 minutes"));
-        }, 1_200_000);
-        proc.on("exit", (code) => {
-            clearTimeout(timeout);
-            if (code !== 0) {
-                reject(new Error(`Critic CLI failed (exit ${code}): ${stderr.substring(0, 300)}`));
-            }
-            else {
-                resolve(stdout);
-            }
-        });
-        proc.on("error", (err) => {
-            clearTimeout(timeout);
-            reject(err);
-        });
-    });
-}
-/**
- * Format critic feedback for appending to the planner prompt on re-run.
- */
-export function formatCriticFeedback(critic) {
-    const lines = [
-        "",
-        "## CRITIC FEEDBACK — Your previous plan was REJECTED",
-        "",
-        `Score: ${critic.score}/100 (need >= ${AUTO_APPROVAL_THRESHOLD} to pass)`,
-        "",
-    ];
-    if (critic.risks.length > 0) {
-        lines.push("### Risks Identified:");
-        for (const risk of critic.risks) {
-            lines.push(`- ${risk}`);
-        }
-        lines.push("");
-    }
-    if (critic.suggestions && critic.suggestions.length > 0) {
-        lines.push("### Required Changes:");
-        for (const suggestion of critic.suggestions) {
-            lines.push(`- ${suggestion}`);
-        }
-        lines.push("");
-    }
-    if (critic.storyFeedback && critic.storyFeedback.length > 0) {
-        lines.push("### Per-Story Feedback:");
-        for (const fb of critic.storyFeedback) {
-            lines.push(`- **${fb.storyId}**: ${fb.feedback}`);
-            if (fb.suggestedChanges) {
-                for (const change of fb.suggestedChanges) {
-                    lines.push(`  - ${change}`);
-                }
-            }
-        }
-        lines.push("");
-    }
-    lines.push("**You MUST address ALL feedback above.** Each story must target at most 5 files.", "Stories MUST NOT overlap on targetFiles. Generate a revised plan.");
-    return lines.join("\n");
-}
-/** Consistent prefix matching planner dashboard format */
-const PREFIX = "[🗺️ planning_agent 🤖]";
-/** Timestamp prefix for console logs */
-function ts() {
-    return chalk.dim(new Date().toLocaleTimeString());
-}
-/**
- * Post a log message to the cloud dashboard for real-time visibility.
- */
-async function postLog(taskId, message, type = "system", severity = "info") {
-    try {
-        await api.post("/api/control-center/logs", {
-            taskId,
-            type,
-            message,
-            severity,
-        });
-    }
-    catch {
-        // Fire and forget — don't block critic on log failures
-    }
-}
-/**
- * Run critic validation on a parsed plan.
- * Routes to Claude CLI (Anthropic) or HTTP API (other providers).
- * Returns the critic result, or null if critic fails (non-blocking).
- */
-export async function runCriticValidation(claudePath, model, prd, plan, env, taskLabel, provider, providerApiKey, taskId) {
-    const criticPrompt = buildCriticPrompt(prd, plan);
-    const effectiveProvider = provider || "anthropic";
-    console.log(`${ts()} ${taskLabel} ${chalk.dim(`Running critic validation (${effectiveProvider})...`)}`);
-    if (taskId) {
-        postLog(taskId, `${PREFIX} Running critic validation (${effectiveProvider})...`);
-    }
-    try {
-        let rawCriticOutput;
-        if (effectiveProvider === "anthropic") {
-            rawCriticOutput = await runCriticCli(claudePath, model, criticPrompt, env, taskId);
-        }
-        else {
-            if (!providerApiKey) {
-                throw new Error(`No API key for critic provider "${effectiveProvider}"`);
-            }
-            rawCriticOutput = await generateText(effectiveProvider, model, criticPrompt, providerApiKey, { maxTokens: 4096, temperature: 0.3, timeoutMs: 1_200_000 });
-        }
-        const result = parseCriticResponse(rawCriticOutput);
-        const statusIcon = result.score >= AUTO_APPROVAL_THRESHOLD
-            ? chalk.green("✓")
-            : chalk.red("✗");
-        console.log(`${ts()} ${taskLabel} ${statusIcon} Critic score: ${result.score}/100 (threshold: ${AUTO_APPROVAL_THRESHOLD})`);
-        return result;
-    }
-    catch (error) {
-        const errMsg = error instanceof Error ? error.message : String(error);
-        console.error(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} Critic failed: ${errMsg.substring(0, 100)}`);
-        return null;
-    }
-}
-export { AUTO_APPROVAL_THRESHOLD };

package/dist/planner.d.ts DELETED Viewed

@@ -1,40 +0,0 @@
-/**
- * Remote Agent Planner
- *
- * Fetches the planning prompt from the cloud API, runs it through
- * Claude CLI locally (using the customer's Claude Max subscription),
- * validates with a Planner-Critic loop, and posts the approved plan
- * back for server-side processing.
- *
- * Guardrails (matching server-side planning pipeline):
- *   1. File cap: max 5 targetFiles per story (prevents scope explosion)
- *   2. Critic validation: LLM scores the plan, rejects below 85/100
- *   3. Max 3 Planner-Critic iterations before failure
- *
- * Logs are streamed to the cloud dashboard in real-time so the user
- * sees the same planning progress as cloud mode.
- */
-import { type AgentConfig } from "./config.js";
-import type { ClaimCredentials } from "./spawner.js";
-export interface PlanningTask {
-    id: string;
-    summary: string;
-    description: string | null;
-    githubRepo?: string;
-    scmProvider?: string;
-}
-/**
- * Run planning for a task with Planner-Critic validation loop.
- *
- * Flow:
- *   1. Fetch planning prompt from cloud API
- *   2. Clone target repo (if available) so planner can explore with tools
- *   3. Run Claude CLI to generate plan
- *   4. Parse plan, apply file cap (max 5 files per story)
- *   5. Run critic validation via Claude CLI
- *   6. If critic approves (score >= 85): post validated plan to API
- *   7. If critic rejects: re-run planner with feedback (up to MAX_ITERATIONS)
- *   8. After MAX_ITERATIONS without approval: post best plan if score >= 50 (fallback)
- *   9. If no plan scored >= 50: fail the task
- */
-export declare function planTask(task: PlanningTask, config: AgentConfig, credentials?: ClaimCredentials): Promise<boolean>;