npm - @workermill/agent - Versions diffs - 0.1.1 → 0.2.0 - Mend

@workermill/agent 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

package/README.md CHANGED Viewed

File without changes

package/dist/api.d.ts CHANGED Viewed

File without changes

package/dist/api.js CHANGED Viewed

File without changes

package/dist/cli.d.ts CHANGED Viewed

File without changes

package/dist/cli.js CHANGED Viewed

File without changes

package/dist/commands/logs.d.ts CHANGED Viewed

File without changes

package/dist/commands/logs.js CHANGED Viewed

File without changes

package/dist/commands/pull.d.ts CHANGED Viewed

File without changes

package/dist/commands/pull.js CHANGED Viewed

File without changes

package/dist/commands/setup.d.ts CHANGED Viewed

File without changes

package/dist/commands/setup.js CHANGED Viewed

File without changes

package/dist/commands/start.d.ts CHANGED Viewed

File without changes

package/dist/commands/start.js CHANGED Viewed

File without changes

package/dist/commands/status.d.ts CHANGED Viewed

File without changes

package/dist/commands/status.js CHANGED Viewed

File without changes

package/dist/commands/stop.d.ts CHANGED Viewed

File without changes

package/dist/commands/stop.js CHANGED Viewed

File without changes

package/dist/config.d.ts CHANGED Viewed

File without changes

package/dist/config.js CHANGED Viewed

File without changes

package/dist/index.d.ts CHANGED Viewed

File without changes

package/dist/index.js CHANGED Viewed

File without changes

package/dist/plan-validator.d.ts ADDED Viewed

@@ -0,0 +1,82 @@
+/**
+ * Plan Validator for Remote Agent
+ *
+ * Validates execution plans locally before posting to the cloud API.
+ * Implements the same guardrails as the server-side planning pipeline:
+ *   1. File cap: max 5 targetFiles per story (prevents scope explosion)
+ *   2. Critic validation: LLM scores the plan, rejects below threshold
+ *
+ * This ensures remote agent plans get the same quality gates as cloud plans,
+ * even though the planning prompt runs locally via Claude CLI.
+ */
+/**
+ * One story (unit of work) inside an {@link ExecutionPlan}.
+ */
+export interface PlannedStory {
+    /** Identifier of the story; used to label it in file-cap log details. */
+    id: string;
+    title: string;
+    description: string;
+    persona: string;
+    priority: number;
+    estimatedEffort: "small" | "medium" | "large";
+    /** NOTE(review): presumably ids of other stories this one depends on — confirm. */
+    dependencies: string[];
+    acceptanceCriteria: string[];
+    /**
+     * Files the story is expected to touch. applyFileCap() keeps only the
+     * first 5 entries and replaces a missing or non-array value with [].
+     */
+    targetFiles?: string[];
+    scope?: string;
+}
+/**
+ * Execution plan as parsed from the planner's output by parseExecutionPlan()
+ * and re-serialized by serializePlan() before it is posted to the API.
+ */
+export interface ExecutionPlan {
+    summary: string;
+    /** Stories making up the plan; applyFileCap() iterates and mutates these. */
+    stories: PlannedStory[];
+    risks: string[];
+    assumptions: string[];
+}
+/**
+ * Critic verdict as returned by parseCriticResponse().
+ */
+export interface CriticResult {
+    /** The critic's own approved flag, taken from its JSON response. */
+    approved: boolean;
+    /** Critic score, rounded to an integer and clamped to the range 0-100. */
+    score: number;
+    /** Issues named by the critic; an empty array when the response has none. */
+    risks: string[];
+    /** Improvements proposed by the critic, when the response includes them. */
+    suggestions?: string[];
+    /** Per-story feedback; only present when the response carried an array. */
+    storyFeedback?: Array<{
+        storyId: string;
+        feedback: string;
+        suggestedChanges?: string[];
+    }>;
+}
+/** Critic score (out of 100) at or above which a plan is reported as passing. */
+declare const AUTO_APPROVAL_THRESHOLD = 85;
+/**
+ * Parse execution plan JSON from raw Claude CLI output.
+ * Mirrors server-side parseExecutionPlan() in planning-agent-local.ts.
+ *
+ * Looks for a ```json fenced block first, then for a raw `{ ... }` span
+ * that contains a "stories" key.
+ *
+ * @param output Raw text produced by the planner CLI run.
+ * @returns The parsed plan.
+ * @throws Error when no JSON plan can be located; JSON.parse errors propagate.
+ */
+export declare function parseExecutionPlan(output: string): ExecutionPlan;
+/**
+ * Apply file cap to all stories. Truncates targetFiles > MAX_TARGET_FILES.
+ * Returns details about truncated stories for logging.
+ *
+ * The plan is modified in place: over-long targetFiles lists are cut to the
+ * first MAX_TARGET_FILES entries, and a missing or non-array targetFiles is
+ * replaced with an empty array.
+ *
+ * @param plan Plan whose stories are capped (mutated).
+ * @returns `truncatedCount` — number of stories that were truncated;
+ *   `details` — one human-readable line per truncated story.
+ */
+export declare function applyFileCap(plan: ExecutionPlan): {
+    truncatedCount: number;
+    details: string[];
+};
+/**
+ * Re-serialize plan as a JSON code block for posting to the API.
+ * The server-side parseExecutionPlan() expects ```json ... ``` blocks.
+ *
+ * @param plan Plan to serialize.
+ * @returns The plan as 2-space-indented JSON wrapped in a ```json fence.
+ */
+export declare function serializePlan(plan: ExecutionPlan): string;
+/**
+ * Build the critic prompt with PRD and plan substituted.
+ *
+ * @param prd Requirements text inserted into the prompt's PRD section.
+ * @param plan Plan inserted into the prompt as 2-space-indented JSON.
+ * @returns The complete critic prompt.
+ */
+export declare function buildCriticPrompt(prd: string, plan: ExecutionPlan): string;
+/**
+ * Parse critic JSON response from raw Claude CLI output.
+ *
+ * Accepts JSON wrapped in a markdown code fence or preceded by other text.
+ * The score is rounded and clamped to 0-100; `risks` falls back to [].
+ *
+ * @param text Raw critic output.
+ * @returns The normalized critic result.
+ * @throws When the extracted text is not valid JSON.
+ */
+export declare function parseCriticResponse(text: string): CriticResult;
+/**
+ * Run the critic via Claude CLI (lightweight — no tools, just reasoning).
+ * Returns the raw text output.
+ *
+ * The CLI is spawned with `--print --model <model> --permission-mode
+ * bypassPermissions` and receives the prompt on stdin.
+ *
+ * @param claudePath Path of the Claude CLI executable to spawn.
+ * @param model Value passed to `--model`.
+ * @param prompt Prompt text written to the process's stdin.
+ * @param env Environment for the child process.
+ * @returns Promise resolving to the CLI's stdout. It rejects when the
+ *   process exits non-zero, fails to spawn, or runs longer than 3 minutes
+ *   (in which case it is sent SIGTERM).
+ */
+export declare function runCriticCli(claudePath: string, model: string, prompt: string, env: Record<string, string | undefined>): Promise<string>;
+/**
+ * Format critic feedback for appending to the planner prompt on re-run.
+ *
+ * @param critic Critic result whose score, risks, suggestions and per-story
+ *   feedback are rendered.
+ * @returns Markdown text beginning with a "CRITIC FEEDBACK" heading.
+ */
+export declare function formatCriticFeedback(critic: CriticResult): string;
+/**
+ * Run critic validation on a parsed plan.
+ * Returns the critic result, or null if critic fails (non-blocking).
+ *
+ * CLI and response-parsing failures are logged to the console and turned
+ * into a null result instead of a rejection.
+ *
+ * @param claudePath Path of the Claude CLI executable.
+ * @param model Model name passed to the CLI.
+ * @param prd Requirements text the plan is judged against.
+ * @param plan Plan to review.
+ * @param env Environment for the CLI process.
+ * @param taskLabel Label printed in each console log line.
+ */
+export declare function runCriticValidation(claudePath: string, model: string, prd: string, plan: ExecutionPlan, env: Record<string, string | undefined>, taskLabel: string): Promise<CriticResult | null>;
+export { AUTO_APPROVAL_THRESHOLD };

package/dist/plan-validator.js ADDED Viewed

@@ -0,0 +1,268 @@
+/**
+ * Plan Validator for Remote Agent
+ *
+ * Validates execution plans locally before posting to the cloud API.
+ * Implements the same guardrails as the server-side planning pipeline:
+ *   1. File cap: max 5 targetFiles per story (prevents scope explosion)
+ *   2. Critic validation: LLM scores the plan, rejects below threshold
+ *
+ * This ensures remote agent plans get the same quality gates as cloud plans,
+ * even though the planning prompt runs locally via Claude CLI.
+ */
+import { spawn } from "child_process";
+import chalk from "chalk";
+// ============================================================================
+// CONSTANTS
+// ============================================================================
+/** Maximum number of targetFiles applyFileCap() keeps per story. */
+const MAX_TARGET_FILES = 5;
+/** Critic score (out of 100) at or above which a plan is reported as passing. */
+const AUTO_APPROVAL_THRESHOLD = 85;
+// ============================================================================
+// PLAN PARSING
+// ============================================================================
+/**
+ * Parse execution plan JSON from raw Claude CLI output.
+ * Mirrors server-side parseExecutionPlan() in planning-agent-local.ts.
+ *
+ * Two candidates are tried in order: the contents of the first ```json
+ * fenced block, then the span from the first "{" to the last "}" that
+ * contains a "stories" key. The first candidate that parses to an object
+ * with a `stories` array is returned, so an unparsable or unrelated fenced
+ * block does not hide a valid plan elsewhere in the output.
+ *
+ * @param output Raw text produced by the planner CLI run.
+ * @returns The parsed plan object.
+ * @throws Error when no candidate is found; otherwise the failure of the
+ *   last candidate tried (a JSON SyntaxError, or an Error saying the
+ *   "stories" array is missing).
+ */
+export function parseExecutionPlan(output) {
+    const candidates = [];
+    const fencedMatch = output.match(/```json\s*([\s\S]*?)\s*```/);
+    if (fencedMatch) {
+        candidates.push(fencedMatch[1]);
+    }
+    const rawJsonMatch = output.match(/\{[\s\S]*"stories"[\s\S]*\}/);
+    if (rawJsonMatch) {
+        candidates.push(rawJsonMatch[0]);
+    }
+    if (candidates.length === 0) {
+        throw new Error("Could not find JSON execution plan in output");
+    }
+    let lastError;
+    for (const candidate of candidates) {
+        let parsed;
+        try {
+            parsed = JSON.parse(candidate);
+        }
+        catch (error) {
+            lastError = error;
+            continue;
+        }
+        // Callers iterate plan.stories right away, so reject anything
+        // without one here instead of failing later with a TypeError.
+        if (parsed !== null && typeof parsed === "object" && Array.isArray(parsed.stories)) {
+            return parsed;
+        }
+        lastError = new Error('Execution plan JSON is missing a "stories" array');
+    }
+    throw lastError;
+}
+// ============================================================================
+// FILE CAP
+// ============================================================================
+/**
+ * Enforce the per-story file cap on a plan, in place.
+ *
+ * Every story ends up with an array in `targetFiles`: a missing or
+ * non-array value becomes [], and a list longer than MAX_TARGET_FILES is
+ * cut down to its first MAX_TARGET_FILES entries.
+ *
+ * @param plan Plan whose stories are capped (mutated).
+ * @returns `truncatedCount` — how many stories were cut; `details` — one
+ *   log line per cut story naming the dropped files.
+ */
+export function applyFileCap(plan) {
+    const details = [];
+    for (const story of plan.stories) {
+        const files = story.targetFiles;
+        // Array.isArray() is false for every falsy value, so this single
+        // test covers both "missing" and "not a list".
+        if (!Array.isArray(files)) {
+            story.targetFiles = [];
+            continue;
+        }
+        const overflow = files.slice(MAX_TARGET_FILES);
+        if (overflow.length === 0) {
+            continue;
+        }
+        details.push(`${story.id}: ${files.length} files → ${MAX_TARGET_FILES} (dropped: ${overflow.join(", ")})`);
+        story.targetFiles = files.slice(0, MAX_TARGET_FILES);
+    }
+    // Exactly one detail line is recorded per truncated story.
+    return { truncatedCount: details.length, details };
+}
+// ============================================================================
+// PLAN SERIALIZATION
+// ============================================================================
+/**
+ * Re-serialize plan as a JSON code block for posting to the API.
+ * The server-side parseExecutionPlan() expects ```json ... ``` blocks.
+ *
+ * Backticks inside the JSON are written as the escape \u0060. A backtick
+ * can only occur inside a JSON string, where the escape decodes to the same
+ * character, so the parsed plan is unchanged — but a "```" inside a story
+ * description can no longer be mistaken for the closing fence.
+ *
+ * @param plan Plan to serialize.
+ * @returns 2-space-indented JSON wrapped in a ```json fence.
+ */
+export function serializePlan(plan) {
+    // String() keeps the "undefined" rendering for an input JSON cannot represent.
+    const json = String(JSON.stringify(plan, null, 2));
+    return "```json\n" + json.replace(/`/g, "\\u0060") + "\n```";
+}
+// ============================================================================
+// CRITIC
+// ============================================================================
+/**
+ * Critic prompt — identical to server-side critic-agent.ts CRITIC_PROMPT.
+ * The {{PRD}} and {{PLAN}} placeholders are filled in by buildCriticPrompt().
+ */
+const CRITIC_PROMPT = `You are a Senior Architect reviewing an execution plan. Your job is to ensure the plan is appropriately sized for the task.
+Review this execution plan against the PRD:
+## PRD (Product Requirements Document)
+{{PRD}}
+## PROPOSED EXECUTION PLAN
+{{PLAN}}
+## Review Guidelines
+**IMPORTANT: Match plan size to task complexity**
+- Simple tasks (typos, config changes, single-file fixes) = 1 step is CORRECT
+- Medium tasks (2-4 files, small features) = 2-3 steps is appropriate
+- Complex tasks (new systems, security) = 3-5 steps is appropriate
+**Do NOT penalize:**
+- Single-step plans for genuinely simple tasks
+- Using one persona when only one skill is needed
+**DO check for:**
+1. **Missing Requirements** - Does the plan cover what the PRD asks for?
+2. **Vague Instructions** - Will the worker know what to do?
+3. **Security Issues** - Only for tasks involving auth, user data, or external input
+4. **Unrealistic Scope** - Any step targeting >3 files MUST score below 85 (auto-rejection threshold). Each step should modify at most 3 files. If a step needs more, split it into multiple steps first.
+5. **Missing Operational Steps** - If the PRD requires deployment, provisioning, migrations, or running commands, does the plan include operational steps? Writing code is not the same as deploying it.
+6. **Overlapping File Scope** - If two or more steps share the same targetFiles, this causes parallel merge conflicts. Steps MUST NOT overlap on targetFiles. Deduct 10 points per shared file across steps.
+## Scoring Guide
+- **90-100**: Plan matches task complexity, requirements covered
+- **75-89**: Minor gaps but fundamentally sound
+- **50-74**: Significant issues or wrong-sized for the task
+- **0-49**: Fundamentally flawed
+## Output Format
+Respond with ONLY a JSON object (no markdown, no explanation):
+{"approved": boolean, "score": number, "risks": ["risk1", "risk2"], "suggestions": ["suggestion1", "suggestion2"], "storyFeedback": [{"storyId": "step-0", "feedback": "specific feedback", "suggestedChanges": ["change1"]}]}
+Rules:
+- approved = true if score >= 85 AND plan is right-sized for task
+- risks = specific issues (empty array if none)
+- suggestions = actionable improvements (empty array if none)
+- storyFeedback = per-step feedback (optional, only for steps that need changes)`;
+/**
+ * Build the critic prompt with PRD and plan substituted.
+ *
+ * Both placeholders are filled in one pass through a replacer function:
+ *   - a replacer's return value is inserted verbatim, so "$&", "$$", "$'"
+ *     and similar sequences in the PRD or plan are not expanded as
+ *     replacement patterns;
+ *   - inserted text is never re-scanned, so a PRD that happens to contain
+ *     "{{PLAN}}" cannot capture the plan substitution.
+ *
+ * @param prd Requirements text for the prompt's PRD section.
+ * @param plan Plan, inserted as 2-space-indented JSON.
+ * @returns The complete critic prompt.
+ */
+export function buildCriticPrompt(prd, plan) {
+    const planJson = JSON.stringify(plan, null, 2);
+    return CRITIC_PROMPT.replace(/\{\{(PRD|PLAN)\}\}/g, (_placeholder, key) => String(key === "PRD" ? prd : planJson));
+}
+/**
+ * Parse critic JSON response from raw Claude CLI output.
+ *
+ * Extraction: the contents of the first markdown code fence are used when
+ * one is present, then everything outside the outermost "{" ... "}" span is
+ * dropped, so reasoning text before or after the JSON is tolerated.
+ *
+ * Normalization:
+ *   - score must be a number or numeric string; it is rounded and clamped
+ *     to 0-100. A response without a usable score is rejected rather than
+ *     returned with a NaN score.
+ *   - approved is true only for a literal JSON `true`.
+ *   - risks / suggestions are coerced to string arrays (a lone non-empty
+ *     string becomes a one-element array) so callers can safely use
+ *     .length and .join(); risks falls back to [].
+ *   - storyFeedback is kept only when it is an array.
+ *
+ * @param text Raw critic output.
+ * @returns The normalized critic result.
+ * @throws SyntaxError when the extracted text is not valid JSON; Error when
+ *   it is not a JSON object or has no numeric score.
+ */
+export function parseCriticResponse(text) {
+    let jsonText = text.trim();
+    // Handle markdown code blocks
+    if (jsonText.includes("```")) {
+        const match = jsonText.match(/```(?:json)?\s*([\s\S]*?)```/);
+        if (match)
+            jsonText = match[1].trim();
+    }
+    // Keep only the outermost {...} span, dropping surrounding prose
+    const jsonStart = jsonText.indexOf("{");
+    const jsonEnd = jsonText.lastIndexOf("}");
+    if (jsonStart !== -1 && jsonEnd > jsonStart) {
+        jsonText = jsonText.substring(jsonStart, jsonEnd + 1);
+    }
+    const result = JSON.parse(jsonText);
+    if (result === null || typeof result !== "object" || Array.isArray(result)) {
+        throw new Error("Critic response is not a JSON object");
+    }
+    const rawScore = result.score;
+    const score = typeof rawScore === "number"
+        ? rawScore
+        : typeof rawScore === "string" && rawScore.trim() !== ""
+            ? Number(rawScore)
+            : NaN;
+    if (!Number.isFinite(score)) {
+        throw new Error("Critic response is missing a numeric score");
+    }
+    // Coerce a list-like field to string[]; undefined when absent/unusable.
+    const toStringList = (value) => {
+        if (Array.isArray(value))
+            return value.map((item) => String(item));
+        if (typeof value === "string" && value.trim() !== "")
+            return [value];
+        return undefined;
+    };
+    return {
+        approved: result.approved === true,
+        score: Math.max(0, Math.min(100, Math.round(score))),
+        risks: toStringList(result.risks) || [],
+        suggestions: toStringList(result.suggestions),
+        storyFeedback: Array.isArray(result.storyFeedback)
+            ? result.storyFeedback
+            : undefined,
+    };
+}
+/**
+ * Run the critic via Claude CLI (lightweight — no tools, just reasoning).
+ * Returns the raw text output.
+ *
+ * The CLI is spawned with `--print --model <model> --permission-mode
+ * bypassPermissions`; the prompt is written to its stdin and stdout is
+ * collected as UTF-8 text.
+ *
+ * @param claudePath Path of the Claude CLI executable to spawn.
+ * @param model Value passed to `--model`.
+ * @param prompt Prompt text written to the process's stdin.
+ * @param env Environment for the child process.
+ * @returns Promise resolving to the collected stdout. It rejects when the
+ *   process ends with a non-zero (or signal) status, fails to spawn, or is
+ *   still running after 3 minutes (it is then sent SIGTERM).
+ */
+export function runCriticCli(claudePath, model, prompt, env) {
+    return new Promise((resolve, reject) => {
+        const proc = spawn(claudePath, [
+            "--print",
+            "--model",
+            model,
+            "--permission-mode",
+            "bypassPermissions",
+        ], {
+            env,
+            stdio: ["pipe", "pipe", "pipe"],
+        });
+        let stdout = "";
+        let stderr = "";
+        let settled = false;
+        let timeout;
+        // Several events can report the same failure (timeout then close,
+        // error then close); only the first one settles the promise.
+        const settle = (finish, value) => {
+            if (settled)
+                return;
+            settled = true;
+            clearTimeout(timeout);
+            finish(value);
+        };
+        timeout = setTimeout(() => {
+            proc.kill("SIGTERM");
+            settle(reject, new Error("Critic CLI timed out after 3 minutes"));
+        }, 180_000);
+        // Decode at the stream level so a multi-byte character split across
+        // two chunks is not corrupted.
+        proc.stdout.setEncoding("utf8");
+        proc.stderr.setEncoding("utf8");
+        proc.stdout.on("data", (data) => {
+            stdout += data.toString();
+        });
+        proc.stderr.on("data", (data) => {
+            stderr += data.toString();
+        });
+        // A child that dies or never starts makes the stdin write fail
+        // (e.g. EPIPE). That failure is already reported through the
+        // "error"/"close" handlers below; the listener only prevents an
+        // unhandled stream error from crashing the agent.
+        proc.stdin.on("error", () => { });
+        // "close" (unlike "exit") fires only after stdout/stderr have
+        // ended, so the collected output is complete.
+        proc.on("close", (code) => {
+            if (code !== 0) {
+                settle(reject, new Error(`Critic CLI failed (exit ${code}): ${stderr.substring(0, 300)}`));
+            }
+            else {
+                settle(resolve, stdout);
+            }
+        });
+        proc.on("error", (err) => {
+            settle(reject, err);
+        });
+        proc.stdin.write(prompt);
+        proc.stdin.end();
+    });
+}
+/**
+ * Render a rejected critic result as a markdown section that is appended to
+ * the planner prompt for the next attempt.
+ *
+ * Layout: heading and score line, then — each only when non-empty — the
+ * risks, the suggestions ("Required Changes") and the per-story feedback,
+ * followed by a fixed closing instruction.
+ *
+ * NOTE(review): the closing instruction allows "at most 5 files" per story,
+ * while CRITIC_PROMPT tells the critic to reject any step targeting more
+ * than 3 files — confirm which limit is intended.
+ *
+ * @param critic Critic result to render.
+ * @returns The feedback text, lines joined with "\n".
+ */
+export function formatCriticFeedback(critic) {
+    const body = [];
+    const risks = critic.risks;
+    if (risks.length > 0) {
+        body.push("### Risks Identified:", ...Array.from(risks, (risk) => `- ${risk}`), "");
+    }
+    const suggestions = critic.suggestions;
+    if (suggestions && suggestions.length > 0) {
+        body.push("### Required Changes:", ...Array.from(suggestions, (suggestion) => `- ${suggestion}`), "");
+    }
+    const perStory = critic.storyFeedback;
+    if (perStory && perStory.length > 0) {
+        body.push("### Per-Story Feedback:");
+        for (const entry of perStory) {
+            body.push(`- **${entry.storyId}**: ${entry.feedback}`);
+            if (entry.suggestedChanges) {
+                body.push(...Array.from(entry.suggestedChanges, (change) => `  - ${change}`));
+            }
+        }
+        body.push("");
+    }
+    return [
+        "",
+        "## CRITIC FEEDBACK — Your previous plan was REJECTED",
+        "",
+        `Score: ${critic.score}/100 (need >= ${AUTO_APPROVAL_THRESHOLD} to pass)`,
+        "",
+        ...body,
+        "**You MUST address ALL feedback above.** Each story must target at most 5 files.",
+        "Stories MUST NOT overlap on targetFiles. Generate a revised plan.",
+    ].join("\n");
+}
+/** Current local time of day, dimmed with chalk, used to prefix console log lines. */
+function ts() {
+    const now = new Date();
+    return chalk.dim(now.toLocaleTimeString());
+}
+/**
+ * Have the critic review a parsed plan.
+ *
+ * Builds the critic prompt, runs it through the Claude CLI, parses the
+ * response and logs the score against AUTO_APPROVAL_THRESHOLD. A failure in
+ * the CLI run or in parsing its response is logged and reported as null, so
+ * the critic never blocks the caller.
+ *
+ * @param claudePath Path of the Claude CLI executable.
+ * @param model Model name passed to the CLI.
+ * @param prd Requirements text the plan is judged against.
+ * @param plan Plan to review.
+ * @param env Environment for the CLI process.
+ * @param taskLabel Label printed in each console log line.
+ * @returns The critic result, or null if the critic failed.
+ */
+export async function runCriticValidation(claudePath, model, prd, plan, env, taskLabel) {
+    const prompt = buildCriticPrompt(prd, plan);
+    console.log(`${ts()} ${taskLabel} ${chalk.dim("Running critic validation...")}`);
+    try {
+        const verdict = parseCriticResponse(await runCriticCli(claudePath, model, prompt, env));
+        const passed = verdict.score >= AUTO_APPROVAL_THRESHOLD;
+        const statusIcon = passed ? chalk.green("✓") : chalk.red("✗");
+        console.log(`${ts()} ${taskLabel} ${statusIcon} Critic score: ${verdict.score}/100 (threshold: ${AUTO_APPROVAL_THRESHOLD})`);
+        return verdict;
+    }
+    catch (error) {
+        // Non-blocking by design: report the failure and let the caller decide.
+        const reason = error instanceof Error ? error.message : String(error);
+        console.error(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} Critic failed: ${reason.substring(0, 100)}`);
+        return null;
+    }
+}
+export { AUTO_APPROVAL_THRESHOLD };

package/dist/planner.d.ts CHANGED Viewed

@@ -3,7 +3,13 @@
  *
  * Fetches the planning prompt from the cloud API, runs it through
  * Claude CLI locally (using the customer's Claude Max subscription),
- * and posts the raw output back for server-side validation.
+ * validates with a Planner-Critic loop, and posts the approved plan
+ * back for server-side processing.
+ *
+ * Guardrails (matching server-side planning pipeline):
+ *   1. File cap: max 5 targetFiles per story (prevents scope explosion)
+ *   2. Critic validation: LLM scores the plan, rejects below 85/100
+ *   3. Max 3 Planner-Critic iterations before failure
  *
  * Logs are streamed to the cloud dashboard in real-time so the user
  * sees the same planning progress as cloud mode.
@@ -12,8 +18,18 @@ import { type AgentConfig } from "./config.js";
 export interface PlanningTask {
     id: string;
     summary: string;
+    description: string | null;
 }
 /**
- * Run planning for a task: fetch prompt, execute Claude CLI, post result.
+ * Run planning for a task with Planner-Critic validation loop.
+ *
+ * Flow:
+ *   1. Fetch planning prompt from cloud API
+ *   2. Run Claude CLI to generate plan
+ *   3. Parse plan, apply file cap (max 5 files per story)
+ *   4. Run critic validation via Claude CLI
+ *   5. If critic approves (score >= 85): post validated plan to API
+ *   6. If critic rejects: re-run planner with feedback (up to MAX_ITERATIONS)
+ *   7. After MAX_ITERATIONS without approval: fail the task
  */
 export declare function planTask(task: PlanningTask, config: AgentConfig): Promise<boolean>;

package/dist/planner.js CHANGED Viewed

@@ -3,7 +3,13 @@
  *
  * Fetches the planning prompt from the cloud API, runs it through
  * Claude CLI locally (using the customer's Claude Max subscription),
- * and posts the raw output back for server-side validation.
+ * validates with a Planner-Critic loop, and posts the approved plan
+ * back for server-side processing.
+ *
+ * Guardrails (matching server-side planning pipeline):
+ *   1. File cap: max 5 targetFiles per story (prevents scope explosion)
+ *   2. Critic validation: LLM scores the plan, rejects below 85/100
+ *   3. Max 3 Planner-Critic iterations before failure
  *
  * Logs are streamed to the cloud dashboard in real-time so the user
  * sees the same planning progress as cloud mode.
@@ -12,6 +18,9 @@ import chalk from "chalk";
 import { spawn } from "child_process";
 import { findClaudePath } from "./config.js";
 import { api } from "./api.js";
+import { parseExecutionPlan, applyFileCap, serializePlan, runCriticValidation, formatCriticFeedback, AUTO_APPROVAL_THRESHOLD, } from "./plan-validator.js";
+/** Max Planner-Critic iterations before giving up */
+const MAX_ITERATIONS = 3;
 /** Timestamp prefix */
 function ts() {
     return chalk.dim(new Date().toLocaleTimeString());
@@ -51,14 +60,22 @@ async function postProgress(taskId, phase, elapsedSeconds, detail, charsGenerate
         // Fire and forget
     }
 }
+/** Consistent prefix matching local workermill dashboard format */
+const PREFIX = "[🗺️ planning_agent 🤖]";
+/**
+ * Render a duration in seconds for log messages: "28s" below one minute,
+ * "1m 25s" from one minute up.
+ */
+function formatElapsed(seconds) {
+    const secondsPart = `${seconds % 60}s`;
+    const wholeMinutes = Math.floor(seconds / 60);
+    if (wholeMinutes > 0) {
+        return `${wholeMinutes}m ${secondsPart}`;
+    }
+    return secondsPart;
+}
 function phaseLabel(phase, elapsed) {
     switch (phase) {
-        case "initializing": return "Starting planning agent...";
-        case "reading_repo": return "Reading repository structure...";
-        case "analyzing": return "Analyzing requirements...";
-        case "generating_plan": return `Generating execution plan... (${elapsed}s)`;
-        case "validating": return "Validating plan...";
-        case "complete": return "Planning complete";
+        case "initializing": return `${PREFIX} Starting planning agent...`;
+        case "reading_repo": return `${PREFIX} Reading repository structure...`;
+        case "analyzing": return `${PREFIX} Analyzing requirements...`;
+        case "generating_plan": return `${PREFIX} Planning in progress — analyzing requirements and decomposing into steps (${formatElapsed(elapsed)} elapsed)`;
+        case "validating": return `${PREFIX} Validating plan...`;
+        case "complete": return `${PREFIX} Planning complete`;
     }
 }
 /**
@@ -119,7 +136,7 @@ function runClaudeCli(claudePath, model, prompt, env, taskId, startTime) {
             // Periodic progress during generation
             if (currentPhase === "generating_plan" && elapsed - lastProgressLogAt >= 30) {
                 lastProgressLogAt = elapsed;
-                const msg = `Generating execution plan... (${elapsed}s, ${charsReceived} chars, ${toolCallCount} tool calls)`;
+                const msg = `${PREFIX} Planning in progress — analyzing requirements and decomposing into steps (${formatElapsed(elapsed)} elapsed)`;
                 postLog(taskId, msg);
                 console.log(`${ts()} ${taskLabel} ${chalk.dim(msg)}`);
             }
@@ -212,57 +229,186 @@ function runClaudeCli(claudePath, model, prompt, env, taskId, startTime) {
     });
 }
 /**
- * Run planning for a task: fetch prompt, execute Claude CLI, post result.
+ * Run planning for a task with Planner-Critic validation loop.
+ *
+ * Flow:
+ *   1. Fetch planning prompt from cloud API
+ *   2. Run Claude CLI to generate plan
+ *   3. Parse plan, apply file cap (max 5 files per story)
+ *   4. Run critic validation via Claude CLI
+ *   5. If critic approves (score >= 85): post validated plan to API
+ *   6. If critic rejects: re-run planner with feedback (up to MAX_ITERATIONS)
+ *   7. After MAX_ITERATIONS without approval: fail the task
  */
 export async function planTask(task, config) {
     const taskLabel = chalk.cyan(task.id.slice(0, 8));
     console.log(`${ts()} ${taskLabel} Fetching planning prompt...`);
-    await postLog(task.id, "Fetching planning prompt from cloud API...");
+    await postLog(task.id, `${PREFIX} Fetching planning prompt from cloud API...`);
     // 1. Fetch the assembled planning prompt from the cloud API
     const promptResponse = await api.get("/api/agent/planning-prompt", {
         params: { taskId: task.id },
     });
-    const { prompt, model } = promptResponse.data;
+    const { prompt: basePrompt, model } = promptResponse.data;
     const cliModel = model || "sonnet";
-    console.log(`${ts()} ${taskLabel} Running Claude CLI ${chalk.dim(`(model: ${chalk.yellow(cliModel)})`)}`);
-    await postLog(task.id, `Starting planning agent (model: ${cliModel})...`);
-    // 2. Run Claude CLI asynchronously with progress logging
     const claudePath = process.env.CLAUDE_CLI_PATH || findClaudePath() || "claude";
     const cleanEnv = { ...process.env };
     delete cleanEnv.CLAUDE_CODE_OAUTH_TOKEN;
     const startTime = Date.now();
-    let rawOutput;
+    // PRD for critic validation: use task description, fall back to summary
+    const prd = task.description || task.summary;
+    // 2. Planner-Critic iteration loop
+    let currentPrompt = basePrompt;
+    let bestPlan = null;
+    let bestScore = 0;
+    for (let iteration = 1; iteration <= MAX_ITERATIONS; iteration++) {
+        const iterLabel = MAX_ITERATIONS > 1 ? ` (attempt ${iteration}/${MAX_ITERATIONS})` : "";
+        if (iteration > 1) {
+            console.log(`${ts()} ${taskLabel} Running Claude CLI${iterLabel} ${chalk.dim(`(model: ${chalk.yellow(cliModel)})`)}`);
+            await postLog(task.id, `${PREFIX} Re-planning${iterLabel} using anthropic/${cliModel}`);
+        }
+        else {
+            console.log(`${ts()} ${taskLabel} Running Claude CLI ${chalk.dim(`(model: ${chalk.yellow(cliModel)})`)}`);
+            await postLog(task.id, `${PREFIX} Starting planning agent using anthropic/${cliModel}`);
+        }
+        // 2a. Run Claude CLI to generate plan
+        let rawOutput;
+        try {
+            rawOutput = await runClaudeCli(claudePath, cliModel, currentPrompt, cleanEnv, task.id, startTime);
+        }
+        catch (error) {
+            const elapsed = Math.round((Date.now() - startTime) / 1000);
+            const errMsg = error instanceof Error ? error.message : String(error);
+            console.error(`${ts()} ${taskLabel} ${chalk.red("✗")} Failed after ${elapsed}s: ${errMsg.substring(0, 100)}`);
+            await postLog(task.id, `${PREFIX} Planning failed after ${formatElapsed(elapsed)}: ${errMsg.substring(0, 200)}`, "error", "error");
+            return false;
+        }
+        const elapsed = Math.round((Date.now() - startTime) / 1000);
+        console.log(`${ts()} ${taskLabel} ${chalk.green("✓")} Claude CLI done ${chalk.dim(`(${elapsed}s, ${rawOutput.length} chars)`)}`);
+        // 2b. Parse plan from raw output
+        let plan;
+        try {
+            plan = parseExecutionPlan(rawOutput);
+        }
+        catch (error) {
+            const errMsg = error instanceof Error ? error.message : String(error);
+            console.error(`${ts()} ${taskLabel} ${chalk.red("✗")} Plan parse failed: ${errMsg.substring(0, 100)}`);
+            await postLog(task.id, `${PREFIX} Failed to parse execution plan from Claude output: ${errMsg.substring(0, 200)}`, "error", "error");
+            // If we can't parse the plan, post raw output and let server-side try
+            return await postRawPlan(task.id, rawOutput, config.agentId, taskLabel, elapsed);
+        }
+        // 2c. Apply file cap (max 5 files per story)
+        const { truncatedCount, details } = applyFileCap(plan);
+        if (truncatedCount > 0) {
+            const msg = `${PREFIX} File cap applied: ${truncatedCount} stories truncated to max 5 targetFiles`;
+            console.log(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} ${msg}`);
+            await postLog(task.id, msg);
+            for (const detail of details) {
+                console.log(`${ts()} ${taskLabel}   ${chalk.dim(detail)}`);
+            }
+        }
+        console.log(`${ts()} ${taskLabel} Plan: ${chalk.bold(plan.stories.length)} stories`);
+        await postLog(task.id, `${PREFIX} Plan generated: ${plan.stories.length} stories (${formatElapsed(elapsed)}). Running critic validation...`);
+        // 2d. Run critic validation
+        const criticResult = await runCriticValidation(claudePath, cliModel, prd, plan, cleanEnv, taskLabel);
+        // Track best plan across iterations
+        if (criticResult && criticResult.score > bestScore) {
+            bestPlan = plan;
+            bestScore = criticResult.score;
+        }
+        else if (!criticResult && !bestPlan) {
+            // Critic failed entirely — use this plan as fallback
+            bestPlan = plan;
+        }
+        // 2e. Check critic result
+        if (!criticResult) {
+            // Critic failed (timeout, parse error, etc.) — post plan without critic gate
+            const msg = `${PREFIX} Critic validation failed — posting plan without critic score`;
+            console.log(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} ${msg}`);
+            await postLog(task.id, msg);
+            return await postValidatedPlan(task.id, plan, config.agentId, taskLabel, elapsed);
+        }
+        if (criticResult.approved || criticResult.score >= AUTO_APPROVAL_THRESHOLD) {
+            // Approved! Post the file-capped plan
+            const msg = `${PREFIX} Critic approved (score: ${criticResult.score}/100)`;
+            await postLog(task.id, msg);
+            return await postValidatedPlan(task.id, plan, config.agentId, taskLabel, elapsed);
+        }
+        // 2f. Rejected — append critic feedback for next iteration
+        if (iteration < MAX_ITERATIONS) {
+            const feedback = formatCriticFeedback(criticResult);
+            currentPrompt = basePrompt + "\n\n" + feedback;
+            const msg = `${PREFIX} Critic rejected (score: ${criticResult.score}/100, threshold: ${AUTO_APPROVAL_THRESHOLD}). Re-planning with feedback...`;
+            console.log(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} ${msg}`);
+            await postLog(task.id, msg);
+            if (criticResult.risks.length > 0) {
+                await postLog(task.id, `${PREFIX} Critic risks: ${criticResult.risks.join("; ")}`);
+            }
+        }
+        else {
+            // Final iteration — rejected
+            const msg = `${PREFIX} Critic rejected after ${MAX_ITERATIONS} iterations (best score: ${bestScore}/100, threshold: ${AUTO_APPROVAL_THRESHOLD})`;
+            console.error(`${ts()} ${taskLabel} ${chalk.red("✗")} ${msg}`);
+            await postLog(task.id, msg, "error", "error");
+            if (criticResult.risks.length > 0) {
+                await postLog(task.id, `${PREFIX} Final risks: ${criticResult.risks.join("; ")}`, "error", "error");
+            }
+            if (criticResult.suggestions && criticResult.suggestions.length > 0) {
+                await postLog(task.id, `${PREFIX} Suggestions: ${criticResult.suggestions.join("; ")}`, "error", "error");
+            }
+        }
+    }
+    // All iterations exhausted — fail
+    return false;
+}
+/**
+ * Post a validated (file-capped) plan to the cloud API.
+ * Re-serializes the plan as a JSON code block since the server-side
+ * parseExecutionPlan() expects that format.
+ */
+async function postValidatedPlan(taskId, plan, agentId, taskLabel, elapsed) {
+    const serialized = serializePlan(plan);
     try {
-        rawOutput = await runClaudeCli(claudePath, cliModel, prompt, cleanEnv, task.id, startTime);
+        const result = await api.post("/api/agent/plan-result", {
+            taskId,
+            rawOutput: serialized,
+            agentId,
+        });
+        const storyCount = result.data.storyCount;
+        console.log(`${ts()} ${taskLabel} ${chalk.green("✓")} Plan validated: ${chalk.bold(storyCount)} stories → ${chalk.green("queued")}`);
+        await postLog(taskId, `${PREFIX} Plan validated: ${storyCount} stories. Task queued for execution.`);
+        await postProgress(taskId, "complete", elapsed, "Planning complete", 0, 0);
+        return true;
     }
     catch (error) {
-        const elapsed = Math.round((Date.now() - startTime) / 1000);
-        const errMsg = error instanceof Error ? error.message : String(error);
-        console.error(`${ts()} ${taskLabel} ${chalk.red("✗")} Failed after ${elapsed}s: ${errMsg.substring(0, 100)}`);
-        await postLog(task.id, `Planning agent failed after ${elapsed}s: ${errMsg.substring(0, 200)}`, "error", "error");
+        const err = error;
+        const detail = err.response?.data?.detail || String(error);
+        console.error(`${ts()} ${taskLabel} ${chalk.red("✗")} Server validation failed: ${detail.substring(0, 100)}`);
+        await postLog(taskId, `${PREFIX} Server-side plan validation failed: ${detail.substring(0, 200)}`, "error", "error");
         return false;
     }
-    const elapsed = Math.round((Date.now() - startTime) / 1000);
-    console.log(`${ts()} ${taskLabel} ${chalk.green("✓")} Claude CLI done ${chalk.dim(`(${elapsed}s, ${rawOutput.length} chars)`)}`);
-    await postLog(task.id, `Planning complete (${elapsed}s, ${rawOutput.length} chars). Validating plan...`);
-    // 3. Post raw output back to cloud API for validation
+}
+/**
+ * Post raw (unparsed) plan output to the cloud API as a fallback.
+ * Used when local plan parsing fails — let the server try.
+ */
+async function postRawPlan(taskId, rawOutput, agentId, taskLabel, elapsed) {
     try {
         const result = await api.post("/api/agent/plan-result", {
-            taskId: task.id,
+            taskId,
             rawOutput,
-            agentId: config.agentId,
+            agentId,
         });
         const storyCount = result.data.storyCount;
-        console.log(`${ts()} ${taskLabel} ${chalk.green("✓")} Plan validated: ${chalk.bold(storyCount)} stories → ${chalk.green("queued")}`);
-        await postLog(task.id, `Plan validated: ${storyCount} stories. Task queued for execution.`);
-        await postProgress(task.id, "complete", elapsed, "Planning complete", 0, 0);
+        console.log(`${ts()} ${taskLabel} ${chalk.green("✓")} Plan validated (server-side): ${chalk.bold(storyCount)} stories → ${chalk.green("queued")}`);
+        await postLog(taskId, `${PREFIX} Plan validated: ${storyCount} stories. Task queued for execution.`);
+        await postProgress(taskId, "complete", elapsed, "Planning complete", 0, 0);
         return true;
     }
     catch (error) {
         const err = error;
         const detail = err.response?.data?.detail || String(error);
         console.error(`${ts()} ${taskLabel} ${chalk.red("✗")} Validation failed: ${detail.substring(0, 100)}`);
-        await postLog(task.id, `Plan validation failed: ${detail.substring(0, 200)}`, "error", "error");
+        await postLog(taskId, `${PREFIX} Plan validation failed: ${detail.substring(0, 200)}`, "error", "error");
         return false;
     }
 }

package/dist/poller.d.ts CHANGED Viewed

File without changes

package/dist/poller.js CHANGED Viewed

File without changes

package/dist/spawner.d.ts CHANGED Viewed

File without changes

package/dist/spawner.js CHANGED Viewed

File without changes

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@workermill/agent",
-  "version": "0.1.1",
+  "version": "0.2.0",
   "description": "WorkerMill Remote Agent - Run AI workers locally with your Claude Max subscription",
   "type": "module",
   "main": "./dist/index.js",