@workermill/agent 0.1.2 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
File without changes
package/dist/api.d.ts CHANGED
File without changes
package/dist/api.js CHANGED
File without changes
package/dist/cli.d.ts CHANGED
File without changes
package/dist/cli.js CHANGED
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
package/dist/config.d.ts CHANGED
File without changes
package/dist/config.js CHANGED
File without changes
package/dist/index.d.ts CHANGED
File without changes
package/dist/index.js CHANGED
File without changes
@@ -0,0 +1,82 @@
1
+ /**
2
+ * Plan Validator for Remote Agent
3
+ *
4
+ * Validates execution plans locally before posting to the cloud API.
5
+ * Implements the same guardrails as the server-side planning pipeline:
6
+ * 1. File cap: max 5 targetFiles per story (prevents scope explosion)
7
+ * 2. Critic validation: LLM scores the plan, rejects below threshold
8
+ *
9
+ * This ensures remote agent plans get the same quality gates as cloud plans,
10
+ * even though the planning prompt runs locally via Claude CLI.
11
+ */
/**
 * One story (step) of an execution plan, as parsed from the planner's JSON output.
 */
export interface PlannedStory {
    /** Story identifier; applyFileCap() uses it to label its truncation details. */
    id: string;
    title: string;
    description: string;
    persona: string;
    priority: number;
    estimatedEffort: "small" | "medium" | "large";
    /** NOTE(review): presumably ids of stories this one depends on — confirm against the planner prompt. */
    dependencies: string[];
    acceptanceCriteria: string[];
    /**
     * Files the story targets. Subject to the file cap described in this module's
     * header (max 5 per story); see applyFileCap().
     */
    targetFiles?: string[];
    scope?: string;
}
/**
 * A complete execution plan: the object returned by parseExecutionPlan() and
 * consumed by applyFileCap(), serializePlan(), buildCriticPrompt() and
 * runCriticValidation().
 */
export interface ExecutionPlan {
    summary: string;
    /** The stories making up the plan; applyFileCap() iterates over these. */
    stories: PlannedStory[];
    risks: string[];
    assumptions: string[];
}
/**
 * The critic's verdict on a plan, as returned by parseCriticResponse().
 */
export interface CriticResult {
    /** The critic's own approve/reject decision, as reported in its JSON response. */
    approved: boolean;
    /** Plan score out of 100; compared against AUTO_APPROVAL_THRESHOLD. */
    score: number;
    /** Specific issues the critic identified. */
    risks: string[];
    /** Improvements proposed by the critic, when it supplied any. */
    suggestions?: string[];
    /** Optional per-story feedback, keyed by story id. */
    storyFeedback?: Array<{
        storyId: string;
        feedback: string;
        suggestedChanges?: string[];
    }>;
}
/** Critic score (out of 100) a plan needs in order to pass; quoted in critic feedback and logs. */
declare const AUTO_APPROVAL_THRESHOLD = 85;
/**
 * Parse execution plan JSON from raw Claude CLI output.
 * Mirrors server-side parseExecutionPlan() in planning-agent-local.ts.
 *
 * @param output Raw text printed by the planner CLI run.
 * @returns The plan object parsed from the JSON found in the output.
 * @throws When no parseable JSON execution plan can be extracted from the output.
 */
export declare function parseExecutionPlan(output: string): ExecutionPlan;
/**
 * Apply file cap to all stories. Truncates targetFiles > MAX_TARGET_FILES.
 * Returns details about truncated stories for logging.
 *
 * The plan is modified in place: a story whose targetFiles is missing or not an
 * array gets an empty array, and a story over the cap keeps only its first
 * MAX_TARGET_FILES entries.
 *
 * @param plan Plan whose stories are capped in place.
 * @returns `truncatedCount` — number of stories that were truncated;
 *          `details` — one human-readable line per truncated story.
 */
export declare function applyFileCap(plan: ExecutionPlan): {
    truncatedCount: number;
    details: string[];
};
/**
 * Re-serialize plan as a JSON code block for posting to the API.
 * The server-side parseExecutionPlan() expects ```json ... ``` blocks.
 *
 * @param plan Plan to serialize.
 * @returns The plan as pretty-printed JSON wrapped in a ```json fence.
 */
export declare function serializePlan(plan: ExecutionPlan): string;
/**
 * Build the critic prompt with PRD and plan substituted.
 *
 * @param prd Requirements text inserted at the prompt's PRD placeholder.
 * @param plan Plan inserted (as pretty-printed JSON) at the prompt's plan placeholder.
 * @returns The complete critic prompt.
 */
export declare function buildCriticPrompt(prd: string, plan: ExecutionPlan): string;
/**
 * Parse critic JSON response from raw Claude CLI output.
 *
 * Accepts a bare JSON object, one wrapped in a markdown code fence, or one
 * preceded by reasoning text.
 *
 * @param text Raw text printed by the critic CLI run.
 * @returns The critic's verdict.
 * @throws When the response does not contain parseable JSON.
 */
export declare function parseCriticResponse(text: string): CriticResult;
/**
 * Run the critic via Claude CLI (lightweight — no tools, just reasoning).
 * Returns the raw text output.
 *
 * The prompt is written to the CLI's stdin. The promise rejects when the
 * process fails to start, exits with a non-zero code, or runs longer than
 * 3 minutes (it is then sent SIGTERM).
 *
 * @param claudePath Path of the Claude CLI executable to spawn.
 * @param model Model name passed via `--model`.
 * @param prompt Prompt text sent on stdin.
 * @param env Environment for the spawned process.
 * @returns Everything the CLI wrote to stdout.
 */
export declare function runCriticCli(claudePath: string, model: string, prompt: string, env: Record<string, string | undefined>): Promise<string>;
/**
 * Format critic feedback for appending to the planner prompt on re-run.
 *
 * @param critic Verdict of the rejected plan.
 * @returns Markdown text listing the score, risks, suggestions and per-story
 *          feedback, ending with an instruction to generate a revised plan.
 */
export declare function formatCriticFeedback(critic: CriticResult): string;
/**
 * Run critic validation on a parsed plan.
 * Returns the critic result, or null if critic fails (non-blocking).
 *
 * @param claudePath Path of the Claude CLI executable.
 * @param model Model name used for the critic run.
 * @param prd Requirements text the plan is judged against.
 * @param plan Plan to review.
 * @param env Environment for the spawned CLI process.
 * @param taskLabel Label included in every console line this function logs.
 * @returns The parsed verdict, or null when the CLI run or the response parsing
 *          fails (the failure is logged, not thrown).
 */
export declare function runCriticValidation(claudePath: string, model: string, prd: string, plan: ExecutionPlan, env: Record<string, string | undefined>, taskLabel: string): Promise<CriticResult | null>;
export { AUTO_APPROVAL_THRESHOLD };
@@ -0,0 +1,268 @@
1
+ /**
2
+ * Plan Validator for Remote Agent
3
+ *
4
+ * Validates execution plans locally before posting to the cloud API.
5
+ * Implements the same guardrails as the server-side planning pipeline:
6
+ * 1. File cap: max 5 targetFiles per story (prevents scope explosion)
7
+ * 2. Critic validation: LLM scores the plan, rejects below threshold
8
+ *
9
+ * This ensures remote agent plans get the same quality gates as cloud plans,
10
+ * even though the planning prompt runs locally via Claude CLI.
11
+ */
12
+ import { spawn } from "child_process";
13
+ import chalk from "chalk";
// ============================================================================
// CONSTANTS
// ============================================================================
/** Maximum number of targetFiles a story may keep; applyFileCap() drops the rest. */
const MAX_TARGET_FILES = 5;
/** Critic score (out of 100) a plan must reach to pass; re-exported at the end of this module. */
const AUTO_APPROVAL_THRESHOLD = 85;
19
+ // ============================================================================
20
+ // PLAN PARSING
21
+ // ============================================================================
/**
 * Parse execution plan JSON from raw Claude CLI output.
 * Mirrors server-side parseExecutionPlan() in planning-agent-local.ts.
 *
 * Candidates are tried in order: every ```json fenced block in the output,
 * then the widest `{ ... "stories" ... }` span of the raw text. The first
 * candidate that is valid JSON AND carries a `stories` array is returned, so
 * an unrelated or malformed JSON block no longer hides a valid plan, and
 * callers never receive an object whose `stories` cannot be iterated.
 *
 * @param output Raw text printed by the planner CLI run.
 * @returns The parsed plan object (always has a `stories` array).
 * @throws {Error} When the output contains no JSON candidate, or when no
 *         candidate parses into an object with a `stories` array.
 */
export function parseExecutionPlan(output) {
    const candidates = [];
    for (const fenced of output.matchAll(/```json\s*([\s\S]*?)\s*```/g)) {
        candidates.push(fenced[1]);
    }
    const rawJsonMatch = output.match(/\{[\s\S]*"stories"[\s\S]*\}/);
    if (rawJsonMatch) {
        candidates.push(rawJsonMatch[0]);
    }
    if (candidates.length === 0) {
        throw new Error("Could not find JSON execution plan in output");
    }
    let lastProblem = "no candidate was tried";
    for (const candidate of candidates) {
        let parsed;
        try {
            parsed = JSON.parse(candidate);
        }
        catch (error) {
            // Remember why this candidate failed, then try the next one.
            lastProblem = error instanceof Error ? error.message : String(error);
            continue;
        }
        if (parsed !== null && typeof parsed === "object" && Array.isArray(parsed.stories)) {
            return parsed;
        }
        lastProblem = 'JSON does not contain a "stories" array';
    }
    throw new Error(`Could not parse JSON execution plan from output: ${lastProblem}`);
}
37
+ // ============================================================================
38
+ // FILE CAP
39
+ // ============================================================================
/**
 * Enforce the per-story file cap on a plan, modifying it in place.
 *
 * A story whose targetFiles is missing or not an array gets an empty array.
 * A story listing more than MAX_TARGET_FILES files keeps only the first
 * MAX_TARGET_FILES of them.
 *
 * @param plan Plan whose stories are capped.
 * @returns `truncatedCount` — how many stories were truncated;
 *          `details` — one log line per truncated story naming the dropped files.
 */
export function applyFileCap(plan) {
    const details = [];
    for (const story of plan.stories) {
        const files = story.targetFiles;
        if (!Array.isArray(files)) {
            // Covers undefined/null as well as any non-array value.
            story.targetFiles = [];
            continue;
        }
        if (files.length <= MAX_TARGET_FILES) {
            continue;
        }
        const kept = files.slice(0, MAX_TARGET_FILES);
        const dropped = files.slice(MAX_TARGET_FILES);
        details.push(`${story.id}: ${files.length} files → ${MAX_TARGET_FILES} (dropped: ${dropped.join(", ")})`);
        story.targetFiles = kept;
    }
    // Exactly one detail line is recorded per truncated story.
    return { truncatedCount: details.length, details };
}
60
+ // ============================================================================
61
+ // PLAN SERIALIZATION
62
+ // ============================================================================
/**
 * Re-serialize plan as a JSON code block for posting to the API.
 * The server-side parseExecutionPlan() expects ```json ... ``` blocks.
 *
 * The fence is located with a non-greedy match up to the next ```, so a
 * literal ``` inside any string of the plan (for example a description that
 * embeds a code sample) would end the block early and leave truncated JSON.
 * Such runs are therefore written with \u0060 escapes, which JSON.parse
 * decodes back to the same backticks. Plans without ``` serialize exactly as
 * before.
 *
 * @param plan Plan to serialize.
 * @returns Pretty-printed JSON (2-space indent) wrapped in a ```json fence.
 */
export function serializePlan(plan) {
    // String() keeps the historical "undefined" text for non-serializable input.
    const json = String(JSON.stringify(plan, null, 2));
    // Backticks can only occur inside JSON string literals, where \u0060 is a valid escape.
    const fenceSafeJson = json.replace(/```/g, "\\u0060\\u0060\\u0060");
    return "```json\n" + fenceSafeJson + "\n```";
}
70
+ // ============================================================================
71
+ // CRITIC
72
+ // ============================================================================
/**
 * Critic prompt — identical to server-side critic-agent.ts CRITIC_PROMPT.
 *
 * Contains two placeholders, {{PRD}} and {{PLAN}}, filled by buildCriticPrompt().
 */
const CRITIC_PROMPT = `You are a Senior Architect reviewing an execution plan. Your job is to ensure the plan is appropriately sized for the task.

Review this execution plan against the PRD:

## PRD (Product Requirements Document)
{{PRD}}

## PROPOSED EXECUTION PLAN
{{PLAN}}

## Review Guidelines

**IMPORTANT: Match plan size to task complexity**

- Simple tasks (typos, config changes, single-file fixes) = 1 step is CORRECT
- Medium tasks (2-4 files, small features) = 2-3 steps is appropriate
- Complex tasks (new systems, security) = 3-5 steps is appropriate

**Do NOT penalize:**
- Single-step plans for genuinely simple tasks
- Using one persona when only one skill is needed

**DO check for:**
1. **Missing Requirements** - Does the plan cover what the PRD asks for?
2. **Vague Instructions** - Will the worker know what to do?
3. **Security Issues** - Only for tasks involving auth, user data, or external input
4. **Unrealistic Scope** - Any step targeting >3 files MUST score below 85 (auto-rejection threshold). Each step should modify at most 3 files. If a step needs more, split it into multiple steps first.
5. **Missing Operational Steps** - If the PRD requires deployment, provisioning, migrations, or running commands, does the plan include operational steps? Writing code is not the same as deploying it.
6. **Overlapping File Scope** - If two or more steps share the same targetFiles, this causes parallel merge conflicts. Steps MUST NOT overlap on targetFiles. Deduct 10 points per shared file across steps.

## Scoring Guide

- **90-100**: Plan matches task complexity, requirements covered
- **75-89**: Minor gaps but fundamentally sound
- **50-74**: Significant issues or wrong-sized for the task
- **0-49**: Fundamentally flawed

## Output Format

Respond with ONLY a JSON object (no markdown, no explanation):
{"approved": boolean, "score": number, "risks": ["risk1", "risk2"], "suggestions": ["suggestion1", "suggestion2"], "storyFeedback": [{"storyId": "step-0", "feedback": "specific feedback", "suggestedChanges": ["change1"]}]}

Rules:
- approved = true if score >= 85 AND plan is right-sized for task
- risks = specific issues (empty array if none)
- suggestions = actionable improvements (empty array if none)
- storyFeedback = per-step feedback (optional, only for steps that need changes)`;
/**
 * Build the critic prompt with PRD and plan substituted.
 *
 * Both placeholders are filled in a single pass through a replacer function:
 *  - a function's return value is inserted verbatim, so `$&`, `$'`, `$$` etc.
 *    occurring in the PRD or the plan JSON are not interpreted as replacement
 *    patterns;
 *  - inserted text is never rescanned, so a PRD that itself mentions
 *    `{{PLAN}}` cannot capture the plan substitution.
 *
 * @param prd Requirements text inserted at the {{PRD}} placeholder.
 * @param plan Plan inserted at the {{PLAN}} placeholder as pretty-printed JSON.
 * @returns The complete critic prompt.
 */
export function buildCriticPrompt(prd, plan) {
    const substitutions = {
        PRD: prd,
        PLAN: JSON.stringify(plan, null, 2),
    };
    return CRITIC_PROMPT.replace(/\{\{(PRD|PLAN)\}\}/g, (_placeholder, key) => substitutions[key]);
}
/**
 * Normalize a critic list field: arrays pass through, a non-empty string
 * becomes a one-element list, anything else yields the fallback.
 */
function toCriticList(value, fallback) {
    if (Array.isArray(value)) {
        return value;
    }
    if (typeof value === "string" && value.trim() !== "") {
        return [value];
    }
    return fallback;
}
/**
 * Parse critic JSON response from raw Claude CLI output.
 *
 * Accepts a bare JSON object, one wrapped in a markdown code fence, and one
 * surrounded by reasoning text (leading and trailing text outside the
 * outermost braces is ignored).
 *
 * The result is validated so downstream comparisons and joins are safe:
 *  - `score` must be a finite number (numeric strings are accepted); it is
 *    rounded and clamped to 0..100. A missing score throws instead of
 *    producing NaN.
 *  - `approved` is true only for a literal JSON `true`.
 *  - `risks` is always an array; `suggestions` and `storyFeedback` are arrays
 *    or undefined.
 *
 * @param text Raw text printed by the critic CLI run.
 * @returns The normalized critic verdict.
 * @throws {SyntaxError} When the extracted text is not valid JSON.
 * @throws {Error} When the JSON is not an object or has no numeric score.
 */
export function parseCriticResponse(text) {
    let jsonText = text.trim();
    // Handle markdown code blocks
    if (jsonText.includes("```")) {
        const match = jsonText.match(/```(?:json)?\s*([\s\S]*?)```/);
        if (match)
            jsonText = match[1].trim();
    }
    // Keep only the outermost {...} so reasoning text before or after is ignored
    const jsonStart = jsonText.indexOf("{");
    const jsonEnd = jsonText.lastIndexOf("}");
    if (jsonStart !== -1 && jsonEnd > jsonStart) {
        jsonText = jsonText.slice(jsonStart, jsonEnd + 1);
    }
    const result = JSON.parse(jsonText);
    if (result === null || typeof result !== "object" || Array.isArray(result)) {
        throw new Error("Critic response is not a JSON object");
    }
    const rawScore = typeof result.score === "string" ? Number.parseFloat(result.score) : result.score;
    if (typeof rawScore !== "number" || !Number.isFinite(rawScore)) {
        throw new Error("Critic response has no numeric score");
    }
    return {
        approved: result.approved === true,
        score: Math.max(0, Math.min(100, Math.round(rawScore))),
        risks: toCriticList(result.risks, []),
        suggestions: toCriticList(result.suggestions, undefined),
        storyFeedback: Array.isArray(result.storyFeedback)
            ? result.storyFeedback
            : undefined,
    };
}
/**
 * Run the critic via Claude CLI (lightweight — no tools, just reasoning).
 * Returns the raw text output.
 *
 * The prompt is sent on the CLI's stdin and stdout is collected in full.
 *
 * @param claudePath Path of the Claude CLI executable to spawn.
 * @param model Model name passed via `--model`.
 * @param prompt Prompt text written to stdin.
 * @param env Environment for the spawned process.
 * @returns Promise resolving to everything the CLI wrote to stdout. It rejects
 *          when the process cannot be started, ends with a non-zero exit code
 *          or a signal, or runs longer than 3 minutes (it is then sent SIGTERM).
 */
export function runCriticCli(claudePath, model, prompt, env) {
    return new Promise((resolve, reject) => {
        const proc = spawn(claudePath, [
            "--print",
            "--model",
            model,
            "--permission-mode",
            "bypassPermissions",
        ], {
            env,
            stdio: ["pipe", "pipe", "pipe"],
        });
        let stdout = "";
        let stderr = "";
        let settled = false;
        let timeout;
        // Timeout, "error" and "close" can all fire for one run; only the first outcome counts.
        const settle = (finish) => {
            if (settled)
                return;
            settled = true;
            clearTimeout(timeout);
            finish();
        };
        proc.stdout.on("data", (data) => {
            stdout += data.toString();
        });
        proc.stderr.on("data", (data) => {
            stderr += data.toString();
        });
        // "close" (unlike "exit") fires only after the stdio streams have ended,
        // so stdout is complete when the promise resolves.
        proc.on("close", (code, signal) => {
            settle(() => {
                if (code !== 0) {
                    reject(new Error(`Critic CLI failed (exit ${code ?? signal}): ${stderr.substring(0, 300)}`));
                }
                else {
                    resolve(stdout);
                }
            });
        });
        proc.on("error", (err) => {
            settle(() => reject(err));
        });
        timeout = setTimeout(() => {
            proc.kill("SIGTERM");
            settle(() => reject(new Error("Critic CLI timed out after 3 minutes")));
        }, 180_000);
        // A child that dies before reading its input makes the write fail (EPIPE).
        // Without a listener that would surface as an unhandled stream error; the
        // failure itself is reported through "error"/"close" above.
        proc.stdin.on("error", () => { });
        proc.stdin.write(prompt);
        proc.stdin.end();
    });
}
/**
 * Render a rejected plan's critic verdict as markdown, meant to be appended to
 * the planner prompt for the next attempt.
 *
 * Layout: a heading with the score and the passing threshold, then one
 * bullet section each for risks, suggestions and per-story feedback (each
 * only when non-empty), and finally an instruction to produce a revised plan.
 *
 * @param critic Verdict of the rejected plan.
 * @returns The feedback text, lines joined with "\n".
 */
export function formatCriticFeedback(critic) {
    // A section is its heading, its bullet lines, and a trailing blank line.
    const bulletSection = (heading, bullets) => [heading, ...bullets, ""];
    const out = [
        "",
        "## CRITIC FEEDBACK — Your previous plan was REJECTED",
        "",
        `Score: ${critic.score}/100 (need >= ${AUTO_APPROVAL_THRESHOLD} to pass)`,
        "",
    ];
    if (critic.risks.length > 0) {
        out.push(...bulletSection("### Risks Identified:", [...critic.risks].map((risk) => `- ${risk}`)));
    }
    const { suggestions, storyFeedback } = critic;
    if (suggestions && suggestions.length > 0) {
        out.push(...bulletSection("### Required Changes:", [...suggestions].map((suggestion) => `- ${suggestion}`)));
    }
    if (storyFeedback && storyFeedback.length > 0) {
        const bullets = [];
        for (const entry of storyFeedback) {
            bullets.push(`- **${entry.storyId}**: ${entry.feedback}`);
            if (entry.suggestedChanges) {
                // Nested bullets: the concrete changes requested for this story.
                bullets.push(...[...entry.suggestedChanges].map((change) => ` - ${change}`));
            }
        }
        out.push(...bulletSection("### Per-Story Feedback:", bullets));
    }
    out.push("**You MUST address ALL feedback above.** Each story must target at most 5 files.", "Stories MUST NOT overlap on targetFiles. Generate a revised plan.");
    return out.join("\n");
}
/** Current local time (locale time format), dimmed, used as a prefix for console log lines. */
function ts() {
    const now = new Date();
    const clock = now.toLocaleTimeString();
    return chalk.dim(clock);
}
/**
 * Have the critic review a parsed plan and report its verdict.
 *
 * Builds the critic prompt, runs the critic CLI, parses its response and logs
 * the score next to the approval threshold. Any failure along the way is
 * logged and turned into a null result, so the caller decides how to proceed.
 *
 * @param claudePath Path of the Claude CLI executable.
 * @param model Model name used for the critic run.
 * @param prd Requirements text the plan is judged against.
 * @param plan Plan to review.
 * @param env Environment for the spawned CLI process.
 * @param taskLabel Label included in every console line logged here.
 * @returns The critic result, or null if the critic run or parsing failed.
 */
export async function runCriticValidation(claudePath, model, prd, plan, env, taskLabel) {
    const criticPrompt = buildCriticPrompt(prd, plan);
    console.log(`${ts()} ${taskLabel} ${chalk.dim("Running critic validation...")}`);
    try {
        const verdict = parseCriticResponse(await runCriticCli(claudePath, model, criticPrompt, env));
        let statusIcon;
        if (verdict.score >= AUTO_APPROVAL_THRESHOLD) {
            statusIcon = chalk.green("✓");
        }
        else {
            statusIcon = chalk.red("✗");
        }
        console.log(`${ts()} ${taskLabel} ${statusIcon} Critic score: ${verdict.score}/100 (threshold: ${AUTO_APPROVAL_THRESHOLD})`);
        return verdict;
    }
    catch (error) {
        // Non-blocking: report the problem and signal "no verdict" to the caller.
        let errMsg;
        if (error instanceof Error) {
            errMsg = error.message;
        }
        else {
            errMsg = String(error);
        }
        console.error(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} Critic failed: ${errMsg.substring(0, 100)}`);
        return null;
    }
}
268
+ export { AUTO_APPROVAL_THRESHOLD };
package/dist/planner.d.ts CHANGED
@@ -3,7 +3,13 @@
3
3
  *
4
4
  * Fetches the planning prompt from the cloud API, runs it through
5
5
  * Claude CLI locally (using the customer's Claude Max subscription),
6
- * and posts the raw output back for server-side validation.
6
+ * validates with a Planner-Critic loop, and posts the approved plan
7
+ * back for server-side processing.
8
+ *
9
+ * Guardrails (matching server-side planning pipeline):
10
+ * 1. File cap: max 5 targetFiles per story (prevents scope explosion)
11
+ * 2. Critic validation: LLM scores the plan, rejects below 85/100
12
+ * 3. Max 3 Planner-Critic iterations before failure
7
13
  *
8
14
  * Logs are streamed to the cloud dashboard in real-time so the user
9
15
  * sees the same planning progress as cloud mode.
@@ -12,8 +18,18 @@ import { type AgentConfig } from "./config.js";
12
18
  export interface PlanningTask {
13
19
  id: string;
14
20
  summary: string;
21
+ description: string | null;
15
22
  }
16
23
  /**
17
- * Run planning for a task: fetch prompt, execute Claude CLI, post result.
24
+ * Run planning for a task with Planner-Critic validation loop.
25
+ *
26
+ * Flow:
27
+ * 1. Fetch planning prompt from cloud API
28
+ * 2. Run Claude CLI to generate plan
29
+ * 3. Parse plan, apply file cap (max 5 files per story)
30
+ * 4. Run critic validation via Claude CLI
31
+ * 5. If critic approves (score >= 85): post validated plan to API
32
+ * 6. If critic rejects: re-run planner with feedback (up to MAX_ITERATIONS)
33
+ * 7. After MAX_ITERATIONS without approval: fail the task
18
34
  */
19
35
  export declare function planTask(task: PlanningTask, config: AgentConfig): Promise<boolean>;
package/dist/planner.js CHANGED
@@ -3,7 +3,13 @@
3
3
  *
4
4
  * Fetches the planning prompt from the cloud API, runs it through
5
5
  * Claude CLI locally (using the customer's Claude Max subscription),
6
- * and posts the raw output back for server-side validation.
6
+ * validates with a Planner-Critic loop, and posts the approved plan
7
+ * back for server-side processing.
8
+ *
9
+ * Guardrails (matching server-side planning pipeline):
10
+ * 1. File cap: max 5 targetFiles per story (prevents scope explosion)
11
+ * 2. Critic validation: LLM scores the plan, rejects below 85/100
12
+ * 3. Max 3 Planner-Critic iterations before failure
7
13
  *
8
14
  * Logs are streamed to the cloud dashboard in real-time so the user
9
15
  * sees the same planning progress as cloud mode.
@@ -12,6 +18,9 @@ import chalk from "chalk";
12
18
  import { spawn } from "child_process";
13
19
  import { findClaudePath } from "./config.js";
14
20
  import { api } from "./api.js";
21
+ import { parseExecutionPlan, applyFileCap, serializePlan, runCriticValidation, formatCriticFeedback, AUTO_APPROVAL_THRESHOLD, } from "./plan-validator.js";
22
+ /** Max Planner-Critic iterations before giving up */
23
+ const MAX_ITERATIONS = 3;
15
24
  /** Timestamp prefix */
16
25
  function ts() {
17
26
  return chalk.dim(new Date().toLocaleTimeString());
@@ -220,7 +229,16 @@ function runClaudeCli(claudePath, model, prompt, env, taskId, startTime) {
220
229
  });
221
230
  }
222
231
  /**
223
- * Run planning for a task: fetch prompt, execute Claude CLI, post result.
232
+ * Run planning for a task with Planner-Critic validation loop.
233
+ *
234
+ * Flow:
235
+ * 1. Fetch planning prompt from cloud API
236
+ * 2. Run Claude CLI to generate plan
237
+ * 3. Parse plan, apply file cap (max 5 files per story)
238
+ * 4. Run critic validation via Claude CLI
239
+ * 5. If critic approves (score >= 85): post validated plan to API
240
+ * 6. If critic rejects: re-run planner with feedback (up to MAX_ITERATIONS)
241
+ * 7. After MAX_ITERATIONS without approval: fail the task
224
242
  */
225
243
  export async function planTask(task, config) {
226
244
  const taskLabel = chalk.cyan(task.id.slice(0, 8));
@@ -230,47 +248,184 @@ export async function planTask(task, config) {
230
248
  const promptResponse = await api.get("/api/agent/planning-prompt", {
231
249
  params: { taskId: task.id },
232
250
  });
233
- const { prompt, model } = promptResponse.data;
251
+ const { prompt: basePrompt, model } = promptResponse.data;
234
252
  const cliModel = model || "sonnet";
235
- console.log(`${ts()} ${taskLabel} Running Claude CLI ${chalk.dim(`(model: ${chalk.yellow(cliModel)})`)}`);
236
- await postLog(task.id, `${PREFIX} Starting planning agent using anthropic/${cliModel}`);
237
- // 2. Run Claude CLI asynchronously with progress logging
238
253
  const claudePath = process.env.CLAUDE_CLI_PATH || findClaudePath() || "claude";
239
254
  const cleanEnv = { ...process.env };
240
255
  delete cleanEnv.CLAUDE_CODE_OAUTH_TOKEN;
241
256
  const startTime = Date.now();
242
- let rawOutput;
257
+ // PRD for critic validation: use task description, fall back to summary
258
+ const prd = task.description || task.summary;
259
+ // 2. Planner-Critic iteration loop
260
+ let currentPrompt = basePrompt;
261
+ let bestPlan = null;
262
+ let bestScore = 0;
263
+ for (let iteration = 1; iteration <= MAX_ITERATIONS; iteration++) {
264
+ const iterLabel = MAX_ITERATIONS > 1 ? ` (attempt ${iteration}/${MAX_ITERATIONS})` : "";
265
+ if (iteration > 1) {
266
+ console.log(`${ts()} ${taskLabel} Running Claude CLI${iterLabel} ${chalk.dim(`(model: ${chalk.yellow(cliModel)})`)}`);
267
+ await postLog(task.id, `${PREFIX} Re-planning${iterLabel} using anthropic/${cliModel}`);
268
+ }
269
+ else {
270
+ console.log(`${ts()} ${taskLabel} Running Claude CLI ${chalk.dim(`(model: ${chalk.yellow(cliModel)})`)}`);
271
+ await postLog(task.id, `${PREFIX} Starting planning agent using anthropic/${cliModel}`);
272
+ }
273
+ // 2a. Run Claude CLI to generate plan
274
+ let rawOutput;
275
+ try {
276
+ rawOutput = await runClaudeCli(claudePath, cliModel, currentPrompt, cleanEnv, task.id, startTime);
277
+ }
278
+ catch (error) {
279
+ const elapsed = Math.round((Date.now() - startTime) / 1000);
280
+ const errMsg = error instanceof Error ? error.message : String(error);
281
+ console.error(`${ts()} ${taskLabel} ${chalk.red("✗")} Failed after ${elapsed}s: ${errMsg.substring(0, 100)}`);
282
+ await postLog(task.id, `${PREFIX} Planning failed after ${formatElapsed(elapsed)}: ${errMsg.substring(0, 200)}`, "error", "error");
283
+ return false;
284
+ }
285
+ const elapsed = Math.round((Date.now() - startTime) / 1000);
286
+ console.log(`${ts()} ${taskLabel} ${chalk.green("✓")} Claude CLI done ${chalk.dim(`(${elapsed}s, ${rawOutput.length} chars)`)}`);
287
+ // 2b. Parse plan from raw output
288
+ let plan;
289
+ try {
290
+ plan = parseExecutionPlan(rawOutput);
291
+ }
292
+ catch (error) {
293
+ const errMsg = error instanceof Error ? error.message : String(error);
294
+ console.error(`${ts()} ${taskLabel} ${chalk.red("✗")} Plan parse failed: ${errMsg.substring(0, 100)}`);
295
+ await postLog(task.id, `${PREFIX} Failed to parse execution plan from Claude output: ${errMsg.substring(0, 200)}`, "error", "error");
296
+ // If we can't parse the plan, post raw output and let server-side try
297
+ return await postRawPlan(task.id, rawOutput, config.agentId, taskLabel, elapsed);
298
+ }
299
+ // 2c. Apply file cap (max 5 files per story)
300
+ const { truncatedCount, details } = applyFileCap(plan);
301
+ if (truncatedCount > 0) {
302
+ const msg = `${PREFIX} File cap applied: ${truncatedCount} stories truncated to max 5 targetFiles`;
303
+ console.log(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} ${msg}`);
304
+ await postLog(task.id, msg);
305
+ for (const detail of details) {
306
+ console.log(`${ts()} ${taskLabel} ${chalk.dim(detail)}`);
307
+ }
308
+ }
309
+ console.log(`${ts()} ${taskLabel} Plan: ${chalk.bold(plan.stories.length)} stories`);
310
+ await postLog(task.id, `${PREFIX} Plan generated: ${plan.stories.length} stories (${formatElapsed(elapsed)}). Running critic validation...`);
311
+ // 2d. Run critic validation
312
+ const criticResult = await runCriticValidation(claudePath, cliModel, prd, plan, cleanEnv, taskLabel);
313
+ // Track best plan across iterations
314
+ if (criticResult && criticResult.score > bestScore) {
315
+ bestPlan = plan;
316
+ bestScore = criticResult.score;
317
+ }
318
+ else if (!criticResult && !bestPlan) {
319
+ // Critic failed entirely — use this plan as fallback
320
+ bestPlan = plan;
321
+ }
322
+ // 2e. Check critic result
323
+ if (!criticResult) {
324
+ // Critic failed (timeout, parse error, etc.) — post plan without critic gate
325
+ const msg = `${PREFIX} Critic validation failed — posting plan without critic score`;
326
+ console.log(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} ${msg}`);
327
+ await postLog(task.id, msg);
328
+ return await postValidatedPlan(task.id, plan, config.agentId, taskLabel, elapsed);
329
+ }
330
+ if (criticResult.approved || criticResult.score >= AUTO_APPROVAL_THRESHOLD) {
331
+ // Approved! Post the file-capped plan
332
+ const msg = `${PREFIX} Critic approved (score: ${criticResult.score}/100)`;
333
+ console.log(`${ts()} ${taskLabel} ${chalk.green("✓")} ${msg}`);
334
+ await postLog(task.id, msg);
335
+ if (criticResult.risks.length > 0) {
336
+ const risksMsg = `${PREFIX} Critic risks (non-blocking): ${criticResult.risks.join("; ")}`;
337
+ console.log(`${ts()} ${taskLabel} ${chalk.dim(risksMsg)}`);
338
+ await postLog(task.id, risksMsg);
339
+ }
340
+ return await postValidatedPlan(task.id, plan, config.agentId, taskLabel, elapsed);
341
+ }
342
+ // 2f. Rejected — append critic feedback for next iteration
343
+ if (iteration < MAX_ITERATIONS) {
344
+ const feedback = formatCriticFeedback(criticResult);
345
+ currentPrompt = basePrompt + "\n\n" + feedback;
346
+ const msg = `${PREFIX} Critic rejected (score: ${criticResult.score}/100, threshold: ${AUTO_APPROVAL_THRESHOLD}). Re-planning with feedback...`;
347
+ console.log(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} ${msg}`);
348
+ await postLog(task.id, msg);
349
+ if (criticResult.risks.length > 0) {
350
+ const risksMsg = `${PREFIX} Critic risks: ${criticResult.risks.join("; ")}`;
351
+ console.log(`${ts()} ${taskLabel} ${chalk.dim(risksMsg)}`);
352
+ await postLog(task.id, risksMsg);
353
+ }
354
+ if (criticResult.suggestions && criticResult.suggestions.length > 0) {
355
+ const sugMsg = `${PREFIX} Critic suggestions: ${criticResult.suggestions.join("; ")}`;
356
+ console.log(`${ts()} ${taskLabel} ${chalk.dim(sugMsg)}`);
357
+ await postLog(task.id, sugMsg);
358
+ }
359
+ }
360
+ else {
361
+ // Final iteration — rejected
362
+ const msg = `${PREFIX} Critic rejected after ${MAX_ITERATIONS} iterations (best score: ${bestScore}/100, threshold: ${AUTO_APPROVAL_THRESHOLD})`;
363
+ console.error(`${ts()} ${taskLabel} ${chalk.red("✗")} ${msg}`);
364
+ await postLog(task.id, msg, "error", "error");
365
+ if (criticResult.risks.length > 0) {
366
+ const risksMsg = `${PREFIX} Final risks: ${criticResult.risks.join("; ")}`;
367
+ console.error(`${ts()} ${taskLabel} ${risksMsg}`);
368
+ await postLog(task.id, risksMsg, "error", "error");
369
+ }
370
+ if (criticResult.suggestions && criticResult.suggestions.length > 0) {
371
+ const sugMsg = `${PREFIX} Suggestions: ${criticResult.suggestions.join("; ")}`;
372
+ console.error(`${ts()} ${taskLabel} ${sugMsg}`);
373
+ await postLog(task.id, sugMsg, "error", "error");
374
+ }
375
+ }
376
+ }
377
+ // All iterations exhausted — fail
378
+ return false;
379
+ }
380
+ /**
381
+ * Post a validated (file-capped) plan to the cloud API.
382
+ * Re-serializes the plan as a JSON code block since the server-side
383
+ * parseExecutionPlan() expects that format.
384
+ */
385
+ async function postValidatedPlan(taskId, plan, agentId, taskLabel, elapsed) {
386
+ const serialized = serializePlan(plan);
243
387
  try {
244
- rawOutput = await runClaudeCli(claudePath, cliModel, prompt, cleanEnv, task.id, startTime);
388
+ const result = await api.post("/api/agent/plan-result", {
389
+ taskId,
390
+ rawOutput: serialized,
391
+ agentId,
392
+ });
393
+ const storyCount = result.data.storyCount;
394
+ console.log(`${ts()} ${taskLabel} ${chalk.green("✓")} Plan validated: ${chalk.bold(storyCount)} stories → ${chalk.green("queued")}`);
395
+ await postLog(taskId, `${PREFIX} Plan validated: ${storyCount} stories. Task queued for execution.`);
396
+ await postProgress(taskId, "complete", elapsed, "Planning complete", 0, 0);
397
+ return true;
245
398
  }
246
399
  catch (error) {
247
- const elapsed = Math.round((Date.now() - startTime) / 1000);
248
- const errMsg = error instanceof Error ? error.message : String(error);
249
- console.error(`${ts()} ${taskLabel} ${chalk.red("✗")} Failed after ${elapsed}s: ${errMsg.substring(0, 100)}`);
250
- await postLog(task.id, `${PREFIX} Planning failed after ${formatElapsed(elapsed)}: ${errMsg.substring(0, 200)}`, "error", "error");
400
+ const err = error;
401
+ const detail = err.response?.data?.detail || String(error);
402
+ console.error(`${ts()} ${taskLabel} ${chalk.red("✗")} Server validation failed: ${detail.substring(0, 100)}`);
403
+ await postLog(taskId, `${PREFIX} Server-side plan validation failed: ${detail.substring(0, 200)}`, "error", "error");
251
404
  return false;
252
405
  }
253
- const elapsed = Math.round((Date.now() - startTime) / 1000);
254
- console.log(`${ts()} ${taskLabel} ${chalk.green("✓")} Claude CLI done ${chalk.dim(`(${elapsed}s, ${rawOutput.length} chars)`)}`);
255
- await postLog(task.id, `${PREFIX} Planning complete (${formatElapsed(elapsed)}). Validating plan...`);
256
- // 3. Post raw output back to cloud API for validation
406
+ }
407
+ /**
408
+ * Post raw (unparsed) plan output to the cloud API as a fallback.
409
+ * Used when local plan parsing fails, so that the server can try to parse the output itself.
410
+ */
411
+ async function postRawPlan(taskId, rawOutput, agentId, taskLabel, elapsed) {
257
412
  try {
258
413
  const result = await api.post("/api/agent/plan-result", {
259
- taskId: task.id,
414
+ taskId,
260
415
  rawOutput,
261
- agentId: config.agentId,
416
+ agentId,
262
417
  });
263
418
  const storyCount = result.data.storyCount;
264
- console.log(`${ts()} ${taskLabel} ${chalk.green("✓")} Plan validated: ${chalk.bold(storyCount)} stories → ${chalk.green("queued")}`);
265
- await postLog(task.id, `${PREFIX} Plan validated: ${storyCount} stories. Task queued for execution.`);
266
- await postProgress(task.id, "complete", elapsed, "Planning complete", 0, 0);
419
+ console.log(`${ts()} ${taskLabel} ${chalk.green("✓")} Plan validated (server-side): ${chalk.bold(storyCount)} stories → ${chalk.green("queued")}`);
420
+ await postLog(taskId, `${PREFIX} Plan validated: ${storyCount} stories. Task queued for execution.`);
421
+ await postProgress(taskId, "complete", elapsed, "Planning complete", 0, 0);
267
422
  return true;
268
423
  }
269
424
  catch (error) {
270
425
  const err = error;
271
426
  const detail = err.response?.data?.detail || String(error);
272
427
  console.error(`${ts()} ${taskLabel} ${chalk.red("✗")} Validation failed: ${detail.substring(0, 100)}`);
273
- await postLog(task.id, `${PREFIX} Plan validation failed: ${detail.substring(0, 200)}`, "error", "error");
428
+ await postLog(taskId, `${PREFIX} Plan validation failed: ${detail.substring(0, 200)}`, "error", "error");
274
429
  return false;
275
430
  }
276
431
  }
package/dist/poller.d.ts CHANGED
File without changes
package/dist/poller.js CHANGED
File without changes
package/dist/spawner.d.ts CHANGED
File without changes
package/dist/spawner.js CHANGED
File without changes
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@workermill/agent",
3
- "version": "0.1.2",
3
+ "version": "0.2.1",
4
4
  "description": "WorkerMill Remote Agent - Run AI workers locally with your Claude Max subscription",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",