@workermill/agent 0.8.5 → 0.8.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,436 +0,0 @@
1
- /**
2
- * Plan Validator for Remote Agent
3
- *
4
- * Validates execution plans locally before posting to the cloud API.
5
- * Implements the same guardrails as the server-side planning pipeline:
6
- * 1. File cap: targetFiles per story are truncated to MAX_TARGET_FILES (currently 15) to prevent scope explosion
7
- * 2. Critic validation: LLM scores the plan, rejects below threshold
8
- *
9
- * This ensures remote agent plans get the same quality gates as cloud plans,
10
- * even though the planning prompt runs locally via Claude CLI.
11
- */
12
- import { spawn } from "child_process";
13
- import chalk from "chalk";
14
- import { generateText } from "./providers.js";
15
- import { api } from "./api.js";
16
- // ============================================================================
17
- // CONSTANTS
18
- // ============================================================================
19
- const MAX_TARGET_FILES = 15; // per-story cap used by applyFileCap(): targetFiles beyond this count are dropped
20
- const AUTO_APPROVAL_THRESHOLD = 85; // critic score (0-100) at or above which a plan is reported as passing; re-exported at the end of the module
21
- // ============================================================================
22
- // PLAN PARSING
23
- // ============================================================================
24
/**
 * Parse the execution plan JSON out of raw planner (Claude CLI) output.
 * Counterpart of the server-side parseExecutionPlan() in planning-agent-local.ts.
 *
 * Candidates are located by brace matching rather than a lazy regex, because
 * JSON string values may themselves contain triple-backtick fences (e.g. code
 * blocks in story descriptions taken from PRDs with CI/CD YAML examples).
 *
 * Lookup order:
 *   1. The first balanced object that follows a "```json" fence.
 *   2. For each occurrence of the `"stories"` key, the closest enclosing
 *      balanced object, searching outwards so that a nested sibling object
 *      that merely precedes the key is never mistaken for the plan root.
 *
 * A candidate that is not valid JSON no longer aborts the search; the next
 * candidate is tried instead.
 *
 * @param output Raw text emitted by the planner.
 * @returns The parsed plan object.
 * @throws {SyntaxError} The first JSON.parse error, when candidates were found
 *   but none of them parsed.
 * @throws {Error} "Could not find JSON execution plan in output" when no
 *   candidate object exists at all.
 */
export function parseExecutionPlan(output) {
    const FENCE = "```json";
    const STORIES_KEY = '"stories"';
    let firstParseError = null;
    // Parse without throwing; remember the first failure so it can be rethrown
    // if every candidate turns out to be invalid.
    const tryParse = (candidate) => {
        try {
            return { ok: true, value: JSON.parse(candidate) };
        }
        catch (err) {
            if (firstParseError === null) {
                firstParseError = err;
            }
            return { ok: false, value: undefined };
        }
    };
    // Strategy 1: object following the ```json fence.
    const fenceAt = output.indexOf(FENCE);
    if (fenceAt !== -1) {
        const open = output.indexOf("{", fenceAt + FENCE.length);
        if (open !== -1) {
            const candidate = extractBalancedJson(output, open);
            if (candidate) {
                const parsed = tryParse(candidate);
                if (parsed.ok) {
                    return parsed.value;
                }
            }
        }
    }
    // Strategy 2: raw JSON containing a "stories" key.
    for (let keyAt = output.indexOf(STORIES_KEY); keyAt !== -1; keyAt = output.indexOf(STORIES_KEY, keyAt + 1)) {
        let open = output.lastIndexOf("{", keyAt);
        while (open !== -1) {
            const candidate = extractBalancedJson(output, open);
            // Only accept an object whose span actually contains the key;
            // otherwise it is a sibling that closed before "stories".
            if (candidate && open + candidate.length > keyAt) {
                const parsed = tryParse(candidate);
                if (parsed.ok) {
                    return parsed.value;
                }
            }
            if (open === 0) {
                break; // lastIndexOf clamps negative positions to 0, so stop explicitly
            }
            open = output.lastIndexOf("{", open - 1);
        }
    }
    if (firstParseError !== null) {
        throw firstParseError;
    }
    throw new Error("Could not find JSON execution plan in output");
}
/**
 * Extract a balanced JSON object from `text`, beginning at index `start`
 * (expected to point at an opening brace).
 *
 * Braces inside string literals are ignored, and backslash escapes inside
 * strings are honoured, so embedded code blocks or quoted braces in string
 * values do not disturb the depth count.
 *
 * @param text Text to scan.
 * @param start Index of the opening `{`.
 * @returns The substring from `start` through the matching `}`, or null when
 *   the braces never balance.
 */
function extractBalancedJson(text, start) {
    let depth = 0;
    let inString = false;
    let escaped = false;
    for (let i = start; i < text.length; i++) {
        const ch = text[i];
        if (escaped) {
            // Character following a backslash inside a string: skip it.
            escaped = false;
            continue;
        }
        if (ch === "\\") {
            // A backslash only starts an escape while inside a string.
            escaped = inString;
            continue;
        }
        if (ch === '"') {
            inString = !inString;
            continue;
        }
        if (inString) {
            continue;
        }
        if (ch === "{") {
            depth++;
        }
        else if (ch === "}") {
            depth--;
            if (depth === 0) {
                return text.substring(start, i + 1);
            }
        }
    }
    return null; // Unbalanced
}
96
- // ============================================================================
97
- // FILE CAP
98
- // ============================================================================
99
- /**
100
- * Apply file cap to all stories. Truncates targetFiles > MAX_TARGET_FILES.
101
- * Returns details about truncated stories for logging.
102
- */
103
- export function applyFileCap(plan) {
104
- let truncatedCount = 0;
105
- const details = [];
106
- for (const story of plan.stories) {
107
- if (!story.targetFiles || !Array.isArray(story.targetFiles)) {
108
- story.targetFiles = [];
109
- }
110
- else if (story.targetFiles.length > MAX_TARGET_FILES) {
111
- const dropped = story.targetFiles.slice(MAX_TARGET_FILES);
112
- details.push(`${story.id}: ${story.targetFiles.length} files β†’ ${MAX_TARGET_FILES} (dropped: ${dropped.join(", ")})`);
113
- story.targetFiles = story.targetFiles.slice(0, MAX_TARGET_FILES);
114
- truncatedCount++;
115
- }
116
- }
117
- return { truncatedCount, details };
118
- }
119
- // ============================================================================
120
- // STORY CAP
121
- // ============================================================================
122
- /**
123
- * Apply story cap to the plan. Truncates stories beyond maxStories.
124
- * Returns details about dropped stories for logging.
125
- */
126
- export function applyStoryCap(plan, maxStories) {
127
- if (plan.stories.length <= maxStories) {
128
- return { droppedCount: 0, details: [] };
129
- }
130
- const droppedCount = plan.stories.length - maxStories;
131
- const dropped = plan.stories.slice(maxStories);
132
- const details = dropped.map((s) => `${s.id}: "${s.title}" (${s.persona})`);
133
- plan.stories = plan.stories.slice(0, maxStories);
134
- // Fix dependencies that reference dropped stories
135
- const validIds = new Set(plan.stories.map((s) => s.id));
136
- for (const story of plan.stories) {
137
- story.dependencies = story.dependencies.filter((dep) => validIds.has(dep));
138
- }
139
- return { droppedCount, details };
140
- }
141
- // ============================================================================
142
- // FILE OVERLAP VALIDATION
143
- // ============================================================================
144
- /**
145
- * Resolve file overlaps by assigning each shared file to exactly one story.
146
- * When multiple stories list the same targetFile, the first story keeps it
147
- * and it's removed from subsequent stories. This prevents parallel merge
148
- * conflicts during consolidation β€” same auto-fix pattern as applyFileCap.
149
- *
150
- * Returns details about resolved overlaps for logging.
151
- */
152
- export function resolveFileOverlaps(plan) {
153
- const fileOwner = new Map(); // file β†’ first story that claims it
154
- let resolvedCount = 0;
155
- const details = [];
156
- for (const story of plan.stories) {
157
- if (!story.targetFiles || story.targetFiles.length === 0)
158
- continue;
159
- const kept = [];
160
- const removed = [];
161
- for (const file of story.targetFiles) {
162
- const owner = fileOwner.get(file);
163
- if (owner) {
164
- // File already claimed by an earlier story β€” remove from this one
165
- removed.push(file);
166
- }
167
- else {
168
- fileOwner.set(file, story.id);
169
- kept.push(file);
170
- }
171
- }
172
- if (removed.length > 0) {
173
- story.targetFiles = kept;
174
- resolvedCount += removed.length;
175
- details.push(`${story.id}: removed ${removed.join(", ")} (owned by ${removed.map((f) => fileOwner.get(f)).join(", ")})`);
176
- }
177
- }
178
- return { resolvedCount, details };
179
- }
180
- // ============================================================================
181
- // PLAN SERIALIZATION
182
- // ============================================================================
183
/**
 * Render the plan as a fenced ```json block, pretty-printed with two-space
 * indentation, ready to be posted to the API. The server-side
 * parseExecutionPlan() expects this fenced form.
 */
export function serializePlan(plan) {
    const fence = "```";
    const body = JSON.stringify(plan, null, 2);
    return `${fence}json\n${body}\n${fence}`;
}
190
- // ============================================================================
191
- // CRITIC
192
- // ============================================================================
193
/**
 * Critic prompt template, kept identical to the server-side critic-agent.ts
 * CRITIC_PROMPT. `{{PRD}}` and `{{PLAN}}` are placeholders filled in by
 * buildCriticPrompt().
 */
const CRITIC_PROMPT = `You are a Senior Architect reviewing an execution plan. Your job is to ensure the plan is appropriately sized for the task.

Review this execution plan against the PRD:

## PRD (Product Requirements Document)
{{PRD}}

## PROPOSED EXECUTION PLAN
{{PLAN}}

## Review Guidelines

**IMPORTANT: Match plan size to task complexity**

- Simple tasks (typos, config changes, single-file fixes) = 1 step is CORRECT
- Medium tasks (2-4 files, small features) = 2-3 steps is appropriate
- Complex tasks (new systems, security) = 3-5 steps is appropriate

**Do NOT penalize:**
- Single-step plans for genuinely simple tasks
- Using one persona when only one skill is needed

**DO check for:**
1. **Missing Requirements** - Does the plan cover what the PRD asks for?
2. **Scope Clarity** - Is each story's description a brief file scope label (1 line)? Stories should NOT rewrite ticket requirements.
3. **Security Issues** - Only for tasks involving auth, user data, or external input
4. **Unrealistic Scope** - Any step targeting >5 files MUST score below 85 (auto-rejection threshold). Each step should modify at most 5 files. If a step needs more, split it into multiple steps first.
5. **Missing Operational Steps** - If the PRD requires deployment, provisioning, migrations, or running commands, does the plan include operational steps? Writing code is not the same as deploying it.
6. **Overlapping File Scope** - If two or more steps share the same targetFiles, this causes parallel merge conflicts. Steps MUST NOT overlap on targetFiles. Deduct 10 points per shared file across steps.
7. **Serialization Bottleneck** - If more than half the stories depend on a single story that targets >5 files, the plan has a bottleneck. Deduct 15 points — split the foundation or allow more parallel work.
8. **Requirement Rewriting** - If any story description contains implementation details, acceptance criteria, or rewritten requirements from the PRD, deduct 15 points per offending story. Story descriptions must be ONE-LINE file scope labels (e.g., "Database layer — migrations and entity definitions"). The original ticket is the spec.

## Scoring Guide

- **90-100**: Plan matches task complexity, requirements covered
- **75-89**: Minor gaps but fundamentally sound
- **50-74**: Significant issues or wrong-sized for the task
- **0-49**: Fundamentally flawed

## Output Format

Respond with ONLY a JSON object (no markdown, no explanation):
{"approved": boolean, "score": number, "risks": ["risk1", "risk2"], "suggestions": ["suggestion1", "suggestion2"], "storyFeedback": [{"storyId": "step-0", "feedback": "specific feedback", "suggestedChanges": ["change1"]}]}

Rules:
- approved = true if score >= 85 AND plan is right-sized for task
- risks = specific issues (empty array if none)
- suggestions = actionable improvements (empty array if none)
- storyFeedback = per-step feedback (optional, only for steps that need changes)`;
/**
 * Build the critic prompt by substituting the PRD text and the pretty-printed
 * plan JSON into CRITIC_PROMPT.
 *
 * Both placeholders are filled in a single pass using a replacer function:
 *   - `$&`, `$1`, `$$` and similar sequences in the PRD or plan are inserted
 *     literally instead of being interpreted as replacement patterns;
 *   - a literal `{{PLAN}}` occurring inside the PRD text is not substituted,
 *     because inserted text is never rescanned.
 *
 * @param prd PRD text to embed.
 * @param plan Plan object; serialized with two-space indentation.
 * @returns The complete critic prompt.
 */
export function buildCriticPrompt(prd, plan) {
    const values = {
        PRD: prd,
        PLAN: JSON.stringify(plan, null, 2),
    };
    return CRITIC_PROMPT.replace(/\{\{(PRD|PLAN)\}\}/g, (_match, key) => String(values[key]));
}
252
/**
 * Parse the critic's JSON verdict from raw model output.
 *
 * The response may be bare JSON, JSON wrapped in a markdown code fence, and/or
 * JSON surrounded by reasoning text. Two candidates are tried in order: the
 * contents of the first code fence (if any), then the whole response. In each,
 * the text between the first `{` and the last `}` is parsed, so prose before
 * or after the object is tolerated.
 *
 * @param text Raw critic output.
 * @returns `{ approved, score, risks, suggestions, storyFeedback }` where
 *   `score` is rounded and clamped to 0-100, `risks` is always an array, and
 *   `suggestions` / `storyFeedback` are arrays or undefined.
 * @throws {SyntaxError} When no candidate is valid JSON.
 * @throws {Error} When the parsed value is not an object or carries no numeric
 *   score (previously this produced a NaN score).
 */
export function parseCriticResponse(text) {
    const trimmed = text.trim();
    const candidates = [];
    // Handle markdown code blocks. The lazy match can stop early if the JSON
    // itself contains a fence, which is why the full text is kept as fallback.
    const fenced = trimmed.match(/```(?:json)?\s*([\s\S]*?)```/);
    if (fenced) {
        candidates.push(fenced[1]);
    }
    candidates.push(trimmed);
    let result;
    let parseError = null;
    for (const candidate of candidates) {
        const open = candidate.indexOf("{");
        const close = candidate.lastIndexOf("}");
        const jsonText = open !== -1 && close > open
            ? candidate.slice(open, close + 1)
            : candidate.trim();
        try {
            result = JSON.parse(jsonText);
            parseError = null;
            break;
        }
        catch (err) {
            if (parseError === null) {
                parseError = err;
            }
        }
    }
    if (parseError !== null) {
        throw parseError;
    }
    if (result === null || typeof result !== "object" || Array.isArray(result)) {
        throw new Error("Critic response is not a JSON object");
    }
    // Accept a number or a non-empty numeric string; anything else would
    // otherwise turn into NaN (or 0 for null) and be reported as a real score.
    const rawScore = typeof result.score === "string" && result.score.trim() !== ""
        ? Number(result.score)
        : result.score;
    if (typeof rawScore !== "number" || !Number.isFinite(rawScore)) {
        throw new Error("Critic response has no numeric score");
    }
    return {
        approved: result.approved,
        score: Math.max(0, Math.min(100, Math.round(rawScore))),
        risks: Array.isArray(result.risks) ? result.risks : [],
        suggestions: Array.isArray(result.suggestions)
            ? result.suggestions
            : undefined,
        storyFeedback: Array.isArray(result.storyFeedback)
            ? result.storyFeedback
            : undefined,
    };
}
279
/**
 * Run the critic through the Claude CLI and return its raw stdout.
 *
 * The CLI is spawned with `--print --model <model> --permission-mode
 * bypassPermissions`, the prompt is written to stdin, and every non-empty
 * output line (truncated to 200 characters) is echoed to the console and,
 * when `taskId` is given, posted to the dashboard log.
 *
 * Differences from a naive spawn wrapper:
 *   - the promise settles on "close" rather than "exit", so stdout has been
 *     fully drained before it is returned;
 *   - stdout/stderr are decoded as UTF-8 at the stream level, so a multi-byte
 *     character split across two chunks is not corrupted;
 *   - stdin errors (e.g. EPIPE when the CLI exits before reading the prompt)
 *     are captured into the stderr text instead of crashing the process.
 *
 * @param claudePath Path to the Claude CLI executable.
 * @param model Model name passed to `--model`.
 * @param prompt Prompt text written to the CLI's stdin.
 * @param env Environment for the child process.
 * @param taskId Optional task id; enables streaming to the dashboard log.
 * @returns Promise resolving to the CLI's stdout. Rejects on non-zero exit,
 *   spawn error, or after the 20 minute timeout.
 */
export function runCriticCli(claudePath, model, prompt, env, taskId) {
    return new Promise((resolve, reject) => {
        const args = ["--print", "--model", model, "--permission-mode", "bypassPermissions"];
        const proc = spawn(claudePath, args, {
            env,
            stdio: ["pipe", "pipe", "pipe"],
        });
        let stdout = "";
        let stderr = "";
        proc.stdout.setEncoding("utf8");
        proc.stderr.setEncoding("utf8");
        proc.stdout.on("data", (chunk) => {
            stdout += chunk;
            // Stream critic reasoning to dashboard in real-time.
            for (const line of chunk.split("\n")) {
                const text = line.trim();
                if (!text) {
                    continue;
                }
                const trimmed = text.length > 200 ? text.substring(0, 200) + "…" : text;
                if (taskId) {
                    void postLog(taskId, `${PREFIX} [critic] ${trimmed}`, "output");
                }
                // NOTE(review): the icon literal below looks mis-encoded in this
                // copy of the file (probably a magnifying-glass emoji); left
                // byte-for-byte because the original bytes cannot be recovered here.
                console.log(`${ts()} ${chalk.dim("πŸ”")} ${chalk.dim(trimmed)}`);
            }
        });
        proc.stderr.on("data", (chunk) => {
            stderr += chunk;
        });
        const timeout = setTimeout(() => {
            proc.kill("SIGTERM");
            reject(new Error("Critic CLI timed out after 20 minutes"));
        }, 1_200_000);
        // "close" fires after the process has ended AND its stdio streams have
        // closed; "exit" can fire while output is still buffered.
        proc.on("close", (code) => {
            clearTimeout(timeout);
            if (code !== 0) {
                reject(new Error(`Critic CLI failed (exit ${code}): ${stderr.substring(0, 300)}`));
            }
            else {
                resolve(stdout);
            }
        });
        proc.on("error", (err) => {
            clearTimeout(timeout);
            reject(err);
        });
        // Without a listener, a write error on stdin is an unhandled "error"
        // event. Record it so it surfaces in the failure message instead.
        proc.stdin.on("error", (err) => {
            stderr += `\n[stdin] ${err.message}`;
        });
        proc.stdin.write(prompt);
        proc.stdin.end();
    });
}
338
/**
 * Format a rejected critic result as a markdown section that is appended to
 * the planner prompt for the next attempt.
 *
 * The section contains the score (with the pass threshold), then optional
 * "Risks Identified", "Required Changes" and "Per-Story Feedback" lists, and
 * a closing instruction. Any list field that is missing or not an array is
 * treated as empty, so an incomplete critic object never throws here.
 *
 * @param critic Critic result (as produced by parseCriticResponse).
 * @returns The feedback section, lines joined with "\n".
 */
export function formatCriticFeedback(critic) {
    // Critic data originates from model output; never assume list shapes.
    const asList = (value) => (Array.isArray(value) ? value : []);
    const lines = [
        "",
        "## CRITIC FEEDBACK — Your previous plan was REJECTED",
        "",
        `Score: ${critic.score}/100 (need >= ${AUTO_APPROVAL_THRESHOLD} to pass)`,
        "",
    ];
    const risks = asList(critic.risks);
    if (risks.length > 0) {
        lines.push("### Risks Identified:");
        for (const risk of risks) {
            lines.push(`- ${risk}`);
        }
        lines.push("");
    }
    const suggestions = asList(critic.suggestions);
    if (suggestions.length > 0) {
        lines.push("### Required Changes:");
        for (const suggestion of suggestions) {
            lines.push(`- ${suggestion}`);
        }
        lines.push("");
    }
    const storyFeedback = asList(critic.storyFeedback);
    if (storyFeedback.length > 0) {
        lines.push("### Per-Story Feedback:");
        for (const fb of storyFeedback) {
            lines.push(`- **${fb.storyId}**: ${fb.feedback}`);
            for (const change of asList(fb.suggestedChanges)) {
                lines.push(` - ${change}`);
            }
        }
        lines.push("");
    }
    lines.push("**You MUST address ALL feedback above.** Each story must target at most 5 files.", "Stories MUST NOT overlap on targetFiles. Generate a revised plan.");
    return lines.join("\n");
}
378
/** Log prefix kept in sync with the planner's dashboard message format. */
const PREFIX = "[πŸ—ΊοΈ planning_agent πŸ€–]";
/** Current local time, dimmed, for prefixing console log lines. */
function ts() {
    const now = new Date();
    const clock = now.toLocaleTimeString();
    return chalk.dim(clock);
}
384
/**
 * Send one log entry to the cloud dashboard (POST /api/control-center/logs)
 * so progress is visible in real time. Failures are deliberately ignored:
 * logging must never block or fail the critic run.
 */
async function postLog(taskId, message, type = "system", severity = "info") {
    const entry = { taskId, type, message, severity };
    try {
        await api.post("/api/control-center/logs", entry);
    }
    catch {
        // Best effort only; a failed log post is not worth surfacing.
    }
}
400
/**
 * Score a parsed plan with the critic.
 *
 * The critic prompt is built from the PRD and plan, then sent either through
 * the Claude CLI (provider "anthropic", the default when `provider` is empty)
 * or through generateText() for any other provider, which requires
 * `providerApiKey`. The outcome is logged to the console, and the start of the
 * run is posted to the dashboard when `taskId` is given.
 *
 * @returns The parsed critic result, or null when the critic run or the
 *   parsing of its output fails (the failure is logged, not thrown).
 */
export async function runCriticValidation(claudePath, model, prd, plan, env, taskLabel, provider, providerApiKey, taskId) {
    const criticPrompt = buildCriticPrompt(prd, plan);
    const effectiveProvider = provider || "anthropic";
    console.log(`${ts()} ${taskLabel} ${chalk.dim(`Running critic validation (${effectiveProvider})...`)}`);
    if (taskId) {
        postLog(taskId, `${PREFIX} Running critic validation (${effectiveProvider})...`);
    }
    // Pick the transport for the critic call based on the provider.
    const invokeCritic = () => {
        if (effectiveProvider === "anthropic") {
            return runCriticCli(claudePath, model, criticPrompt, env, taskId);
        }
        if (!providerApiKey) {
            throw new Error(`No API key for critic provider "${effectiveProvider}"`);
        }
        return generateText(effectiveProvider, model, criticPrompt, providerApiKey, { maxTokens: 4096, temperature: 0.3, timeoutMs: 1_200_000 });
    };
    try {
        const rawCriticOutput = await invokeCritic();
        const result = parseCriticResponse(rawCriticOutput);
        const passed = result.score >= AUTO_APPROVAL_THRESHOLD;
        const statusIcon = passed ? chalk.green("βœ“") : chalk.red("βœ—");
        console.log(`${ts()} ${taskLabel} ${statusIcon} Critic score: ${result.score}/100 (threshold: ${AUTO_APPROVAL_THRESHOLD})`);
        return result;
    }
    catch (error) {
        // Non-blocking: report the failure and let the caller decide what to do.
        const errMsg = error instanceof Error ? error.message : String(error);
        console.error(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} Critic failed: ${errMsg.substring(0, 100)}`);
        return null;
    }
}
436
- export { AUTO_APPROVAL_THRESHOLD };
package/dist/planner.d.ts DELETED
@@ -1,40 +0,0 @@
1
- /**
2
- * Remote Agent Planner
3
- *
4
- * Fetches the planning prompt from the cloud API, runs it through
5
- * Claude CLI locally (using the customer's Claude Max subscription),
6
- * validates with a Planner-Critic loop, and posts the approved plan
7
- * back for server-side processing.
8
- *
9
- * Guardrails (matching server-side planning pipeline):
10
- * 1. File cap: max 5 targetFiles per story (prevents scope explosion)
11
- * 2. Critic validation: LLM scores the plan, rejects below 85/100
12
- * 3. Max 3 Planner-Critic iterations before failure
13
- *
14
- * Logs are streamed to the cloud dashboard in real-time so the user
15
- * sees the same planning progress as cloud mode.
16
- */
17
- import { type AgentConfig } from "./config.js";
18
- import type { ClaimCredentials } from "./spawner.js";
19
- export interface PlanningTask { // task descriptor accepted by planTask()
20
- id: string;
21
- summary: string;
22
- description: string | null; // explicitly nullable: a task may carry no description
23
- githubRepo?: string; // optional; presumably the repo that planTask clones when available — confirm against planner implementation
24
- scmProvider?: string; // optional; accepted values are not visible in this declaration file
25
- }
26
- /**
27
- * Run planning for a task with Planner-Critic validation loop.
28
- *
29
- * Flow:
30
- * 1. Fetch planning prompt from cloud API
31
- * 2. Clone target repo (if available) so planner can explore with tools
32
- * 3. Run Claude CLI to generate plan
33
- * 4. Parse plan, apply file cap (max 5 files per story)
34
- * 5. Run critic validation via Claude CLI
35
- * 6. If critic approves (score >= 85): post validated plan to API
36
- * 7. If critic rejects: re-run planner with feedback (up to MAX_ITERATIONS)
37
- * 8. After MAX_ITERATIONS without approval: post best plan if score >= 50 (fallback)
38
- * 9. If no plan scored >= 50: fail the task
39
- */
40
- export declare function planTask(task: PlanningTask, config: AgentConfig, credentials?: ClaimCredentials): Promise<boolean>;