@workermill/agent 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -0
- package/dist/api.d.ts +0 -0
- package/dist/api.js +0 -0
- package/dist/cli.d.ts +0 -0
- package/dist/cli.js +0 -0
- package/dist/commands/logs.d.ts +0 -0
- package/dist/commands/logs.js +0 -0
- package/dist/commands/pull.d.ts +0 -0
- package/dist/commands/pull.js +0 -0
- package/dist/commands/setup.d.ts +0 -0
- package/dist/commands/setup.js +0 -0
- package/dist/commands/start.d.ts +0 -0
- package/dist/commands/start.js +0 -0
- package/dist/commands/status.d.ts +0 -0
- package/dist/commands/status.js +0 -0
- package/dist/commands/stop.d.ts +0 -0
- package/dist/commands/stop.js +0 -0
- package/dist/config.d.ts +0 -0
- package/dist/config.js +0 -0
- package/dist/index.d.ts +0 -0
- package/dist/index.js +0 -0
- package/dist/plan-validator.d.ts +82 -0
- package/dist/plan-validator.js +268 -0
- package/dist/planner.d.ts +18 -2
- package/dist/planner.js +176 -30
- package/dist/poller.d.ts +0 -0
- package/dist/poller.js +0 -0
- package/dist/spawner.d.ts +0 -0
- package/dist/spawner.js +0 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
File without changes
|
package/dist/api.d.ts
CHANGED
|
File without changes
|
package/dist/api.js
CHANGED
|
File without changes
|
package/dist/cli.d.ts
CHANGED
|
File without changes
|
package/dist/cli.js
CHANGED
|
File without changes
|
package/dist/commands/logs.d.ts
CHANGED
|
File without changes
|
package/dist/commands/logs.js
CHANGED
|
File without changes
|
package/dist/commands/pull.d.ts
CHANGED
|
File without changes
|
package/dist/commands/pull.js
CHANGED
|
File without changes
|
package/dist/commands/setup.d.ts
CHANGED
|
File without changes
|
package/dist/commands/setup.js
CHANGED
|
File without changes
|
package/dist/commands/start.d.ts
CHANGED
|
File without changes
|
package/dist/commands/start.js
CHANGED
|
File without changes
|
package/dist/commands/status.d.ts
CHANGED
|
File without changes
|
package/dist/commands/status.js
CHANGED
|
File without changes
|
package/dist/commands/stop.d.ts
CHANGED
|
File without changes
|
package/dist/commands/stop.js
CHANGED
|
File without changes
|
package/dist/config.d.ts
CHANGED
|
File without changes
|
package/dist/config.js
CHANGED
|
File without changes
|
package/dist/index.d.ts
CHANGED
|
File without changes
|
package/dist/index.js
CHANGED
|
File without changes
|
package/dist/plan-validator.d.ts
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Plan Validator for Remote Agent
|
|
3
|
+
*
|
|
4
|
+
* Validates execution plans locally before posting to the cloud API.
|
|
5
|
+
* Implements the same guardrails as the server-side planning pipeline:
|
|
6
|
+
* 1. File cap: max 5 targetFiles per story (prevents scope explosion)
|
|
7
|
+
* 2. Critic validation: LLM scores the plan, rejects below threshold
|
|
8
|
+
*
|
|
9
|
+
* This ensures remote agent plans get the same quality gates as cloud plans,
|
|
10
|
+
* even though the planning prompt runs locally via Claude CLI.
|
|
11
|
+
*/
|
|
12
|
+
export interface PlannedStory {
|
|
13
|
+
id: string;
|
|
14
|
+
title: string;
|
|
15
|
+
description: string;
|
|
16
|
+
persona: string;
|
|
17
|
+
priority: number;
|
|
18
|
+
estimatedEffort: "small" | "medium" | "large";
|
|
19
|
+
dependencies: string[];
|
|
20
|
+
acceptanceCriteria: string[];
|
|
21
|
+
targetFiles?: string[];
|
|
22
|
+
scope?: string;
|
|
23
|
+
}
|
|
24
|
+
export interface ExecutionPlan {
|
|
25
|
+
summary: string;
|
|
26
|
+
stories: PlannedStory[];
|
|
27
|
+
risks: string[];
|
|
28
|
+
assumptions: string[];
|
|
29
|
+
}
|
|
30
|
+
export interface CriticResult {
|
|
31
|
+
approved: boolean;
|
|
32
|
+
score: number;
|
|
33
|
+
risks: string[];
|
|
34
|
+
suggestions?: string[];
|
|
35
|
+
storyFeedback?: Array<{
|
|
36
|
+
storyId: string;
|
|
37
|
+
feedback: string;
|
|
38
|
+
suggestedChanges?: string[];
|
|
39
|
+
}>;
|
|
40
|
+
}
|
|
41
|
+
declare const AUTO_APPROVAL_THRESHOLD = 85;
|
|
42
|
+
/**
|
|
43
|
+
* Parse execution plan JSON from raw Claude CLI output.
|
|
44
|
+
* Mirrors server-side parseExecutionPlan() in planning-agent-local.ts.
|
|
45
|
+
*/
|
|
46
|
+
export declare function parseExecutionPlan(output: string): ExecutionPlan;
|
|
47
|
+
/**
|
|
48
|
+
* Apply file cap to all stories. Truncates targetFiles > MAX_TARGET_FILES.
|
|
49
|
+
* Returns details about truncated stories for logging.
|
|
50
|
+
*/
|
|
51
|
+
export declare function applyFileCap(plan: ExecutionPlan): {
|
|
52
|
+
truncatedCount: number;
|
|
53
|
+
details: string[];
|
|
54
|
+
};
|
|
55
|
+
/**
|
|
56
|
+
* Re-serialize plan as a JSON code block for posting to the API.
|
|
57
|
+
* The server-side parseExecutionPlan() expects ```json ... ``` blocks.
|
|
58
|
+
*/
|
|
59
|
+
export declare function serializePlan(plan: ExecutionPlan): string;
|
|
60
|
+
/**
|
|
61
|
+
* Build the critic prompt with PRD and plan substituted.
|
|
62
|
+
*/
|
|
63
|
+
export declare function buildCriticPrompt(prd: string, plan: ExecutionPlan): string;
|
|
64
|
+
/**
|
|
65
|
+
* Parse critic JSON response from raw Claude CLI output.
|
|
66
|
+
*/
|
|
67
|
+
export declare function parseCriticResponse(text: string): CriticResult;
|
|
68
|
+
/**
|
|
69
|
+
* Run the critic via Claude CLI (lightweight — no tools, just reasoning).
|
|
70
|
+
* Returns the raw text output.
|
|
71
|
+
*/
|
|
72
|
+
export declare function runCriticCli(claudePath: string, model: string, prompt: string, env: Record<string, string | undefined>): Promise<string>;
|
|
73
|
+
/**
|
|
74
|
+
* Format critic feedback for appending to the planner prompt on re-run.
|
|
75
|
+
*/
|
|
76
|
+
export declare function formatCriticFeedback(critic: CriticResult): string;
|
|
77
|
+
/**
|
|
78
|
+
* Run critic validation on a parsed plan.
|
|
79
|
+
* Returns the critic result, or null if critic fails (non-blocking).
|
|
80
|
+
*/
|
|
81
|
+
export declare function runCriticValidation(claudePath: string, model: string, prd: string, plan: ExecutionPlan, env: Record<string, string | undefined>, taskLabel: string): Promise<CriticResult | null>;
|
|
82
|
+
export { AUTO_APPROVAL_THRESHOLD };
|
package/dist/plan-validator.js
ADDED
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Plan Validator for Remote Agent
|
|
3
|
+
*
|
|
4
|
+
* Validates execution plans locally before posting to the cloud API.
|
|
5
|
+
* Implements the same guardrails as the server-side planning pipeline:
|
|
6
|
+
* 1. File cap: max 5 targetFiles per story (prevents scope explosion)
|
|
7
|
+
* 2. Critic validation: LLM scores the plan, rejects below threshold
|
|
8
|
+
*
|
|
9
|
+
* This ensures remote agent plans get the same quality gates as cloud plans,
|
|
10
|
+
* even though the planning prompt runs locally via Claude CLI.
|
|
11
|
+
*/
|
|
12
|
+
import { spawn } from "child_process";
|
|
13
|
+
import chalk from "chalk";
|
|
14
|
+
// ============================================================================
|
|
15
|
+
// CONSTANTS
|
|
16
|
+
// ============================================================================
|
|
17
|
+
const MAX_TARGET_FILES = 5;
|
|
18
|
+
const AUTO_APPROVAL_THRESHOLD = 85;
|
|
19
|
+
// ============================================================================
|
|
20
|
+
// PLAN PARSING
|
|
21
|
+
// ============================================================================
|
|
22
|
+
/**
 * Parse an execution plan out of raw Claude CLI output.
 *
 * Candidates are tried in order: every ```json fenced block, then the widest
 * `{ ... "stories" ... }` span in the raw text. The first candidate that is
 * valid JSON AND carries a `stories` array is returned, so an example or
 * malformed block earlier in the output no longer aborts parsing.
 *
 * NOTE(review): the original comment says this mirrors the server-side
 * parseExecutionPlan() in planning-agent-local.ts; the `stories` shape check
 * is a local addition — confirm the server is at least as strict.
 *
 * @param output Raw text emitted by the planner CLI.
 * @returns The parsed plan object (guaranteed to have an array `stories`).
 * @throws {Error} When no candidate parses into an object with a `stories` array.
 */
export function parseExecutionPlan(output) {
    const candidates = [];
    for (const fenced of output.matchAll(/```json\s*([\s\S]*?)\s*```/g)) {
        candidates.push(fenced[1]);
    }
    const rawJsonMatch = output.match(/\{[\s\S]*"stories"[\s\S]*\}/);
    if (rawJsonMatch) {
        candidates.push(rawJsonMatch[0]);
    }
    let lastError;
    for (const candidate of candidates) {
        let parsed;
        try {
            parsed = JSON.parse(candidate);
        }
        catch (error) {
            // Remember why this candidate failed, then try the next one.
            lastError = error;
            continue;
        }
        if (parsed !== null && typeof parsed === "object" && Array.isArray(parsed.stories)) {
            return parsed;
        }
        lastError = new Error('JSON block has no "stories" array');
    }
    const reason = lastError instanceof Error ? `: ${lastError.message}` : "";
    throw new Error(`Could not find JSON execution plan in output${reason}`);
}
|
|
37
|
+
// ============================================================================
|
|
38
|
+
// FILE CAP
|
|
39
|
+
// ============================================================================
|
|
40
|
+
/**
 * Enforce the per-story file cap on a plan, mutating it in place.
 *
 * A story whose `targetFiles` is missing or not an array gets an empty array.
 * A story listing more than MAX_TARGET_FILES entries keeps only the first
 * MAX_TARGET_FILES; the dropped entries are described in the returned details.
 *
 * @param plan Parsed execution plan whose `stories` are iterated.
 * @returns `truncatedCount` (number of stories cut down) and one `details`
 *          line per truncated story, for logging.
 */
export function applyFileCap(plan) {
    const details = [];
    for (const story of plan.stories) {
        const files = story.targetFiles;
        if (!Array.isArray(files)) {
            // Covers undefined/null as well as non-array values.
            story.targetFiles = [];
            continue;
        }
        if (files.length <= MAX_TARGET_FILES) {
            continue;
        }
        const kept = files.slice(0, MAX_TARGET_FILES);
        const dropped = files.slice(MAX_TARGET_FILES);
        details.push(`${story.id}: ${files.length} files → ${MAX_TARGET_FILES} (dropped: ${dropped.join(", ")})`);
        story.targetFiles = kept;
    }
    // Exactly one detail line is recorded per truncated story.
    return { truncatedCount: details.length, details };
}
|
|
60
|
+
// ============================================================================
|
|
61
|
+
// PLAN SERIALIZATION
|
|
62
|
+
// ============================================================================
|
|
63
|
+
/**
 * Render a plan as a ```json fenced code block (2-space indented JSON).
 *
 * Per the original comment, this is the format the server-side
 * parseExecutionPlan() expects when the plan is posted to the API.
 *
 * @param plan Plan object to serialize.
 * @returns The fenced JSON text.
 */
export function serializePlan(plan) {
    const fence = "```";
    const body = JSON.stringify(plan, null, 2);
    return `${fence}json\n${body}\n${fence}`;
}
|
|
70
|
+
// ============================================================================
|
|
71
|
+
// CRITIC
|
|
72
|
+
// ============================================================================
|
|
73
|
+
/**
|
|
74
|
+
* Critic prompt — identical to server-side critic-agent.ts CRITIC_PROMPT.
|
|
75
|
+
*/
|
|
76
|
+
const CRITIC_PROMPT = `You are a Senior Architect reviewing an execution plan. Your job is to ensure the plan is appropriately sized for the task.
|
|
77
|
+
|
|
78
|
+
Review this execution plan against the PRD:
|
|
79
|
+
|
|
80
|
+
## PRD (Product Requirements Document)
|
|
81
|
+
{{PRD}}
|
|
82
|
+
|
|
83
|
+
## PROPOSED EXECUTION PLAN
|
|
84
|
+
{{PLAN}}
|
|
85
|
+
|
|
86
|
+
## Review Guidelines
|
|
87
|
+
|
|
88
|
+
**IMPORTANT: Match plan size to task complexity**
|
|
89
|
+
|
|
90
|
+
- Simple tasks (typos, config changes, single-file fixes) = 1 step is CORRECT
|
|
91
|
+
- Medium tasks (2-4 files, small features) = 2-3 steps is appropriate
|
|
92
|
+
- Complex tasks (new systems, security) = 3-5 steps is appropriate
|
|
93
|
+
|
|
94
|
+
**Do NOT penalize:**
|
|
95
|
+
- Single-step plans for genuinely simple tasks
|
|
96
|
+
- Using one persona when only one skill is needed
|
|
97
|
+
|
|
98
|
+
**DO check for:**
|
|
99
|
+
1. **Missing Requirements** - Does the plan cover what the PRD asks for?
|
|
100
|
+
2. **Vague Instructions** - Will the worker know what to do?
|
|
101
|
+
3. **Security Issues** - Only for tasks involving auth, user data, or external input
|
|
102
|
+
4. **Unrealistic Scope** - Any step targeting >3 files MUST score below 85 (auto-rejection threshold). Each step should modify at most 3 files. If a step needs more, split it into multiple steps first.
|
|
103
|
+
5. **Missing Operational Steps** - If the PRD requires deployment, provisioning, migrations, or running commands, does the plan include operational steps? Writing code is not the same as deploying it.
|
|
104
|
+
6. **Overlapping File Scope** - If two or more steps share the same targetFiles, this causes parallel merge conflicts. Steps MUST NOT overlap on targetFiles. Deduct 10 points per shared file across steps.
|
|
105
|
+
|
|
106
|
+
## Scoring Guide
|
|
107
|
+
|
|
108
|
+
- **90-100**: Plan matches task complexity, requirements covered
|
|
109
|
+
- **75-89**: Minor gaps but fundamentally sound
|
|
110
|
+
- **50-74**: Significant issues or wrong-sized for the task
|
|
111
|
+
- **0-49**: Fundamentally flawed
|
|
112
|
+
|
|
113
|
+
## Output Format
|
|
114
|
+
|
|
115
|
+
Respond with ONLY a JSON object (no markdown, no explanation):
|
|
116
|
+
{"approved": boolean, "score": number, "risks": ["risk1", "risk2"], "suggestions": ["suggestion1", "suggestion2"], "storyFeedback": [{"storyId": "step-0", "feedback": "specific feedback", "suggestedChanges": ["change1"]}]}
|
|
117
|
+
|
|
118
|
+
Rules:
|
|
119
|
+
- approved = true if score >= 85 AND plan is right-sized for task
|
|
120
|
+
- risks = specific issues (empty array if none)
|
|
121
|
+
- suggestions = actionable improvements (empty array if none)
|
|
122
|
+
- storyFeedback = per-step feedback (optional, only for steps that need changes)`;
|
|
123
|
+
/**
 * Build the critic prompt by substituting the PRD and the plan JSON into
 * CRITIC_PROMPT.
 *
 * Both placeholders are filled in a single pass with a replacer function, so:
 *  - `$&`, `$1`, `$$` etc. inside the PRD or plan are inserted literally
 *    (a string replacement argument would interpret them), and
 *  - a PRD that happens to contain the text `{{PLAN}}` is not itself rewritten.
 * As before, only the first occurrence of each placeholder is replaced.
 *
 * @param prd Requirements text shown to the critic.
 * @param plan Execution plan; serialized as 2-space indented JSON.
 * @returns The complete critic prompt.
 */
export function buildCriticPrompt(prd, plan) {
    const pending = new Map([
        ["{{PRD}}", prd],
        ["{{PLAN}}", JSON.stringify(plan, null, 2)],
    ]);
    return CRITIC_PROMPT.replace(/\{\{(?:PRD|PLAN)\}\}/g, (token) => {
        if (!pending.has(token)) {
            // Placeholder already consumed once — leave later copies untouched.
            return token;
        }
        const value = pending.get(token);
        pending.delete(token);
        return String(value);
    });
}
|
|
130
|
+
/**
 * Parse and sanitize the critic's JSON verdict from raw Claude CLI output.
 *
 * Tolerates a markdown code fence and prose before or after the JSON object.
 * The result is normalized so callers can rely on its shape:
 *  - `score` is a finite integer clamped to 0..100 (numeric strings accepted);
 *  - `approved` is true only for a literal boolean `true`;
 *  - `risks` is always an array of strings;
 *  - `suggestions` is an array of strings, or undefined when absent/invalid;
 *  - `storyFeedback` is passed through when it is an array, else undefined.
 *
 * @param text Raw critic output.
 * @returns The normalized critic result.
 * @throws {Error} When no JSON object is found, the JSON is invalid, or the
 *         score is missing / not a finite number (previously this produced a
 *         NaN score that silently counted as a rejection).
 */
export function parseCriticResponse(text) {
    let jsonText = text.trim();
    // Handle markdown code blocks
    if (jsonText.includes("```")) {
        const fenced = jsonText.match(/```(?:json)?\s*([\s\S]*?)```/);
        if (fenced) {
            jsonText = fenced[1].trim();
        }
    }
    // Cut away reasoning text before the first "{" and after the last "}".
    const jsonStart = jsonText.indexOf("{");
    const jsonEnd = jsonText.lastIndexOf("}");
    if (jsonStart === -1 || jsonEnd < jsonStart) {
        throw new Error("Critic response does not contain a JSON object");
    }
    const result = JSON.parse(jsonText.slice(jsonStart, jsonEnd + 1));
    if (result === null || typeof result !== "object" || Array.isArray(result)) {
        throw new Error("Critic response is not a JSON object");
    }
    const rawScore = typeof result.score === "string" && result.score.trim() !== ""
        ? Number(result.score)
        : result.score;
    if (typeof rawScore !== "number" || !Number.isFinite(rawScore)) {
        throw new Error("Critic response has no numeric score");
    }
    // Keep only string entries; anything that is not an array is discarded.
    const toStringList = (value) => Array.isArray(value)
        ? value.filter((item) => typeof item === "string")
        : undefined;
    return {
        approved: result.approved === true,
        score: Math.max(0, Math.min(100, Math.round(rawScore))),
        risks: toStringList(result.risks) ?? [],
        suggestions: toStringList(result.suggestions),
        storyFeedback: Array.isArray(result.storyFeedback)
            ? result.storyFeedback
            : undefined,
    };
}
|
|
157
|
+
/**
 * Run the critic through the Claude CLI and return its stdout text.
 *
 * The CLI is spawned with `--print --model <model> --permission-mode
 * bypassPermissions`; the prompt is written to its stdin.
 *
 * Robustness details:
 *  - The promise settles exactly once (timeout, spawn error, or close).
 *  - Completion is taken from "close" rather than "exit", so stdout/stderr
 *    are fully drained before the result is returned.
 *  - stdin has an "error" listener: if the child dies before reading the
 *    prompt, the resulting EPIPE no longer surfaces as an unhandled stream
 *    error; the failure is reported through "close"/"error" instead.
 *  - Streams are decoded as UTF-8 so multibyte characters split across
 *    chunks are not corrupted.
 *
 * @param claudePath Path or name of the Claude CLI executable.
 * @param model Model name passed to `--model`.
 * @param prompt Prompt text written to the CLI's stdin.
 * @param env Environment for the child process.
 * @returns Promise resolving to the CLI's stdout.
 * @throws (rejects) Error on non-zero exit, signal termination, spawn
 *         failure, or after the 3 minute timeout (child is sent SIGTERM).
 */
export function runCriticCli(claudePath, model, prompt, env) {
    return new Promise((resolve, reject) => {
        const proc = spawn(claudePath, [
            "--print",
            "--model",
            model,
            "--permission-mode",
            "bypassPermissions",
        ], {
            env,
            stdio: ["pipe", "pipe", "pipe"],
        });
        let stdout = "";
        let stderr = "";
        let settled = false;
        const timeout = setTimeout(() => {
            proc.kill("SIGTERM");
            finish(() => reject(new Error("Critic CLI timed out after 3 minutes")));
        }, 180_000);
        // Run the first outcome only; later events (e.g. close after timeout) are ignored.
        const finish = (outcome) => {
            if (settled) {
                return;
            }
            settled = true;
            clearTimeout(timeout);
            outcome();
        };
        proc.stdout.setEncoding("utf8");
        proc.stderr.setEncoding("utf8");
        proc.stdout.on("data", (chunk) => {
            stdout += chunk;
        });
        proc.stderr.on("data", (chunk) => {
            stderr += chunk;
        });
        // Swallow write failures here; the process-level handlers report the real cause.
        proc.stdin.on("error", () => { });
        proc.stdin.write(prompt);
        proc.stdin.end();
        proc.on("close", (code, signal) => {
            finish(() => {
                if (code === 0) {
                    resolve(stdout);
                }
                else {
                    reject(new Error(`Critic CLI failed (exit ${code ?? signal}): ${stderr.substring(0, 300)}`));
                }
            });
        });
        proc.on("error", (err) => {
            finish(() => reject(err));
        });
    });
}
|
|
202
|
+
/**
 * Turn a rejected critic result into a markdown section that is appended to
 * the planner prompt for the next attempt.
 *
 * Sections are emitted only when they have content: risks, suggestions
 * ("Required Changes"), and per-story feedback with optional nested changes.
 *
 * NOTE(review): the closing instruction tells the planner "at most 5 files"
 * per story, while the critic prompt in this file rejects steps over 3 files —
 * confirm which limit is intended.
 *
 * @param critic Critic result (`score`, `risks`, optional `suggestions` and `storyFeedback`).
 * @returns The feedback text, lines joined with "\n".
 */
export function formatCriticFeedback(critic) {
    const { risks, suggestions, storyFeedback } = critic;
    const out = [
        "",
        "## CRITIC FEEDBACK — Your previous plan was REJECTED",
        "",
        `Score: ${critic.score}/100 (need >= ${AUTO_APPROVAL_THRESHOLD} to pass)`,
        "",
    ];
    if (risks.length > 0) {
        out.push("### Risks Identified:", ...Array.from(risks, (risk) => `- ${risk}`), "");
    }
    if (suggestions && suggestions.length > 0) {
        out.push("### Required Changes:", ...Array.from(suggestions, (suggestion) => `- ${suggestion}`), "");
    }
    if (storyFeedback && storyFeedback.length > 0) {
        out.push("### Per-Story Feedback:");
        for (const entry of storyFeedback) {
            out.push(`- **${entry.storyId}**: ${entry.feedback}`);
            if (!entry.suggestedChanges) {
                continue;
            }
            for (const change of entry.suggestedChanges) {
                out.push(` - ${change}`);
            }
        }
        out.push("");
    }
    out.push("**You MUST address ALL feedback above.** Each story must target at most 5 files.");
    out.push("Stories MUST NOT overlap on targetFiles. Generate a revised plan.");
    return out.join("\n");
}
|
|
242
|
+
/** Current local time (toLocaleTimeString), dimmed via chalk, used as a console log prefix. */
function ts() {
    const clock = new Date().toLocaleTimeString();
    return chalk.dim(clock);
}
|
|
246
|
+
/**
 * Score a parsed plan with the critic and log the outcome.
 *
 * Builds the critic prompt, runs the CLI, parses the response and prints the
 * score against AUTO_APPROVAL_THRESHOLD. Any failure while running or parsing
 * the critic is logged and converted to `null`, so the caller decides how to
 * proceed without a score.
 *
 * @param claudePath Path or name of the Claude CLI executable.
 * @param model Model name for the critic run.
 * @param prd Requirements text the plan is judged against.
 * @param plan Parsed execution plan.
 * @param env Environment for the CLI process.
 * @param taskLabel Prefix used in console output.
 * @returns The critic result, or null if the critic run/parse failed.
 */
export async function runCriticValidation(claudePath, model, prd, plan, env, taskLabel) {
    const criticPrompt = buildCriticPrompt(prd, plan);
    console.log(`${ts()} ${taskLabel} ${chalk.dim("Running critic validation...")}`);
    try {
        const verdict = parseCriticResponse(await runCriticCli(claudePath, model, criticPrompt, env));
        const passed = verdict.score >= AUTO_APPROVAL_THRESHOLD;
        const statusIcon = passed ? chalk.green("✓") : chalk.red("✗");
        console.log(`${ts()} ${taskLabel} ${statusIcon} Critic score: ${verdict.score}/100 (threshold: ${AUTO_APPROVAL_THRESHOLD})`);
        return verdict;
    }
    catch (error) {
        let errMsg;
        if (error instanceof Error) {
            errMsg = error.message;
        }
        else {
            errMsg = String(error);
        }
        console.error(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} Critic failed: ${errMsg.substring(0, 100)}`);
        return null;
    }
}
|
|
268
|
+
export { AUTO_APPROVAL_THRESHOLD };
|
package/dist/planner.d.ts
CHANGED
|
@@ -3,7 +3,13 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Fetches the planning prompt from the cloud API, runs it through
|
|
5
5
|
* Claude CLI locally (using the customer's Claude Max subscription),
|
|
6
|
-
*
|
|
6
|
+
* validates with a Planner-Critic loop, and posts the approved plan
|
|
7
|
+
* back for server-side processing.
|
|
8
|
+
*
|
|
9
|
+
* Guardrails (matching server-side planning pipeline):
|
|
10
|
+
* 1. File cap: max 5 targetFiles per story (prevents scope explosion)
|
|
11
|
+
* 2. Critic validation: LLM scores the plan, rejects below 85/100
|
|
12
|
+
* 3. Max 3 Planner-Critic iterations before failure
|
|
7
13
|
*
|
|
8
14
|
* Logs are streamed to the cloud dashboard in real-time so the user
|
|
9
15
|
* sees the same planning progress as cloud mode.
|
|
@@ -12,8 +18,18 @@ import { type AgentConfig } from "./config.js";
|
|
|
12
18
|
export interface PlanningTask {
|
|
13
19
|
id: string;
|
|
14
20
|
summary: string;
|
|
21
|
+
description: string | null;
|
|
15
22
|
}
|
|
16
23
|
/**
|
|
17
|
-
* Run planning for a task
|
|
24
|
+
* Run planning for a task with Planner-Critic validation loop.
|
|
25
|
+
*
|
|
26
|
+
* Flow:
|
|
27
|
+
* 1. Fetch planning prompt from cloud API
|
|
28
|
+
* 2. Run Claude CLI to generate plan
|
|
29
|
+
* 3. Parse plan, apply file cap (max 5 files per story)
|
|
30
|
+
* 4. Run critic validation via Claude CLI
|
|
31
|
+
* 5. If critic approves (score >= 85): post validated plan to API
|
|
32
|
+
* 6. If critic rejects: re-run planner with feedback (up to MAX_ITERATIONS)
|
|
33
|
+
* 7. After MAX_ITERATIONS without approval: fail the task
|
|
18
34
|
*/
|
|
19
35
|
export declare function planTask(task: PlanningTask, config: AgentConfig): Promise<boolean>;
|
package/dist/planner.js
CHANGED
|
@@ -3,7 +3,13 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Fetches the planning prompt from the cloud API, runs it through
|
|
5
5
|
* Claude CLI locally (using the customer's Claude Max subscription),
|
|
6
|
-
*
|
|
6
|
+
* validates with a Planner-Critic loop, and posts the approved plan
|
|
7
|
+
* back for server-side processing.
|
|
8
|
+
*
|
|
9
|
+
* Guardrails (matching server-side planning pipeline):
|
|
10
|
+
* 1. File cap: max 5 targetFiles per story (prevents scope explosion)
|
|
11
|
+
* 2. Critic validation: LLM scores the plan, rejects below 85/100
|
|
12
|
+
* 3. Max 3 Planner-Critic iterations before failure
|
|
7
13
|
*
|
|
8
14
|
* Logs are streamed to the cloud dashboard in real-time so the user
|
|
9
15
|
* sees the same planning progress as cloud mode.
|
|
@@ -12,6 +18,9 @@ import chalk from "chalk";
|
|
|
12
18
|
import { spawn } from "child_process";
|
|
13
19
|
import { findClaudePath } from "./config.js";
|
|
14
20
|
import { api } from "./api.js";
|
|
21
|
+
import { parseExecutionPlan, applyFileCap, serializePlan, runCriticValidation, formatCriticFeedback, AUTO_APPROVAL_THRESHOLD, } from "./plan-validator.js";
|
|
22
|
+
/** Max Planner-Critic iterations before giving up */
|
|
23
|
+
const MAX_ITERATIONS = 3;
|
|
15
24
|
/** Timestamp prefix */
|
|
16
25
|
function ts() {
|
|
17
26
|
return chalk.dim(new Date().toLocaleTimeString());
|
|
@@ -51,14 +60,22 @@ async function postProgress(taskId, phase, elapsedSeconds, detail, charsGenerate
|
|
|
51
60
|
// Fire and forget
|
|
52
61
|
}
|
|
53
62
|
}
|
|
63
|
+
/** Consistent prefix matching local workermill dashboard format */
|
|
64
|
+
const PREFIX = "[🗺️ planning_agent 🤖]";
|
|
65
|
+
/**
 * Render a duration in seconds as "<s>s", or "<m>m <s>s" once it reaches a
 * full minute (e.g. 28 -> "28s", 85 -> "1m 25s").
 */
function formatElapsed(seconds) {
    const wholeMinutes = Math.floor(seconds / 60);
    const remainder = seconds % 60;
    if (wholeMinutes > 0) {
        return `${wholeMinutes}m ${remainder}s`;
    }
    return `${remainder}s`;
}
|
|
54
71
|
/**
 * Map a planning phase name to its dashboard log line.
 * Only "generating_plan" uses `elapsed`; an unrecognized phase yields undefined.
 */
function phaseLabel(phase, elapsed) {
    // Lazily rendered so formatElapsed() is only called for the phase that needs it.
    const renderers = new Map([
        ["initializing", () => `${PREFIX} Starting planning agent...`],
        ["reading_repo", () => `${PREFIX} Reading repository structure...`],
        ["analyzing", () => `${PREFIX} Analyzing requirements...`],
        ["generating_plan", () => `${PREFIX} Planning in progress — analyzing requirements and decomposing into steps (${formatElapsed(elapsed)} elapsed)`],
        ["validating", () => `${PREFIX} Validating plan...`],
        ["complete", () => `${PREFIX} Planning complete`],
    ]);
    const render = renderers.get(phase);
    return render ? render() : undefined;
}
|
|
64
81
|
/**
|
|
@@ -119,7 +136,7 @@ function runClaudeCli(claudePath, model, prompt, env, taskId, startTime) {
|
|
|
119
136
|
// Periodic progress during generation
|
|
120
137
|
if (currentPhase === "generating_plan" && elapsed - lastProgressLogAt >= 30) {
|
|
121
138
|
lastProgressLogAt = elapsed;
|
|
122
|
-
const msg =
|
|
139
|
+
const msg = `${PREFIX} Planning in progress — analyzing requirements and decomposing into steps (${formatElapsed(elapsed)} elapsed)`;
|
|
123
140
|
postLog(taskId, msg);
|
|
124
141
|
console.log(`${ts()} ${taskLabel} ${chalk.dim(msg)}`);
|
|
125
142
|
}
|
|
@@ -212,57 +229,186 @@ function runClaudeCli(claudePath, model, prompt, env, taskId, startTime) {
|
|
|
212
229
|
});
|
|
213
230
|
}
|
|
214
231
|
/**
 * Run planning for a task with a Planner-Critic validation loop.
 *
 * Flow:
 * 1. Fetch the planning prompt (and model) from the cloud API.
 * 2. Run the Claude CLI to generate a plan.
 * 3. Parse the plan and apply the file cap. If either step fails, the raw
 *    output is posted instead so the server can attempt its own parsing.
 * 4. Run critic validation via the Claude CLI.
 * 5. Critic unavailable (null result): post the plan without a critic gate.
 * 6. Score >= AUTO_APPROVAL_THRESHOLD: post the validated plan. The numeric
 *    score is the only gate; the critic's self-reported `approved` flag is
 *    not allowed to override a sub-threshold score.
 * 7. Otherwise re-run the planner with the critic feedback appended to the
 *    original prompt, up to MAX_ITERATIONS attempts, then fail.
 *
 * @param task Task to plan (`id`, `summary`, optional `description`).
 * @param config Agent configuration (`agentId` is forwarded when posting).
 * @returns true when a plan was posted successfully, false when planning failed.
 */
export async function planTask(task, config) {
    const taskLabel = chalk.cyan(task.id.slice(0, 8));
    console.log(`${ts()} ${taskLabel} Fetching planning prompt...`);
    await postLog(task.id, `${PREFIX} Fetching planning prompt from cloud API...`);
    // 1. Fetch the assembled planning prompt from the cloud API
    const promptResponse = await api.get("/api/agent/planning-prompt", {
        params: { taskId: task.id },
    });
    const { prompt: basePrompt, model } = promptResponse.data;
    const cliModel = model || "sonnet";
    const claudePath = process.env.CLAUDE_CLI_PATH || findClaudePath() || "claude";
    const cleanEnv = { ...process.env };
    delete cleanEnv.CLAUDE_CODE_OAUTH_TOKEN;
    const startTime = Date.now();
    // PRD for critic validation: use task description, fall back to summary
    const prd = task.description || task.summary;
    // 2. Planner-Critic iteration loop
    let currentPrompt = basePrompt;
    // Highest critic score seen so far; reported if every attempt is rejected.
    let bestScore = 0;
    for (let iteration = 1; iteration <= MAX_ITERATIONS; iteration++) {
        const iterLabel = MAX_ITERATIONS > 1 ? ` (attempt ${iteration}/${MAX_ITERATIONS})` : "";
        if (iteration > 1) {
            console.log(`${ts()} ${taskLabel} Running Claude CLI${iterLabel} ${chalk.dim(`(model: ${chalk.yellow(cliModel)})`)}`);
            await postLog(task.id, `${PREFIX} Re-planning${iterLabel} using anthropic/${cliModel}`);
        }
        else {
            console.log(`${ts()} ${taskLabel} Running Claude CLI ${chalk.dim(`(model: ${chalk.yellow(cliModel)})`)}`);
            await postLog(task.id, `${PREFIX} Starting planning agent using anthropic/${cliModel}`);
        }
        // 2a. Run Claude CLI to generate plan
        let rawOutput;
        try {
            rawOutput = await runClaudeCli(claudePath, cliModel, currentPrompt, cleanEnv, task.id, startTime);
        }
        catch (error) {
            const elapsed = Math.round((Date.now() - startTime) / 1000);
            const errMsg = error instanceof Error ? error.message : String(error);
            console.error(`${ts()} ${taskLabel} ${chalk.red("✗")} Failed after ${elapsed}s: ${errMsg.substring(0, 100)}`);
            await postLog(task.id, `${PREFIX} Planning failed after ${formatElapsed(elapsed)}: ${errMsg.substring(0, 200)}`, "error", "error");
            return false;
        }
        // Elapsed time is measured from the start of planning, across all attempts.
        const elapsed = Math.round((Date.now() - startTime) / 1000);
        console.log(`${ts()} ${taskLabel} ${chalk.green("✓")} Claude CLI done ${chalk.dim(`(${elapsed}s, ${rawOutput.length} chars)`)}`);
        // 2b/2c. Parse plan and apply file cap (max 5 files per story).
        // Both run inside the same try: a plan that parses as JSON but has a
        // malformed `stories` value makes applyFileCap throw, and that must
        // take the raw-output fallback rather than reject planTask.
        let plan;
        let fileCap;
        try {
            plan = parseExecutionPlan(rawOutput);
            fileCap = applyFileCap(plan);
        }
        catch (error) {
            const errMsg = error instanceof Error ? error.message : String(error);
            console.error(`${ts()} ${taskLabel} ${chalk.red("✗")} Plan parse failed: ${errMsg.substring(0, 100)}`);
            await postLog(task.id, `${PREFIX} Failed to parse execution plan from Claude output: ${errMsg.substring(0, 200)}`, "error", "error");
            // If we can't parse the plan, post raw output and let server-side try
            return await postRawPlan(task.id, rawOutput, config.agentId, taskLabel, elapsed);
        }
        const { truncatedCount, details } = fileCap;
        if (truncatedCount > 0) {
            const msg = `${PREFIX} File cap applied: ${truncatedCount} stories truncated to max 5 targetFiles`;
            console.log(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} ${msg}`);
            await postLog(task.id, msg);
            for (const detail of details) {
                console.log(`${ts()} ${taskLabel} ${chalk.dim(detail)}`);
            }
        }
        console.log(`${ts()} ${taskLabel} Plan: ${chalk.bold(plan.stories.length)} stories`);
        await postLog(task.id, `${PREFIX} Plan generated: ${plan.stories.length} stories (${formatElapsed(elapsed)}). Running critic validation...`);
        // 2d. Run critic validation
        const criticResult = await runCriticValidation(claudePath, cliModel, prd, plan, cleanEnv, taskLabel);
        // 2e. Check critic result
        if (!criticResult) {
            // Critic failed (timeout, parse error, etc.) — post plan without critic gate
            const msg = `${PREFIX} Critic validation failed — posting plan without critic score`;
            console.log(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} ${msg}`);
            await postLog(task.id, msg);
            return await postValidatedPlan(task.id, plan, config.agentId, taskLabel, elapsed);
        }
        if (criticResult.score > bestScore) {
            bestScore = criticResult.score;
        }
        if (criticResult.score >= AUTO_APPROVAL_THRESHOLD) {
            // Approved! Post the file-capped plan
            const msg = `${PREFIX} Critic approved (score: ${criticResult.score}/100)`;
            await postLog(task.id, msg);
            return await postValidatedPlan(task.id, plan, config.agentId, taskLabel, elapsed);
        }
        // 2f. Rejected — append critic feedback for next iteration
        if (iteration < MAX_ITERATIONS) {
            const feedback = formatCriticFeedback(criticResult);
            // Feedback is appended to the ORIGINAL prompt, not accumulated across attempts.
            currentPrompt = basePrompt + "\n\n" + feedback;
            const msg = `${PREFIX} Critic rejected (score: ${criticResult.score}/100, threshold: ${AUTO_APPROVAL_THRESHOLD}). Re-planning with feedback...`;
            console.log(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} ${msg}`);
            await postLog(task.id, msg);
            if (criticResult.risks.length > 0) {
                await postLog(task.id, `${PREFIX} Critic risks: ${criticResult.risks.join("; ")}`);
            }
        }
        else {
            // Final iteration — rejected
            const msg = `${PREFIX} Critic rejected after ${MAX_ITERATIONS} iterations (best score: ${bestScore}/100, threshold: ${AUTO_APPROVAL_THRESHOLD})`;
            console.error(`${ts()} ${taskLabel} ${chalk.red("✗")} ${msg}`);
            await postLog(task.id, msg, "error", "error");
            if (criticResult.risks.length > 0) {
                await postLog(task.id, `${PREFIX} Final risks: ${criticResult.risks.join("; ")}`, "error", "error");
            }
            if (criticResult.suggestions && criticResult.suggestions.length > 0) {
                await postLog(task.id, `${PREFIX} Suggestions: ${criticResult.suggestions.join("; ")}`, "error", "error");
            }
        }
    }
    // All iterations exhausted — fail
    return false;
}
|
|
363
|
+
/**
 * Post a validated (file-capped) plan to the cloud API.
 * Re-serializes the plan as a JSON code block since the server-side
 * parseExecutionPlan() expects that format.
 *
 * @param taskId - Identifier of the task the plan belongs to; sent in the
 *   request body and used for log/progress posts.
 * @param plan - Plan object handed to serializePlan() before posting.
 * @param agentId - Agent identifier sent in the request body.
 * @param taskLabel - Label included in the console output lines.
 * @param elapsed - Value forwarded unchanged to postProgress().
 * @returns true when the POST and the follow-up log/progress calls succeed;
 *   false when any of them throws.
 */
async function postValidatedPlan(taskId, plan, agentId, taskLabel, elapsed) {
    const serialized = serializePlan(plan);
    try {
        const result = await api.post("/api/agent/plan-result", {
            taskId,
            rawOutput: serialized,
            agentId,
        });
        const storyCount = result.data.storyCount;
        console.log(`${ts()} ${taskLabel} ${chalk.green("✓")} Plan validated: ${chalk.bold(storyCount)} stories → ${chalk.green("queued")}`);
        await postLog(taskId, `${PREFIX} Plan validated: ${storyCount} stories. Task queued for execution.`);
        await postProgress(taskId, "complete", elapsed, "Planning complete", 0, 0);
        return true;
    }
    catch (error) {
        // The thrown value may be nullish or carry a non-string `detail`
        // (e.g. a structured array/object error body). Normalize it to a
        // string first so the substring() calls below cannot throw from
        // inside this handler.
        let detail = error?.response?.data?.detail || String(error);
        if (typeof detail !== "string") {
            try {
                detail = JSON.stringify(detail) ?? String(detail);
            }
            catch {
                // JSON.stringify throws on circular structures and BigInt.
                detail = String(detail);
            }
        }
        console.error(`${ts()} ${taskLabel} ${chalk.red("✗")} Server validation failed: ${detail.substring(0, 100)}`);
        await postLog(taskId, `${PREFIX} Server-side plan validation failed: ${detail.substring(0, 200)}`, "error", "error");
        return false;
    }
}
|
|
390
|
+
/**
 * Post raw (unparsed) plan output to the cloud API as a fallback.
 * Used when local plan parsing fails — let the server try.
 *
 * @param taskId - Identifier of the task the plan belongs to; sent in the
 *   request body and used for log/progress posts.
 * @param rawOutput - Raw planner output, sent to the server as-is.
 * @param agentId - Agent identifier sent in the request body.
 * @param taskLabel - Label included in the console output lines.
 * @param elapsed - Value forwarded unchanged to postProgress().
 * @returns true when the POST and the follow-up log/progress calls succeed;
 *   false when any of them throws.
 */
async function postRawPlan(taskId, rawOutput, agentId, taskLabel, elapsed) {
    try {
        const result = await api.post("/api/agent/plan-result", {
            taskId,
            rawOutput,
            agentId,
        });
        const storyCount = result.data.storyCount;
        console.log(`${ts()} ${taskLabel} ${chalk.green("✓")} Plan validated (server-side): ${chalk.bold(storyCount)} stories → ${chalk.green("queued")}`);
        await postLog(taskId, `${PREFIX} Plan validated: ${storyCount} stories. Task queued for execution.`);
        await postProgress(taskId, "complete", elapsed, "Planning complete", 0, 0);
        return true;
    }
    catch (error) {
        // The thrown value may be nullish or carry a non-string `detail`
        // (e.g. a structured array/object error body). Normalize it to a
        // string first so the substring() calls below cannot throw from
        // inside this handler.
        let detail = error?.response?.data?.detail || String(error);
        if (typeof detail !== "string") {
            try {
                detail = JSON.stringify(detail) ?? String(detail);
            }
            catch {
                // JSON.stringify throws on circular structures and BigInt.
                detail = String(detail);
            }
        }
        console.error(`${ts()} ${taskLabel} ${chalk.red("✗")} Validation failed: ${detail.substring(0, 100)}`);
        await postLog(taskId, `${PREFIX} Plan validation failed: ${detail.substring(0, 200)}`, "error", "error");
        return false;
    }
}
|
package/dist/poller.d.ts
CHANGED
|
File without changes
|
package/dist/poller.js
CHANGED
|
File without changes
|
package/dist/spawner.d.ts
CHANGED
|
File without changes
|
package/dist/spawner.js
CHANGED
|
File without changes
|