@workermill/agent 0.8.5 → 0.8.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/planner.js DELETED
@@ -1,792 +0,0 @@
1
- /**
2
- * Remote Agent Planner
3
- *
4
- * Fetches the planning prompt from the cloud API, runs it through
5
- * Claude CLI locally (using the customer's Claude Max subscription),
6
- * validates with a Planner-Critic loop, and posts the approved plan
7
- * back for server-side processing.
8
- *
9
- * Guardrails (matching server-side planning pipeline):
10
- * 1. File cap: max 5 targetFiles per story (prevents scope explosion)
11
- * 2. Critic validation: LLM scores the plan, rejects below 85/100
12
- * 3. Max 3 Planner-Critic iterations before failure
13
- *
14
- * Logs are streamed to the cloud dashboard in real-time so the user
15
- * sees the same planning progress as cloud mode.
16
- */
17
import chalk from "chalk";
import { spawn, execSync, execFileSync } from "child_process";
import { findClaudePath } from "./config.js";
import { api } from "./api.js";
import { parseExecutionPlan, applyFileCap, applyStoryCap, resolveFileOverlaps, serializePlan, runCriticValidation, formatCriticFeedback, AUTO_APPROVAL_THRESHOLD, } from "./plan-validator.js";
import { generateTextWithTools } from "./ai-sdk-generate.js";
23
/** Maps the snake_case counters found in stream events to the accumulator's keys. */
const TOKEN_USAGE_FIELDS = [
    ["input_tokens", "inputTokens"],
    ["output_tokens", "outputTokens"],
    ["cache_creation_input_tokens", "cacheCreationTokens"],
    ["cache_read_input_tokens", "cacheReadTokens"],
];
/**
 * Fold the token counters carried by a stream-json event into `usage`.
 *
 * Counters are looked up at `event.usage`, `event.message.usage` and
 * `event.result.usage`. Each accumulator field keeps the highest value seen
 * (Math.max), never a running sum.
 *
 * @param {unknown} event - Parsed stream event. Null and non-object values are ignored.
 * @param {{inputTokens: number, outputTokens: number, cacheCreationTokens: number, cacheReadTokens: number}} usage
 *   Accumulator, mutated in place.
 * @returns {void}
 */
function extractTokenUsage(event, usage) {
    // JSON.parse can legitimately yield null or a primitive; there is nothing to read then.
    if (event === null || typeof event !== "object")
        return;
    const candidates = [
        event.usage,
        event.message?.usage,
        event.result?.usage,
    ];
    for (const candidate of candidates) {
        if (!candidate || typeof candidate !== "object")
            continue;
        for (const [wireKey, usageKey] of TOKEN_USAGE_FIELDS) {
            const value = candidate[wireKey];
            // Number.isFinite rejects non-numbers and NaN/Infinity, which would
            // otherwise stick in the accumulator via Math.max.
            if (Number.isFinite(value))
                usage[usageKey] = Math.max(usage[usageKey], value);
        }
    }
}
47
/**
 * Send the token usage accumulated so far to the cloud API.
 *
 * Nothing is sent while both the input and output counters are still zero.
 * A failed POST is swallowed on purpose: usage reporting is best-effort.
 *
 * @param {string} taskId - Task the usage belongs to.
 * @param {{inputTokens: number, outputTokens: number, cacheCreationTokens: number, cacheReadTokens: number}} usage
 * @param {string} model - Model name to attribute the usage to.
 * @param {string} mode - Passed through to the API unchanged.
 * @returns {Promise<void>}
 */
async function reportPlanningUsage(taskId, usage, model, mode) {
    const { inputTokens, outputTokens, cacheCreationTokens, cacheReadTokens } = usage;
    const nothingToReport = inputTokens === 0 && outputTokens === 0;
    if (nothingToReport) {
        return;
    }
    const payload = {
        inputTokens,
        outputTokens,
        cacheCreationTokens,
        cacheReadTokens,
        model,
        mode,
    };
    try {
        await api.post(`/api/tasks/${taskId}/usage/partial`, payload);
    }
    catch {
        // Fire and forget
    }
}
67
/** Upper bound on Planner-Critic rounds before planning gives up. */
const MAX_ITERATIONS = 3;
/**
 * Dimmed local-time prefix for console log lines.
 * @returns {string}
 */
function ts() {
    const now = new Date();
    return chalk.dim(now.toLocaleTimeString());
}
73
/**
 * Pending dashboard log entries plus the promise of the drain in flight.
 *
 * Entries are sent one batch at a time rather than as many concurrent POSTs:
 * a burst from flushTextBuffer() can enqueue 15-30 entries at once, and
 * sending those concurrently saturates the API's DB connection pool (max 10),
 * causing poll timeouts, transient 401s, and multi-second stalls.
 */
const logQueue = [];
let logDrainPromise = null;
/**
 * Send queued entries in batches of at most 50 until the queue is empty.
 * A batch whose POST fails is dropped (best-effort delivery).
 * @returns {Promise<void>}
 */
async function drainLogQueue() {
    for (let batch = logQueue.splice(0, 50); batch.length > 0; batch = logQueue.splice(0, 50)) {
        try {
            await api.post("/api/control-center/logs/batch", { entries: batch }, { timeout: 5_000 });
        }
        catch {
            // Best-effort — drop on failure
        }
    }
}
93
/**
 * Queue a log line for the cloud dashboard.
 *
 * The queue holds at most 200 entries; when full, the oldest entry is
 * discarded. A drain is started only if none is currently running, so there
 * is never more than one in-flight POST.
 *
 * @param {string} taskId
 * @param {string} message
 * @param {string} [type="system"]
 * @param {string} [severity="info"]
 * @returns {Promise<void>} Resolves once the entry is queued, not once it is sent.
 */
async function postLog(taskId, message, type = "system", severity = "info") {
    const queueIsFull = logQueue.length >= 200;
    if (queueIsFull) {
        logQueue.shift(); // drop oldest
    }
    const entry = { taskId, message, type, severity };
    logQueue.push(entry);
    if (logDrainPromise) {
        return;
    }
    const clearDrain = () => {
        logDrainPromise = null;
    };
    logDrainPromise = drainLogQueue().finally(clearDrain);
}
107
/**
 * Wait until every queued log entry has been handed to the API.
 *
 * Awaits the drain already in flight (if any), then starts and awaits one
 * more drain if entries are still queued. Call before cleanup.
 * @returns {Promise<void>}
 */
async function flushLogQueue() {
    if (logDrainPromise) {
        await logDrainPromise;
    }
    if (logQueue.length === 0) {
        return;
    }
    const clearDrain = () => {
        logDrainPromise = null;
    };
    logDrainPromise = drainLogQueue().finally(clearDrain);
    await logDrainPromise;
}
120
/**
 * Publish a planning progress snapshot to the cloud API, which relays it over
 * SSE to the dashboard's progress bar (PlanningTerminalBar).
 *
 * Failures are swallowed: progress updates are best-effort.
 *
 * @param {string} taskId
 * @param {string} phase - Current planning phase identifier.
 * @param {number} elapsedSeconds
 * @param {string} detail - Human-readable status line.
 * @param {number} charsGenerated
 * @param {number} toolCallCount
 * @returns {Promise<void>}
 */
async function postProgress(taskId, phase, elapsedSeconds, detail, charsGenerated, toolCallCount) {
    const snapshot = {
        taskId,
        phase,
        elapsedSeconds,
        detail,
        charsGenerated,
        toolCallCount,
    };
    try {
        await api.post("/api/agent/planning-progress", snapshot);
    }
    catch {
        // Fire and forget
    }
}
139
/** Log-line prefix, kept identical to the local workermill dashboard format. */
const PREFIX = "[🗺️ planning_agent 🤖]";
/**
 * Render a duration in seconds as "28s" or "1m 25s".
 * The minutes part is omitted when it would be zero.
 * @param {number} seconds
 * @returns {string}
 */
function formatElapsed(seconds) {
    const secs = seconds % 60;
    const mins = Math.floor(seconds / 60);
    if (mins > 0) {
        return `${mins}m ${secs}s`;
    }
    return `${secs}s`;
}
147
/**
 * Build the dashboard log line for a planning phase.
 *
 * @param {string} phase - One of "initializing", "reading_repo", "analyzing",
 *   "generating_plan", "validating", "complete".
 * @param {number} elapsed - Seconds since planning started; only shown for "generating_plan".
 * @returns {string|undefined} The prefixed label, or undefined for an unrecognized phase.
 */
function phaseLabel(phase, elapsed) {
    if (phase === "generating_plan") {
        return `${PREFIX} Planning in progress — analyzing requirements and decomposing into steps (${formatElapsed(elapsed)} elapsed)`;
    }
    const fixedLabels = new Map([
        ["initializing", "Starting planning agent..."],
        ["reading_repo", "Reading repository structure..."],
        ["analyzing", "Analyzing requirements..."],
        ["validating", "Validating plan..."],
        ["complete", "Planning complete"],
    ]);
    const text = fixedLabels.get(phase);
    return text === undefined ? undefined : `${PREFIX} ${text}`;
}
157
/**
 * Run the Claude CLI with stream-json output and mirror its progress to the
 * cloud dashboard (phase milestones, streamed text lines, SSE progress and
 * periodic token usage).
 *
 * @param {string} claudePath - Executable to spawn.
 * @param {string} model - Value passed to `--model`.
 * @param {string} prompt - Written to the child's stdin.
 * @param {object} env - Environment for the child process.
 * @param {string} taskId - Task used for dashboard logging and usage reports.
 * @param {number} startTime - Epoch milliseconds; elapsed times are measured from it.
 * @param {string} [cwd] - Working directory for the child.
 * @returns {Promise<string>} Text of the `result` event when present, otherwise
 *   the text accumulated from streamed events.
 *   Rejects when the CLI exits with a non-zero/null code, fails to spawn, or
 *   runs longer than 20 minutes.
 */
function runClaudeCli(claudePath, model, prompt, env, taskId, startTime, cwd) {
    const taskLabel = chalk.cyan(taskId.slice(0, 8));
    return new Promise((resolve, reject) => {
        const cliArgs = [
            "--print",
            "--verbose",
            "--output-format", "stream-json",
            "--model", model,
            "--permission-mode", "bypassPermissions",
        ];
        const proc = spawn(claudePath, cliArgs, {
            cwd,
            env,
            stdio: ["pipe", "pipe", "pipe"],
        });
        // If the child dies before reading the whole prompt, the pipe emits an
        // error (e.g. EPIPE). Without a listener that would be an unhandled
        // 'error' event; the failure itself surfaces via 'exit' / 'error' below.
        proc.stdin.on("error", () => { });
        proc.stdin.write(prompt);
        proc.stdin.end();
        // Decode at the stream level so a multi-byte UTF-8 character split
        // across two chunks is not corrupted by per-chunk toString().
        proc.stdout.setEncoding("utf8");
        proc.stderr.setEncoding("utf8");
        let fullText = "";
        let resultText = "";
        let stderrOutput = "";
        let charsReceived = 0;
        let toolCallCount = 0;
        // Token usage accumulator — extractTokenUsage keeps the max of each counter
        const tokenUsage = { inputTokens: 0, outputTokens: 0, cacheCreationTokens: 0, cacheReadTokens: 0 };
        let resultModel = model;
        // Buffered text streaming. LLM deltas are tiny fragments; they are
        // accumulated here and a 500 ms interval flushes every complete line
        // as a log entry. On exit the remainder (including an incomplete
        // trailing line) is flushed too.
        let textBuffer = "";
        function flushTextBuffer(final = false) {
            if (!textBuffer)
                return;
            const parts = textBuffer.split("\n");
            // Keep the incomplete trailing fragment unless this is the final flush
            const incomplete = final ? "" : (parts.pop() || "");
            for (const line of parts) {
                if (line.trim()) {
                    postLog(taskId, `${PREFIX} ${line}`, "output");
                    // Echo planner thoughts to local terminal
                    const truncated = line.trim().length > 160 ? line.trim().substring(0, 160) + "…" : line.trim();
                    console.log(`${ts()} ${taskLabel} ${chalk.dim("💭")} ${chalk.dim(truncated)}`);
                }
            }
            textBuffer = incomplete;
        }
        // Phase detection state
        let currentPhase = "initializing";
        let firstTextSeen = false;
        let lastProgressLogAt = 0;
        const milestoneSent = { started: true, reading: false, analyzing: false, generating: false };
        // Log a milestone (dashboard + local terminal) when the phase changes
        function transitionPhase(newPhase) {
            if (newPhase === currentPhase)
                return;
            currentPhase = newPhase;
            const elapsed = Math.round((Date.now() - startTime) / 1000);
            const msg = phaseLabel(newPhase, elapsed);
            postLog(taskId, msg);
            console.log(`${ts()} ${taskLabel} ${chalk.dim(msg)}`);
        }
        // Record generated text in every accumulator that tracks it
        function appendText(text) {
            fullText += text;
            charsReceived += text.length;
            textBuffer += text;
        }
        // Phase milestones driven by text: "analyzing" on the first text after
        // at least one tool call, "generating_plan" once more than 500 chars arrived
        function noteTextMilestones() {
            if (!firstTextSeen) {
                firstTextSeen = true;
                if (toolCallCount > 0 && !milestoneSent.analyzing) {
                    transitionPhase("analyzing");
                    milestoneSent.analyzing = true;
                }
            }
            if (charsReceived > 500 && !milestoneSent.generating) {
                transitionPhase("generating_plan");
                milestoneSent.generating = true;
                lastProgressLogAt = Math.round((Date.now() - startTime) / 1000);
            }
        }
        // Count a tool call; the first one marks the "reading_repo" phase
        function noteToolUse() {
            toolCallCount++;
            if (!milestoneSent.reading) {
                transitionPhase("reading_repo");
                milestoneSent.reading = true;
            }
        }
        // Handle one line of CLI stdout. When `rawFallback` is true, a line
        // that cannot be processed as a JSON event is kept as raw text.
        function handleStreamLine(line, rawFallback = true) {
            const trimmed = line.trim();
            if (!trimmed)
                return;
            try {
                const event = JSON.parse(trimmed);
                // Claude CLI stream-json wraps content in assistant message events
                if (event.type === "assistant" && event.message?.content) {
                    const content = event.message.content;
                    if (Array.isArray(content)) {
                        for (const block of content) {
                            if (block.type === "text" && block.text) {
                                appendText(block.text);
                                noteTextMilestones();
                            }
                            else if (block.type === "tool_use") {
                                noteToolUse();
                            }
                        }
                    }
                    else if (typeof content === "string" && content) {
                        // Plain-string content is accumulated without milestone checks
                        appendText(content);
                    }
                }
                else if (event.type === "content_block_delta" && event.delta?.text) {
                    // Fallback: raw API streaming format
                    appendText(event.delta.text);
                    noteTextMilestones();
                }
                else if (event.type === "content_block_start" && event.content_block?.type === "tool_use") {
                    noteToolUse();
                }
                else if (event.type === "result" && event.result) {
                    resultText = typeof event.result === "string" ? event.result : "";
                }
                // Extract token usage from any event that carries it
                extractTokenUsage(event, tokenUsage);
                if (event.type === "result" && event.total_cost_usd !== undefined) {
                    // Result event also carries model info
                    if (event.modelUsage && typeof event.modelUsage === "object") {
                        const models = Object.keys(event.modelUsage);
                        if (models.length > 0)
                            resultModel = models[0];
                    }
                }
            }
            catch {
                if (rawFallback) {
                    // Not valid JSON — raw text, accumulate
                    fullText += trimmed + "\n";
                    charsReceived += trimmed.length;
                }
            }
        }
        // Flush buffered LLM text to dashboard every 500 ms (complete lines only)
        const textFlushInterval = setInterval(() => flushTextBuffer(), 500);
        // SSE progress updates every 2s — drives PlanningTerminalBar in dashboard
        const sseProgressInterval = setInterval(() => {
            const elapsed = Math.round((Date.now() - startTime) / 1000);
            postProgress(taskId, currentPhase, elapsed, phaseLabel(currentPhase, elapsed), charsReceived, toolCallCount);
        }, 2_000);
        // Time-based phase fallback + periodic log line (every 30s during generation)
        const progressInterval = setInterval(() => {
            const elapsed = Math.round((Date.now() - startTime) / 1000);
            // Time-based phase fallback (in case stream events are sparse)
            if (currentPhase === "initializing" && elapsed >= 5) {
                transitionPhase("reading_repo");
            }
            else if (currentPhase === "reading_repo" && elapsed >= 15 && !firstTextSeen) {
                transitionPhase("analyzing");
            }
            // Periodic progress during generation
            if (currentPhase === "generating_plan" && elapsed - lastProgressLogAt >= 30) {
                lastProgressLogAt = elapsed;
                const msg = `${PREFIX} Planning in progress — analyzing requirements and decomposing into steps (${formatElapsed(elapsed)} elapsed)`;
                postLog(taskId, msg);
                console.log(`${ts()} ${taskLabel} ${chalk.dim(msg)}`);
            }
        }, 5_000);
        // Split stdout into newline-delimited JSON events
        let lineBuffer = "";
        proc.stdout.on("data", (data) => {
            lineBuffer += data;
            const lines = lineBuffer.split("\n");
            lineBuffer = lines.pop() || "";
            for (const line of lines) {
                handleStreamLine(line);
            }
        });
        proc.stderr.on("data", (chunk) => {
            stderrOutput += chunk;
        });
        // Report partial token usage every 30s during planning
        const usageReportInterval = setInterval(() => {
            if (tokenUsage.inputTokens > 0 || tokenUsage.outputTokens > 0) {
                reportPlanningUsage(taskId, tokenUsage, resultModel, "greatest").catch(() => { });
            }
        }, 30_000);
        function cleanupAll() {
            clearInterval(progressInterval);
            clearInterval(sseProgressInterval);
            clearInterval(textFlushInterval);
            clearInterval(usageReportInterval);
            flushTextBuffer(true);
        }
        const timeout = setTimeout(() => {
            cleanupAll();
            proc.kill("SIGTERM");
            reject(new Error("Claude CLI timed out after 20 minutes"));
        }, 1_200_000);
        // NOTE(review): 'exit' can fire before the stdio pipes are fully
        // drained; 'close' would wait for them but may stall if a grandchild
        // keeps a pipe open. Confirm which trade-off is intended.
        proc.on("exit", (code) => {
            clearTimeout(timeout);
            // A final event written without a trailing newline is still sitting
            // in lineBuffer. Recover it only if it is a complete JSON event; a
            // partial fragment must not leak into the plan text.
            if (lineBuffer.trim()) {
                handleStreamLine(lineBuffer, false);
                lineBuffer = "";
            }
            cleanupAll();
            // Emit final "validating" phase to dashboard
            const elapsedAtExit = Math.round((Date.now() - startTime) / 1000);
            postProgress(taskId, "validating", elapsedAtExit, "Validating plan...", charsReceived, toolCallCount);
            // Final usage report
            reportPlanningUsage(taskId, tokenUsage, resultModel, "greatest").catch(() => { });
            if (code !== 0) {
                reject(new Error(`Claude CLI failed (exit ${code}): ${stderrOutput.substring(0, 300)}`));
            }
            else {
                // Prefer the result event's text (authoritative), fall back to accumulated text
                resolve(resultText || fullText);
            }
        });
        proc.on("error", (err) => {
            clearTimeout(timeout);
            cleanupAll();
            reject(err);
        });
    });
}
387
/**
 * Pick the credential for a planning provider out of the claim credentials.
 *
 * For "ollama" the value is a base URL rather than an API key, and it falls
 * back to the local default even when no credentials object was supplied.
 *
 * @param {string} provider - "anthropic", "openai", "google" or "ollama".
 * @param {object} [credentials] - Claim credentials; may be missing.
 * @returns {string|undefined} The key / base URL, or undefined when unavailable
 *   or the provider is not recognized.
 */
function resolveProviderApiKey(provider, credentials) {
    // Ollama needs no secret, so it must not be blocked by missing credentials.
    if (provider === "ollama") {
        return credentials?.ollamaBaseUrl || "http://localhost:11434";
    }
    if (!credentials)
        return undefined;
    switch (provider) {
        case "anthropic":
            return credentials.anthropicApiKey;
        case "openai":
            return credentials.openaiApiKey;
        case "google":
            return credentials.googleApiKey;
        default:
            return undefined;
    }
}
407
/**
 * Build an HTTPS git clone URL that embeds the access token as credentials.
 *
 * The token is percent-encoded so reserved characters (`@`, `/`, `:` …) cannot
 * break the URL's user-info part. Tokens made only of unreserved characters
 * come out unchanged.
 *
 * @param {string} repo - Repository path, e.g. "owner/name".
 * @param {string} token - Access token for the SCM provider.
 * @param {string} [scmProvider] - "bitbucket", "gitlab" or "github"; anything
 *   else is treated as GitHub.
 * @returns {string} Clone URL. It contains the secret — never log it.
 */
function buildCloneUrl(repo, token, scmProvider) {
    const safeToken = encodeURIComponent(token);
    switch (scmProvider) {
        case "bitbucket":
            return `https://x-token-auth:${safeToken}@bitbucket.org/${repo}.git`;
        case "gitlab":
            return `https://oauth2:${safeToken}@gitlab.com/${repo}.git`;
        case "github":
        default:
            return `https://x-access-token:${safeToken}@github.com/${repo}.git`;
    }
}
421
/**
 * Shallow-clone the target repo into a temp directory so the planner can
 * explore it with tools.
 *
 * git is invoked without a shell (argument array), so the repo name and token
 * are never interpreted as shell syntax. On failure the token is redacted from
 * the error text before it is logged, because the failure message of the
 * child-process call includes the command line and therefore the
 * credential-bearing URL.
 *
 * @param {string} repo - Repository path, e.g. "owner/name".
 * @param {string} token - SCM access token.
 * @param {string} scmProvider - Passed to buildCloneUrl.
 * @param {string} taskId - Used for the log label and the directory name.
 * @returns {Promise<string|null>} Path of the clone, or null when cloning failed.
 */
async function cloneTargetRepo(repo, token, scmProvider, taskId) {
    const taskLabel = chalk.cyan(taskId.slice(0, 8));
    const tmpDir = `/tmp/workermill-planning-${taskId.slice(0, 8)}-${Date.now()}`;
    try {
        const cloneUrl = buildCloneUrl(repo, token, scmProvider);
        console.log(`${ts()} ${taskLabel} ${chalk.dim("Cloning repo for planner...")}`);
        execFileSync("git", ["clone", "--depth", "1", "--single-branch", cloneUrl, tmpDir], {
            stdio: "ignore",
            timeout: 60_000,
        });
        console.log(`${ts()} ${taskLabel} ${chalk.green("✓")} Repo cloned to ${chalk.dim(tmpDir)}`);
        return tmpDir;
    }
    catch (error) {
        let errMsg = error instanceof Error ? error.message : String(error);
        // Redact both the raw and the percent-encoded form of the token
        if (token) {
            for (const secret of new Set([String(token), encodeURIComponent(token)])) {
                errMsg = errMsg.split(secret).join("***");
            }
        }
        console.error(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} Clone failed, planner will run without repo access: ${errMsg.substring(0, 100)}`);
        // Cleanup partial clone
        try {
            execFileSync("rm", ["-rf", tmpDir], { stdio: "ignore" });
        }
        catch {
            /* ignore */
        }
        return null;
    }
}
451
/**
 * Log a plan adjustment (file cap, story cap, overlap resolution): a warning
 * summary to console and dashboard, then each detail line to the console.
 */
async function logPlanAdjustment(taskId, taskLabel, msg, details) {
    console.log(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} ${msg}`);
    await postLog(taskId, msg);
    for (const detail of details) {
        console.log(`${ts()} ${taskLabel} ${chalk.dim(detail)}`);
    }
}
/**
 * Log one critic note (risks / suggestions), either as a dimmed info line or,
 * when `asError` is set, as an error on both console and dashboard.
 */
async function logCriticNote(taskId, taskLabel, msg, asError) {
    if (asError) {
        console.error(`${ts()} ${taskLabel} ${msg}`);
        await postLog(taskId, msg, "error", "error");
        return;
    }
    console.log(`${ts()} ${taskLabel} ${chalk.dim(msg)}`);
    await postLog(taskId, msg);
}
/**
 * Plan a task using a Planner-Critic validation loop.
 *
 * Flow:
 * 1. Fetch the planning prompt (plus model/provider/maxStories) from the cloud API
 * 2. Clone the target repo, when a repo and SCM token are available
 * 3. Generate a plan: Claude CLI for the "anthropic" provider, AI SDK otherwise
 * 4. Parse the plan, then apply file cap, story cap and file-overlap resolution.
 *    If parsing fails, the raw output is posted for the server to parse.
 * 5. Run critic validation. If the critic itself yields no result, the plan is
 *    posted without a quality gate.
 * 6. Critic approves (approved flag, or score >= AUTO_APPROVAL_THRESHOLD): post the plan
 * 7. Critic rejects: re-run the planner with the critic feedback appended to the
 *    base prompt (up to MAX_ITERATIONS attempts)
 * 8. After MAX_ITERATIONS without approval: post the best plan if it scored >= 50
 * 9. Otherwise (or if the server refuses the fallback): report plan-failed
 *
 * Queued logs are flushed and the temp clone is removed on every path once the
 * loop has started.
 *
 * @param {object} task - Reads id, description, summary, githubRepo, scmProvider.
 * @param {object} config - Reads agentId and the bitbucket/gitlab/github tokens.
 * @param {object} [credentials] - Provider credentials for non-Anthropic planning.
 * @returns {Promise<boolean>} true when a plan was accepted by the server.
 */
export async function planTask(task, config, credentials) {
    const taskId = task.id;
    const taskLabel = chalk.cyan(taskId.slice(0, 8));
    console.log(`${ts()} ${taskLabel} Fetching planning prompt...`);
    await postLog(taskId, `${PREFIX} Fetching planning prompt from cloud API...`);
    // 1. Fetch the assembled planning prompt from the cloud API
    const promptResponse = await api.get("/api/agent/planning-prompt", {
        params: { taskId },
    });
    const { prompt: basePrompt, model: cliModel, provider: planningProvider, maxStories: apiMaxStories, } = promptResponse.data;
    const maxStories = typeof apiMaxStories === "number" ? apiMaxStories : 8;
    const provider = (planningProvider || "anthropic");
    const isAnthropicPlanning = provider === "anthropic";
    const claudePath = process.env.CLAUDE_CLI_PATH || findClaudePath() || "claude";
    // Copy of the environment without CLAUDE_CODE_OAUTH_TOKEN
    const { CLAUDE_CODE_OAUTH_TOKEN: _droppedToken, ...cleanEnv } = process.env;
    // Provider API key, needed only for non-Anthropic planning
    const providerApiKey = resolveProviderApiKey(provider, credentials);
    const startTime = Date.now();
    const secondsSinceStart = () => Math.round((Date.now() - startTime) / 1000);
    // PRD for critic validation: task description, falling back to summary
    const prd = task.description || task.summary;
    // Clone target repo so the planner can explore with tools
    let repoPath = null;
    if (task.githubRepo) {
        const scmProvider = task.scmProvider || "github";
        let scmToken;
        if (scmProvider === "bitbucket") {
            scmToken = config.bitbucketToken;
        }
        else if (scmProvider === "gitlab") {
            scmToken = config.gitlabToken;
        }
        else {
            scmToken = config.githubToken;
        }
        if (!scmToken) {
            console.log(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} No SCM token for ${scmProvider}, planner will run without repo access`);
        }
        else {
            repoPath = await cloneTargetRepo(task.githubRepo, scmToken, scmProvider, taskId);
        }
    }
    // 2. Planner-Critic iteration loop
    let currentPrompt = basePrompt;
    let bestPlan = null;
    let bestScore = 0;
    // Critic results per iteration, sent along for analytics
    const criticHistory = [];
    let totalFileCapTruncations = 0;
    try {
        for (let iteration = 1; iteration <= MAX_ITERATIONS; iteration++) {
            const iterLabel = MAX_ITERATIONS > 1 ? ` (attempt ${iteration}/${MAX_ITERATIONS})` : "";
            const providerLabel = `${provider}/${cliModel}`;
            const providerTag = chalk.dim(`(${chalk.yellow(providerLabel)})`);
            if (iteration === 1) {
                console.log(`${ts()} ${taskLabel} Running planner ${providerTag}`);
                await postLog(taskId, `${PREFIX} Starting planning agent using ${providerLabel}`);
            }
            else {
                console.log(`${ts()} ${taskLabel} Running planner${iterLabel} ${providerTag}`);
                await postLog(taskId, `${PREFIX} Re-planning${iterLabel} using ${providerLabel}`);
            }
            // 2a. Generate plan via Claude CLI (Anthropic) or AI SDK (other providers)
            let rawOutput;
            try {
                if (isAnthropicPlanning) {
                    rawOutput = await runClaudeCli(claudePath, cliModel, currentPrompt, cleanEnv, taskId, startTime, repoPath || undefined);
                }
                else {
                    if (!providerApiKey) {
                        throw new Error(`No API key available for provider "${provider}". Configure it in Settings > Integrations.`);
                    }
                    await postProgress(taskId, "generating_plan", secondsSinceStart(), "Generating plan via AI SDK...", 0, 0);
                    // Tools are enabled only when a cloned repo is available
                    rawOutput = await generateTextWithTools({
                        provider,
                        model: cliModel,
                        apiKey: providerApiKey,
                        prompt: currentPrompt,
                        workingDir: repoPath || undefined,
                        enableTools: !!repoPath,
                        maxSteps: 10,
                    });
                    // Move the dashboard progress bar on to "validating"
                    await postProgress(taskId, "validating", secondsSinceStart(), "Validating plan...", rawOutput.length, 0);
                }
            }
            catch (error) {
                const failedAfter = secondsSinceStart();
                const errMsg = error instanceof Error ? error.message : String(error);
                console.error(`${ts()} ${taskLabel} ${chalk.red("✗")} Failed after ${failedAfter}s: ${errMsg.substring(0, 100)}`);
                await postLog(taskId, `${PREFIX} Planning failed after ${formatElapsed(failedAfter)}: ${errMsg.substring(0, 200)}`, "error", "error");
                return false;
            }
            const elapsed = secondsSinceStart();
            const doneLabel = isAnthropicPlanning ? "Claude CLI" : `${provider} API`;
            console.log(`${ts()} ${taskLabel} ${chalk.green("✓")} ${doneLabel} done ${chalk.dim(`(${elapsed}s, ${rawOutput.length} chars)`)}`);
            // 2b. Parse plan from raw output
            let plan;
            try {
                plan = parseExecutionPlan(rawOutput);
            }
            catch (error) {
                const errMsg = error instanceof Error ? error.message : String(error);
                console.error(`${ts()} ${taskLabel} ${chalk.red("✗")} Plan parse failed: ${errMsg.substring(0, 100)}`);
                await postLog(taskId, `${PREFIX} Failed to parse execution plan from Claude output: ${errMsg.substring(0, 200)}`, "error", "error");
                // Local parsing failed: hand the raw output to the server instead
                return await postRawPlan(taskId, rawOutput, config.agentId, taskLabel, elapsed);
            }
            // 2c. Apply file cap (max 5 files per story)
            const fileCap = applyFileCap(plan);
            const truncatedCount = fileCap.truncatedCount;
            if (truncatedCount > 0) {
                totalFileCapTruncations += truncatedCount;
                await logPlanAdjustment(taskId, taskLabel, `${PREFIX} File cap applied: ${truncatedCount} stories truncated to max 5 targetFiles`, fileCap.details);
            }
            // 2c2. Apply story cap (max stories from org calibration)
            const storyCap = applyStoryCap(plan, maxStories);
            if (storyCap.droppedCount > 0) {
                await logPlanAdjustment(taskId, taskLabel, `${PREFIX} Story cap applied: ${storyCap.droppedCount} stories dropped (max ${maxStories})`, storyCap.details);
            }
            // 2c3. Resolve file overlaps (assign each shared file to first story only)
            const overlaps = resolveFileOverlaps(plan);
            if (overlaps.resolvedCount > 0) {
                await logPlanAdjustment(taskId, taskLabel, `${PREFIX} File overlap resolved: ${overlaps.resolvedCount} shared file(s) de-duped across stories`, overlaps.details);
            }
            console.log(`${ts()} ${taskLabel} Plan: ${chalk.bold(plan.stories.length)} stories (max ${maxStories})`);
            await postLog(taskId, `${PREFIX} Plan generated: ${plan.stories.length} stories (${formatElapsed(elapsed)}). Running critic validation...`);
            // 2d. Run critic validation
            const criticResult = await runCriticValidation(claudePath, cliModel, prd, plan, cleanEnv, taskLabel, provider, providerApiKey, taskId);
            // 2e. No critic result (timeout, parse error, etc.) — post plan without critic gate
            if (!criticResult) {
                const msg = `${PREFIX} ⚠️ CRITIC BYPASSED — Critic validation failed (timeout/parse error). Posting plan WITHOUT quality gate.`;
                console.log(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} ${msg}`);
                await postLog(taskId, msg, "error", "warning");
                const planningDurationMs = Date.now() - startTime;
                return await postValidatedPlan(taskId, plan, config.agentId, taskLabel, elapsed, undefined, undefined, criticHistory, totalFileCapTruncations, planningDurationMs, iteration);
            }
            // Track best plan across iterations
            if (criticResult.score > bestScore) {
                bestPlan = plan;
                bestScore = criticResult.score;
            }
            const approved = criticResult.approved || criticResult.score >= AUTO_APPROVAL_THRESHOLD;
            // Record critic history for this iteration
            criticHistory.push({
                iteration,
                score: criticResult.score,
                approved,
                risks: criticResult.risks,
                suggestions: criticResult.suggestions,
                filesCapApplied: truncatedCount > 0 ? truncatedCount : undefined,
            });
            const hasRisks = criticResult.risks.length > 0;
            const hasSuggestions = Boolean(criticResult.suggestions && criticResult.suggestions.length > 0);
            if (approved) {
                // Approved — post the capped plan
                const msg = `${PREFIX} Critic approved (score: ${criticResult.score}/100)`;
                console.log(`${ts()} ${taskLabel} ${chalk.green("✓")} ${msg}`);
                await postLog(taskId, msg);
                if (hasRisks) {
                    await logCriticNote(taskId, taskLabel, `${PREFIX} Critic risks (non-blocking): ${criticResult.risks.join("; ")}`, false);
                }
                const planningDurationMs = Date.now() - startTime;
                return await postValidatedPlan(taskId, plan, config.agentId, taskLabel, elapsed, criticResult.score, criticResult.risks, criticHistory, totalFileCapTruncations, planningDurationMs, iteration);
            }
            if (iteration < MAX_ITERATIONS) {
                // 2f. Rejected — append critic feedback for next iteration
                const feedback = formatCriticFeedback(criticResult);
                currentPrompt = basePrompt + "\n\n" + feedback;
                const msg = `${PREFIX} Critic rejected (score: ${criticResult.score}/100, threshold: ${AUTO_APPROVAL_THRESHOLD}). Re-planning with feedback...`;
                console.log(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} ${msg}`);
                await postLog(taskId, msg);
                if (hasRisks) {
                    await logCriticNote(taskId, taskLabel, `${PREFIX} Critic risks: ${criticResult.risks.join("; ")}`, false);
                }
                if (hasSuggestions) {
                    await logCriticNote(taskId, taskLabel, `${PREFIX} Critic suggestions: ${criticResult.suggestions.join("; ")}`, false);
                }
            }
            else {
                // Final iteration — rejected
                const msg = `${PREFIX} Critic rejected after ${MAX_ITERATIONS} iterations (best score: ${bestScore}/100, threshold: ${AUTO_APPROVAL_THRESHOLD})`;
                console.error(`${ts()} ${taskLabel} ${chalk.red("✗")} ${msg}`);
                await postLog(taskId, msg, "error", "error");
                if (hasRisks) {
                    await logCriticNote(taskId, taskLabel, `${PREFIX} Final risks: ${criticResult.risks.join("; ")}`, true);
                }
                if (hasSuggestions) {
                    await logCriticNote(taskId, taskLabel, `${PREFIX} Suggestions: ${criticResult.suggestions.join("; ")}`, true);
                }
            }
        }
        // All iterations exhausted — try the best-plan fallback before failing:
        // a plan that reached the fallback minimum is posted with a warning
        // rather than discarded.
        const BEST_PLAN_FALLBACK_THRESHOLD = 50;
        const fallbackEligible = Boolean(bestPlan && bestScore >= BEST_PLAN_FALLBACK_THRESHOLD);
        if (fallbackEligible) {
            const elapsed = secondsSinceStart();
            const msg = `${PREFIX} Best-plan fallback: posting plan with score ${bestScore}/100 (below ${AUTO_APPROVAL_THRESHOLD} threshold, above ${BEST_PLAN_FALLBACK_THRESHOLD} minimum)`;
            console.log(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} ${msg}`);
            await postLog(taskId, msg);
            const planningDurationMs = Date.now() - startTime;
            const fallbackPosted = await postValidatedPlan(taskId, bestPlan, config.agentId, taskLabel, elapsed, bestScore, [`Best-plan fallback: critic rejected after ${MAX_ITERATIONS} iterations`], criticHistory, totalFileCapTruncations, planningDurationMs, MAX_ITERATIONS);
            if (fallbackPosted) {
                return true;
            }
            // Server refused the fallback (404, 409, etc.) — continue to
            // plan-failed so the task doesn't stay stuck in "planning".
            console.log(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} ${PREFIX} Fallback post rejected by server, reporting plan-failed`);
            await postLog(taskId, `${PREFIX} Fallback plan rejected by server — reporting failure`);
        }
        // No usable plan (or fallback refused) — tell the server so the task
        // doesn't stay in "planning" status forever (infinite retry loop).
        try {
            let failReason;
            if (fallbackEligible) {
                failReason = `Best-plan fallback rejected by server after ${MAX_ITERATIONS} iterations (best score: ${bestScore}/100)`;
            }
            else {
                failReason = `Critic rejected after ${MAX_ITERATIONS} iterations (best score: ${bestScore}/100, threshold: ${AUTO_APPROVAL_THRESHOLD}, fallback minimum: ${BEST_PLAN_FALLBACK_THRESHOLD})`;
            }
            await api.post("/api/agent/plan-failed", {
                taskId,
                agentId: config.agentId,
                reason: failReason,
                criticHistory,
            });
        }
        catch {
            // Best-effort — if the endpoint doesn't exist yet, the task will still
            // be picked up again, but at least we tried.
        }
        return false;
    }
    finally {
        // Drain any remaining log entries before cleanup
        await flushLogQueue();
        // Cleanup temp clone
        if (repoPath) {
            try {
                execSync(`rm -rf "${repoPath}"`, { stdio: "ignore" });
            }
            catch {
                /* ignore */
            }
        }
    }
}
734
/**
 * Post a validated (capped) plan to the cloud API.
 *
 * The plan is re-serialized with serializePlan() before posting; the original
 * comment states the server-side parser expects a JSON code block.
 *
 * @param {string} taskId
 * @param {object} plan - Parsed plan to serialize and send.
 * @param {string} agentId
 * @param {string} taskLabel - Pre-formatted label for console output.
 * @param {number} elapsed - Seconds since planning started (for the final progress event).
 * @param {number} [criticScore]
 * @param {string[]} [criticRisks]
 * @param {object[]} [criticHistory]
 * @param {number} [fileCapTruncations]
 * @param {number} [planningDurationMs]
 * @param {number} [criticIterations]
 * @returns {Promise<boolean>} true when the server accepted the plan, false otherwise.
 */
async function postValidatedPlan(taskId, plan, agentId, taskLabel, elapsed, criticScore, criticRisks, criticHistory, fileCapTruncations, planningDurationMs, criticIterations) {
    const serialized = serializePlan(plan);
    try {
        const result = await api.post("/api/agent/plan-result", {
            taskId,
            rawOutput: serialized,
            agentId,
            criticScore,
            criticRisks,
            criticHistory,
            criticIterations,
            fileCapTruncations,
            planningDurationMs,
        });
        const storyCount = result.data.storyCount;
        console.log(`${ts()} ${taskLabel} ${chalk.green("✓")} Plan validated: ${chalk.bold(storyCount)} stories → ${chalk.green("queued")}`);
        await postLog(taskId, `${PREFIX} Plan validated: ${storyCount} stories. Task queued for execution.`);
        await postProgress(taskId, "complete", elapsed, "Planning complete", 0, 0);
        return true;
    }
    catch (error) {
        const err = error;
        let detail = err?.response?.data?.error || err?.response?.data?.detail || String(error);
        // The server may answer with a structured error body; substring() below
        // needs a string, and throwing from this catch would hide the failure.
        if (typeof detail !== "string") {
            try {
                detail = JSON.stringify(detail) ?? String(detail);
            }
            catch {
                detail = String(detail);
            }
        }
        const statusCode = err?.response?.status ? ` (${err.response.status})` : "";
        console.error(`${ts()} ${taskLabel} ${chalk.red("✗")} Server validation failed${statusCode}: ${detail.substring(0, 100)}`);
        await postLog(taskId, `${PREFIX} Server-side plan validation failed${statusCode}: ${detail.substring(0, 200)}`, "error", "error");
        return false;
    }
}
768
/**
 * Post raw (unparsed) planner output to the cloud API as a fallback, for use
 * when local plan parsing failed and the server should try instead.
 *
 * @param {string} taskId
 * @param {string} rawOutput - Planner output exactly as produced.
 * @param {string} agentId
 * @param {string} taskLabel - Pre-formatted label for console output.
 * @param {number} elapsed - Seconds since planning started (for the final progress event).
 * @returns {Promise<boolean>} true when the server accepted the plan, false otherwise.
 */
async function postRawPlan(taskId, rawOutput, agentId, taskLabel, elapsed) {
    try {
        const result = await api.post("/api/agent/plan-result", {
            taskId,
            rawOutput,
            agentId,
        });
        const storyCount = result.data.storyCount;
        console.log(`${ts()} ${taskLabel} ${chalk.green("✓")} Plan validated (server-side): ${chalk.bold(storyCount)} stories → ${chalk.green("queued")}`);
        await postLog(taskId, `${PREFIX} Plan validated: ${storyCount} stories. Task queued for execution.`);
        await postProgress(taskId, "complete", elapsed, "Planning complete", 0, 0);
        return true;
    }
    catch (error) {
        const err = error;
        // Same endpoint as postValidatedPlan, so read the same error fields
        let detail = err?.response?.data?.error || err?.response?.data?.detail || String(error);
        // A structured error body must be stringified before substring();
        // throwing from this catch would hide the original failure.
        if (typeof detail !== "string") {
            try {
                detail = JSON.stringify(detail) ?? String(detail);
            }
            catch {
                detail = String(detail);
            }
        }
        console.error(`${ts()} ${taskLabel} ${chalk.red("✗")} Validation failed: ${detail.substring(0, 100)}`);
        await postLog(taskId, `${PREFIX} Plan validation failed: ${detail.substring(0, 200)}`, "error", "error");
        return false;
    }
}