@workermill/agent 0.3.1 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,14 +25,23 @@ export async function startCommand(options) {
25
25
  const failing = prereqs.filter((p) => !p.ok);
26
26
  // Auto-pull worker image if it's the only missing prereq
27
27
  const imageMissing = failing.find((p) => p.name === "Worker image");
28
- const otherFailing = failing.filter((p) => p.name !== "Worker image");
29
- if (otherFailing.length > 0) {
28
+ // Claude CLI and auth are soft prerequisites — only needed for Anthropic provider.
29
+ // Non-Anthropic orgs can plan+execute without Claude CLI.
30
+ const softPrereqs = new Set(["Claude CLI", "Claude auth"]);
31
+ const hardFailing = failing.filter((p) => p.name !== "Worker image" && !softPrereqs.has(p.name));
32
+ const softFailing = failing.filter((p) => softPrereqs.has(p.name));
33
+ if (hardFailing.length > 0) {
30
34
  console.log(chalk.red("Prerequisites check failed:"));
31
- for (const p of otherFailing) {
35
+ for (const p of hardFailing) {
32
36
  console.log(chalk.red(` ✗ ${p.name}: ${p.detail}`));
33
37
  }
34
38
  process.exit(1);
35
39
  }
40
+ if (softFailing.length > 0) {
41
+ for (const p of softFailing) {
42
+ console.log(chalk.yellow(` ⚠ ${p.name}: ${p.detail} (required for Anthropic provider)`));
43
+ }
44
+ }
36
45
  if (imageMissing) {
37
46
  console.log(chalk.yellow(` Worker image not found locally. Pulling ${config.workerImage}...`));
38
47
  const { spawnSync } = await import("child_process");
package/dist/config.d.ts CHANGED
@@ -16,6 +16,7 @@ export interface AgentConfig {
16
16
  bitbucketToken: string;
17
17
  gitlabToken: string;
18
18
  workerImage: string;
19
+ teamPlanningEnabled: boolean;
19
20
  }
20
21
  export interface FileConfig {
21
22
  apiUrl: string;
@@ -30,6 +31,7 @@ export interface FileConfig {
30
31
  gitlab: string;
31
32
  };
32
33
  workerImage: string;
34
+ teamPlanningEnabled?: boolean;
33
35
  setupCompletedAt: string;
34
36
  }
35
37
  export declare function getConfigDir(): string;
package/dist/config.js CHANGED
@@ -75,6 +75,7 @@ export function loadConfigFromFile() {
75
75
  bitbucketToken: fc.tokens?.bitbucket || "",
76
76
  gitlabToken: fc.tokens?.gitlab || "",
77
77
  workerImage,
78
+ teamPlanningEnabled: fc.teamPlanningEnabled ?? true,
78
79
  };
79
80
  }
80
81
  /**
@@ -119,6 +120,7 @@ export function loadConfig() {
119
120
  bitbucketToken: process.env.BITBUCKET_TOKEN || "",
120
121
  gitlabToken: process.env.GITLAB_TOKEN || "",
121
122
  workerImage: process.env.WORKER_IMAGE || "workermill-worker:local",
123
+ teamPlanningEnabled: process.env.TEAM_PLANNING_ENABLED !== "false",
122
124
  };
123
125
  }
124
126
  /**
package/dist/planner.d.ts CHANGED
@@ -20,6 +20,8 @@ export interface PlanningTask {
20
20
  id: string;
21
21
  summary: string;
22
22
  description: string | null;
23
+ githubRepo?: string;
24
+ scmProvider?: string;
23
25
  }
24
26
  /**
25
27
  * Run planning for a task with Planner-Critic validation loop.
package/dist/planner.js CHANGED
@@ -15,7 +15,7 @@
15
15
  * sees the same planning progress as cloud mode.
16
16
  */
17
17
  import chalk from "chalk";
18
- import { spawn } from "child_process";
18
+ import { spawn, execSync } from "child_process";
19
19
  import { findClaudePath } from "./config.js";
20
20
  import { api } from "./api.js";
21
21
  import { parseExecutionPlan, applyFileCap, serializePlan, runCriticValidation, formatCriticFeedback, AUTO_APPROVAL_THRESHOLD, } from "./plan-validator.js";
@@ -204,8 +204,8 @@ function runClaudeCli(claudePath, model, prompt, env, taskId, startTime) {
204
204
  clearInterval(progressInterval);
205
205
  clearInterval(sseProgressInterval);
206
206
  proc.kill("SIGTERM");
207
- reject(new Error("Claude CLI timed out after 10 minutes"));
208
- }, 600_000);
207
+ reject(new Error("Claude CLI timed out after 20 minutes"));
208
+ }, 1_200_000);
209
209
  proc.on("exit", (code) => {
210
210
  clearTimeout(timeout);
211
211
  clearInterval(progressInterval);
@@ -249,6 +249,209 @@ function resolveProviderApiKey(provider, credentials) {
249
249
  return undefined;
250
250
  }
251
251
  }
252
+ /**
253
+ * Build a git clone URL with authentication for the given SCM provider.
254
+ */
255
+ function buildCloneUrl(repo, token, scmProvider) {
256
+ switch (scmProvider) {
257
+ case "bitbucket":
258
+ return `https://x-token-auth:${token}@bitbucket.org/${repo}.git`;
259
+ case "gitlab":
260
+ return `https://oauth2:${token}@gitlab.com/${repo}.git`;
261
+ case "github":
262
+ default:
263
+ return `https://x-access-token:${token}@github.com/${repo}.git`;
264
+ }
265
+ }
266
+ /**
267
+ * Clone the target repo to a temp directory for team planning analysis.
268
+ * Returns the path on success, or null on failure (fallback to single-agent).
269
+ */
270
+ async function cloneTargetRepo(repo, token, scmProvider, taskId) {
271
+ const taskLabel = chalk.cyan(taskId.slice(0, 8));
272
+ const tmpDir = `/tmp/workermill-planning-${taskId.slice(0, 8)}-${Date.now()}`;
273
+ try {
274
+ const cloneUrl = buildCloneUrl(repo, token, scmProvider);
275
+ console.log(`${ts()} ${taskLabel} ${chalk.dim("Cloning repo for team planning...")}`);
276
+ execSync(`git clone --depth 1 --single-branch "${cloneUrl}" "${tmpDir}"`, {
277
+ stdio: "ignore",
278
+ timeout: 60_000,
279
+ });
280
+ console.log(`${ts()} ${taskLabel} ${chalk.green("✓")} Repo cloned to ${chalk.dim(tmpDir)}`);
281
+ return tmpDir;
282
+ }
283
+ catch (error) {
284
+ const errMsg = error instanceof Error ? error.message : String(error);
285
+ console.error(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} Clone failed, falling back to single-agent: ${errMsg.substring(0, 100)}`);
286
+ // Cleanup partial clone
287
+ try {
288
+ execSync(`rm -rf "${tmpDir}"`, { stdio: "ignore" });
289
+ }
290
+ catch {
291
+ /* ignore */
292
+ }
293
+ return null;
294
+ }
295
+ }
296
+ /**
297
+ * Run an analyst agent via Claude CLI with tool access to the cloned repo.
298
+ * Returns the analyst's report text, or an empty string on failure.
299
+ */
300
+ function runAnalyst(claudePath, model, prompt, repoPath, env, timeoutMs = 120_000) {
301
+ return new Promise((resolve) => {
302
+ const proc = spawn(claudePath, [
303
+ "--print",
304
+ "--model",
305
+ model,
306
+ "--permission-mode",
307
+ "bypassPermissions",
308
+ "--output-format",
309
+ "stream-json",
310
+ ], {
311
+ cwd: repoPath,
312
+ env,
313
+ stdio: ["pipe", "pipe", "pipe"],
314
+ });
315
+ // Write prompt via stdin (same as runClaudeCli — not via -p arg)
316
+ proc.stdin.write(prompt);
317
+ proc.stdin.end();
318
+ let resultText = "";
319
+ let fullText = "";
320
+ let stderrOutput = "";
321
+ let lineBuffer = "";
322
+ proc.stderr.on("data", (chunk) => {
323
+ stderrOutput += chunk.toString();
324
+ });
325
+ proc.stdout.on("data", (data) => {
326
+ lineBuffer += data.toString();
327
+ const lines = lineBuffer.split("\n");
328
+ lineBuffer = lines.pop() || "";
329
+ for (const line of lines) {
330
+ const trimmed = line.trim();
331
+ if (!trimmed)
332
+ continue;
333
+ try {
334
+ const event = JSON.parse(trimmed);
335
+ if (event.type === "content_block_delta" && event.delta?.text) {
336
+ fullText += event.delta.text;
337
+ }
338
+ else if (event.type === "result" && event.result) {
339
+ resultText =
340
+ typeof event.result === "string" ? event.result : "";
341
+ }
342
+ }
343
+ catch {
344
+ fullText += trimmed + "\n";
345
+ }
346
+ }
347
+ });
348
+ const timeout = setTimeout(() => {
349
+ proc.kill("SIGTERM");
350
+ resolve(resultText || fullText || "");
351
+ }, timeoutMs);
352
+ proc.on("exit", (code) => {
353
+ clearTimeout(timeout);
354
+ if (code !== 0 && stderrOutput) {
355
+ console.error(`${chalk.yellow("⚠")} Analyst exited with code ${code}: ${stderrOutput.substring(0, 200)}`);
356
+ }
357
+ resolve(resultText || fullText || "");
358
+ });
359
+ proc.on("error", (err) => {
360
+ clearTimeout(timeout);
361
+ console.error(`${chalk.yellow("⚠")} Analyst spawn error: ${err.message}`);
362
+ resolve("");
363
+ });
364
+ });
365
+ }
366
+ /** Analyst prompt templates */
367
+ const CODEBASE_ANALYST_PROMPT = `You are analyzing a codebase to help plan a development task.
368
+ Use Glob and Read to explore the repository structure.
369
+ Report:
370
+ 1. Key directories and their purposes
371
+ 2. Frameworks, languages, and patterns used
372
+ 3. Existing test patterns and locations
373
+ 4. CI/CD configuration
374
+ 5. Key configuration files (.env, tsconfig, etc.)
375
+ Keep your report under 2000 words. Focus on facts, not opinions.`;
376
+ function makeRequirementsAnalystPrompt(task) {
377
+ return `Given this task description:
378
+
379
+ Title: ${task.summary}
380
+ ${task.description ? `\nDescription:\n${task.description}` : ""}
381
+
382
+ Analyze the requirements and report:
383
+ 1. Explicit acceptance criteria (what MUST be done)
384
+ 2. Implicit requirements (what's assumed but not stated)
385
+ 3. Ambiguities that could lead to wrong implementation
386
+ 4. Affected components based on the requirement scope
387
+ 5. Suggested personas for each component
388
+ Keep your report under 1500 words.`;
389
+ }
390
+ function makeRiskAssessorPrompt(task) {
391
+ return `You are assessing risks for a development task on this codebase.
392
+ The task: ${task.summary}
393
+ ${task.description ? `\nDescription:\n${task.description}` : ""}
394
+
395
+ Use Grep and Read to check for potential blockers.
396
+ Report:
397
+ 1. Files likely to be modified (search for relevant code)
398
+ 2. Files that are heavily coupled (imports/dependencies)
399
+ 3. Existing tests that may need updating
400
+ 4. Environment/config dependencies
401
+ 5. Migration or deployment considerations
402
+ Keep your report under 1500 words.`;
403
+ }
404
+ /**
405
+ * Run team planning: spawn 3 parallel analyst agents, then synthesize
406
+ * their reports into an enhanced planning prompt for the final planner.
407
+ *
408
+ * Falls back to single-agent planning if anything goes wrong.
409
+ */
410
+ async function runTeamPlanning(task, basePrompt, claudePath, model, env, repoPath, taskId, startTime) {
411
+ const taskLabel = chalk.cyan(taskId.slice(0, 8));
412
+ console.log(`${ts()} ${taskLabel} ${chalk.magenta("◆ Team planning")} — running 3 analysts in parallel...`);
413
+ await postLog(taskId, `${PREFIX} Team planning: running codebase, requirements, and risk analysts in parallel...`);
414
+ await postProgress(taskId, "reading_repo", Math.round((Date.now() - startTime) / 1000), "Running parallel analysis agents...", 0, 0);
415
+ const analysisModel = model.includes("opus") ? "sonnet" : model;
416
+ const [codebaseResult, requirementsResult, riskResult] = await Promise.allSettled([
417
+ runAnalyst(claudePath, analysisModel, CODEBASE_ANALYST_PROMPT, repoPath, env),
418
+ runAnalyst(claudePath, analysisModel, makeRequirementsAnalystPrompt(task), repoPath, env),
419
+ runAnalyst(claudePath, analysisModel, makeRiskAssessorPrompt(task), repoPath, env),
420
+ ]);
421
+ const codebaseReport = codebaseResult.status === "fulfilled" ? codebaseResult.value : "";
422
+ const requirementsReport = requirementsResult.status === "fulfilled" ? requirementsResult.value : "";
423
+ const riskReport = riskResult.status === "fulfilled" ? riskResult.value : "";
424
+ const successCount = [codebaseReport, requirementsReport, riskReport].filter((r) => r.length > 0).length;
425
+ const analysisElapsed = Math.round((Date.now() - startTime) / 1000);
426
+ console.log(`${ts()} ${taskLabel} ${chalk.green("✓")} Analysis complete: ${successCount}/3 reports (${analysisElapsed}s)`);
427
+ await postLog(taskId, `${PREFIX} Team analysis complete: ${successCount}/3 reports in ${formatElapsed(analysisElapsed)}. Synthesizing plan...`);
428
+ await postProgress(taskId, "analyzing", analysisElapsed, "Synthesizing analysis reports...", 0, 0);
429
+ // Build enhanced prompt with analysis reports
430
+ const sections = [];
431
+ if (codebaseReport) {
432
+ sections.push(`## Codebase Analysis (from automated analysis)\n\n${codebaseReport}`);
433
+ }
434
+ if (requirementsReport) {
435
+ sections.push(`## Requirements Analysis\n\n${requirementsReport}`);
436
+ }
437
+ if (riskReport) {
438
+ sections.push(`## Risk Assessment\n\n${riskReport}`);
439
+ }
440
+ if (sections.length === 0) {
441
+ // All analysts failed — fall through to regular planning
442
+ console.log(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} All analysts failed, falling back to single-agent planning`);
443
+ await postLog(taskId, `${PREFIX} All analysis agents failed — falling back to single-agent planning`);
444
+ return runClaudeCli(claudePath, model, basePrompt, env, taskId, startTime);
445
+ }
446
+ const enhancedPrompt = basePrompt +
447
+ "\n\n" +
448
+ sections.join("\n\n") +
449
+ "\n\n" +
450
+ "Use these analyses to produce a more accurate execution plan.\n" +
451
+ "Prefer actual file paths discovered in the codebase analysis over guessed paths.";
452
+ // Run the final synthesizer planner with the enhanced prompt
453
+ return runClaudeCli(claudePath, model, enhancedPrompt, env, taskId, startTime);
454
+ }
252
455
  /**
253
456
  * Run planning for a task with Planner-Critic validation loop.
254
457
  *
@@ -281,6 +484,22 @@ export async function planTask(task, config, credentials) {
281
484
  const startTime = Date.now();
282
485
  // PRD for critic validation: use task description, fall back to summary
283
486
  const prd = task.description || task.summary;
487
+ // Clone repo for team planning if enabled
488
+ let repoPath = null;
489
+ if (isAnthropicPlanning && config.teamPlanningEnabled && task.githubRepo) {
490
+ const scmProvider = task.scmProvider || "github";
491
+ const scmToken = scmProvider === "bitbucket"
492
+ ? config.bitbucketToken
493
+ : scmProvider === "gitlab"
494
+ ? config.gitlabToken
495
+ : config.githubToken;
496
+ if (scmToken) {
497
+ repoPath = await cloneTargetRepo(task.githubRepo, scmToken, scmProvider, task.id);
498
+ }
499
+ else {
500
+ console.log(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} No SCM token for ${scmProvider}, skipping team planning`);
501
+ }
502
+ }
284
503
  // 2. Planner-Critic iteration loop
285
504
  let currentPrompt = basePrompt;
286
505
  let bestPlan = null;
@@ -288,146 +507,167 @@ export async function planTask(task, config, credentials) {
288
507
  // Track critic history across iterations for analytics
289
508
  const criticHistory = [];
290
509
  let totalFileCapTruncations = 0;
291
- for (let iteration = 1; iteration <= MAX_ITERATIONS; iteration++) {
292
- const iterLabel = MAX_ITERATIONS > 1 ? ` (attempt ${iteration}/${MAX_ITERATIONS})` : "";
293
- const providerLabel = `${provider}/${cliModel}`;
294
- if (iteration > 1) {
295
- console.log(`${ts()} ${taskLabel} Running planner${iterLabel} ${chalk.dim(`(${chalk.yellow(providerLabel)})`)}`);
296
- await postLog(task.id, `${PREFIX} Re-planning${iterLabel} using ${providerLabel}`);
297
- }
298
- else {
299
- console.log(`${ts()} ${taskLabel} Running planner ${chalk.dim(`(${chalk.yellow(providerLabel)})`)}`);
300
- await postLog(task.id, `${PREFIX} Starting planning agent using ${providerLabel}`);
301
- }
302
- // 2a. Generate plan via Claude CLI (Anthropic) or HTTP API (other providers)
303
- let rawOutput;
304
- try {
305
- if (isAnthropicPlanning) {
306
- rawOutput = await runClaudeCli(claudePath, cliModel, currentPrompt, cleanEnv, task.id, startTime);
510
+ try {
511
+ for (let iteration = 1; iteration <= MAX_ITERATIONS; iteration++) {
512
+ const iterLabel = MAX_ITERATIONS > 1 ? ` (attempt ${iteration}/${MAX_ITERATIONS})` : "";
513
+ const providerLabel = `${provider}/${cliModel}`;
514
+ if (iteration > 1) {
515
+ console.log(`${ts()} ${taskLabel} Running planner${iterLabel} ${chalk.dim(`(${chalk.yellow(providerLabel)})`)}`);
516
+ await postLog(task.id, `${PREFIX} Re-planning${iterLabel} using ${providerLabel}`);
307
517
  }
308
518
  else {
309
- if (!providerApiKey) {
310
- throw new Error(`No API key available for provider "${provider}". Configure it in Settings > Integrations.`);
519
+ console.log(`${ts()} ${taskLabel} Running planner ${chalk.dim(`(${chalk.yellow(providerLabel)})`)}`);
520
+ await postLog(task.id, `${PREFIX} Starting planning agent using ${providerLabel}`);
521
+ }
522
+ // 2a. Generate plan via Claude CLI (Anthropic) or HTTP API (other providers)
523
+ let rawOutput;
524
+ try {
525
+ if (isAnthropicPlanning && config.teamPlanningEnabled && repoPath && iteration === 1) {
526
+ rawOutput = await runTeamPlanning(task, currentPrompt, claudePath, cliModel, cleanEnv, repoPath, task.id, startTime);
527
+ }
528
+ else if (isAnthropicPlanning) {
529
+ rawOutput = await runClaudeCli(claudePath, cliModel, currentPrompt, cleanEnv, task.id, startTime);
530
+ }
531
+ else {
532
+ if (!providerApiKey) {
533
+ throw new Error(`No API key available for provider "${provider}". Configure it in Settings > Integrations.`);
534
+ }
535
+ const genStart = Math.round((Date.now() - startTime) / 1000);
536
+ await postProgress(task.id, "generating_plan", genStart, "Generating plan via API...", 0, 0);
537
+ rawOutput = await generateText(provider, cliModel, currentPrompt, providerApiKey);
538
+ // Post "validating" phase so the dashboard progress bar transitions correctly
539
+ const genEnd = Math.round((Date.now() - startTime) / 1000);
540
+ await postProgress(task.id, "validating", genEnd, "Validating plan...", rawOutput.length, 0);
311
541
  }
312
- await postProgress(task.id, "generating_plan", 0, "Generating plan via API...", 0, 0);
313
- rawOutput = await generateText(provider, cliModel, currentPrompt, providerApiKey);
314
542
  }
315
- }
316
- catch (error) {
543
+ catch (error) {
544
+ const elapsed = Math.round((Date.now() - startTime) / 1000);
545
+ const errMsg = error instanceof Error ? error.message : String(error);
546
+ console.error(`${ts()} ${taskLabel} ${chalk.red("✗")} Failed after ${elapsed}s: ${errMsg.substring(0, 100)}`);
547
+ await postLog(task.id, `${PREFIX} Planning failed after ${formatElapsed(elapsed)}: ${errMsg.substring(0, 200)}`, "error", "error");
548
+ return false;
549
+ }
317
550
  const elapsed = Math.round((Date.now() - startTime) / 1000);
318
- const errMsg = error instanceof Error ? error.message : String(error);
319
- console.error(`${ts()} ${taskLabel} ${chalk.red("")} Failed after ${elapsed}s: ${errMsg.substring(0, 100)}`);
320
- await postLog(task.id, `${PREFIX} Planning failed after ${formatElapsed(elapsed)}: ${errMsg.substring(0, 200)}`, "error", "error");
321
- return false;
322
- }
323
- const elapsed = Math.round((Date.now() - startTime) / 1000);
324
- console.log(`${ts()} ${taskLabel} ${chalk.green("✓")} Claude CLI done ${chalk.dim(`(${elapsed}s, ${rawOutput.length} chars)`)}`);
325
- // 2b. Parse plan from raw output
326
- let plan;
327
- try {
328
- plan = parseExecutionPlan(rawOutput);
329
- }
330
- catch (error) {
331
- const errMsg = error instanceof Error ? error.message : String(error);
332
- console.error(`${ts()} ${taskLabel} ${chalk.red("✗")} Plan parse failed: ${errMsg.substring(0, 100)}`);
333
- await postLog(task.id, `${PREFIX} Failed to parse execution plan from Claude output: ${errMsg.substring(0, 200)}`, "error", "error");
334
- // If we can't parse the plan, post raw output and let server-side try
335
- return await postRawPlan(task.id, rawOutput, config.agentId, taskLabel, elapsed);
336
- }
337
- // 2c. Apply file cap (max 5 files per story)
338
- const { truncatedCount, details } = applyFileCap(plan);
339
- if (truncatedCount > 0) {
340
- totalFileCapTruncations += truncatedCount;
341
- const msg = `${PREFIX} File cap applied: ${truncatedCount} stories truncated to max 5 targetFiles`;
342
- console.log(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} ${msg}`);
343
- await postLog(task.id, msg);
344
- for (const detail of details) {
345
- console.log(`${ts()} ${taskLabel} ${chalk.dim(detail)}`);
551
+ const doneLabel = isAnthropicPlanning ? "Claude CLI" : `${provider} API`;
552
+ console.log(`${ts()} ${taskLabel} ${chalk.green("")} ${doneLabel} done ${chalk.dim(`(${elapsed}s, ${rawOutput.length} chars)`)}`);
553
+ // 2b. Parse plan from raw output
554
+ let plan;
555
+ try {
556
+ plan = parseExecutionPlan(rawOutput);
346
557
  }
347
- }
348
- console.log(`${ts()} ${taskLabel} Plan: ${chalk.bold(plan.stories.length)} stories`);
349
- await postLog(task.id, `${PREFIX} Plan generated: ${plan.stories.length} stories (${formatElapsed(elapsed)}). Running critic validation...`);
350
- // 2d. Run critic validation
351
- const criticResult = await runCriticValidation(claudePath, cliModel, prd, plan, cleanEnv, taskLabel, provider, providerApiKey);
352
- // Track best plan across iterations
353
- if (criticResult && criticResult.score > bestScore) {
354
- bestPlan = plan;
355
- bestScore = criticResult.score;
356
- }
357
- else if (!criticResult && !bestPlan) {
358
- // Critic failed entirely — use this plan as fallback
359
- bestPlan = plan;
360
- }
361
- // Record critic history for this iteration
362
- if (criticResult) {
363
- criticHistory.push({
364
- iteration,
365
- score: criticResult.score,
366
- approved: criticResult.approved || criticResult.score >= AUTO_APPROVAL_THRESHOLD,
367
- risks: criticResult.risks,
368
- suggestions: criticResult.suggestions,
369
- filesCapApplied: truncatedCount > 0 ? truncatedCount : undefined,
370
- });
371
- }
372
- // 2e. Check critic result
373
- if (!criticResult) {
374
- // Critic failed (timeout, parse error, etc.) — post plan without critic gate
375
- const msg = `${PREFIX} Critic validation failed — posting plan without critic score`;
376
- console.log(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} ${msg}`);
377
- await postLog(task.id, msg);
378
- const planningDurationMs = Date.now() - startTime;
379
- return await postValidatedPlan(task.id, plan, config.agentId, taskLabel, elapsed, undefined, undefined, criticHistory, totalFileCapTruncations, planningDurationMs, iteration);
380
- }
381
- if (criticResult.approved || criticResult.score >= AUTO_APPROVAL_THRESHOLD) {
382
- // Approved! Post the file-capped plan
383
- const msg = `${PREFIX} Critic approved (score: ${criticResult.score}/100)`;
384
- console.log(`${ts()} ${taskLabel} ${chalk.green("✓")} ${msg}`);
385
- await postLog(task.id, msg);
386
- if (criticResult.risks.length > 0) {
387
- const risksMsg = `${PREFIX} Critic risks (non-blocking): ${criticResult.risks.join("; ")}`;
388
- console.log(`${ts()} ${taskLabel} ${chalk.dim(risksMsg)}`);
389
- await postLog(task.id, risksMsg);
558
+ catch (error) {
559
+ const errMsg = error instanceof Error ? error.message : String(error);
560
+ console.error(`${ts()} ${taskLabel} ${chalk.red("✗")} Plan parse failed: ${errMsg.substring(0, 100)}`);
561
+ await postLog(task.id, `${PREFIX} Failed to parse execution plan from Claude output: ${errMsg.substring(0, 200)}`, "error", "error");
562
+ // If we can't parse the plan, post raw output and let server-side try
563
+ return await postRawPlan(task.id, rawOutput, config.agentId, taskLabel, elapsed);
390
564
  }
391
- const planningDurationMs = Date.now() - startTime;
392
- return await postValidatedPlan(task.id, plan, config.agentId, taskLabel, elapsed, criticResult.score, criticResult.risks, criticHistory, totalFileCapTruncations, planningDurationMs, iteration);
393
- }
394
- // 2f. Rejected — append critic feedback for next iteration
395
- if (iteration < MAX_ITERATIONS) {
396
- const feedback = formatCriticFeedback(criticResult);
397
- currentPrompt = basePrompt + "\n\n" + feedback;
398
- const msg = `${PREFIX} Critic rejected (score: ${criticResult.score}/100, threshold: ${AUTO_APPROVAL_THRESHOLD}). Re-planning with feedback...`;
399
- console.log(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} ${msg}`);
400
- await postLog(task.id, msg);
401
- if (criticResult.risks.length > 0) {
402
- const risksMsg = `${PREFIX} Critic risks: ${criticResult.risks.join("; ")}`;
403
- console.log(`${ts()} ${taskLabel} ${chalk.dim(risksMsg)}`);
404
- await postLog(task.id, risksMsg);
565
+ // 2c. Apply file cap (max 5 files per story)
566
+ const { truncatedCount, details } = applyFileCap(plan);
567
+ if (truncatedCount > 0) {
568
+ totalFileCapTruncations += truncatedCount;
569
+ const msg = `${PREFIX} File cap applied: ${truncatedCount} stories truncated to max 5 targetFiles`;
570
+ console.log(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} ${msg}`);
571
+ await postLog(task.id, msg);
572
+ for (const detail of details) {
573
+ console.log(`${ts()} ${taskLabel} ${chalk.dim(detail)}`);
574
+ }
575
+ }
576
+ console.log(`${ts()} ${taskLabel} Plan: ${chalk.bold(plan.stories.length)} stories`);
577
+ await postLog(task.id, `${PREFIX} Plan generated: ${plan.stories.length} stories (${formatElapsed(elapsed)}). Running critic validation...`);
578
+ // 2d. Run critic validation
579
+ const criticResult = await runCriticValidation(claudePath, cliModel, prd, plan, cleanEnv, taskLabel, provider, providerApiKey);
580
+ // Track best plan across iterations
581
+ if (criticResult && criticResult.score > bestScore) {
582
+ bestPlan = plan;
583
+ bestScore = criticResult.score;
405
584
  }
406
- if (criticResult.suggestions && criticResult.suggestions.length > 0) {
407
- const sugMsg = `${PREFIX} Critic suggestions: ${criticResult.suggestions.join("; ")}`;
408
- console.log(`${ts()} ${taskLabel} ${chalk.dim(sugMsg)}`);
409
- await postLog(task.id, sugMsg);
585
+ else if (!criticResult && !bestPlan) {
586
+ // Critic failed entirely use this plan as fallback
587
+ bestPlan = plan;
588
+ }
589
+ // Record critic history for this iteration
590
+ if (criticResult) {
591
+ criticHistory.push({
592
+ iteration,
593
+ score: criticResult.score,
594
+ approved: criticResult.approved || criticResult.score >= AUTO_APPROVAL_THRESHOLD,
595
+ risks: criticResult.risks,
596
+ suggestions: criticResult.suggestions,
597
+ filesCapApplied: truncatedCount > 0 ? truncatedCount : undefined,
598
+ });
599
+ }
600
+ // 2e. Check critic result
601
+ if (!criticResult) {
602
+ // Critic failed (timeout, parse error, etc.) — post plan without critic gate
603
+ const msg = `${PREFIX} Critic validation failed — posting plan without critic score`;
604
+ console.log(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} ${msg}`);
605
+ await postLog(task.id, msg);
606
+ const planningDurationMs = Date.now() - startTime;
607
+ return await postValidatedPlan(task.id, plan, config.agentId, taskLabel, elapsed, undefined, undefined, criticHistory, totalFileCapTruncations, planningDurationMs, iteration);
608
+ }
609
+ if (criticResult.approved || criticResult.score >= AUTO_APPROVAL_THRESHOLD) {
610
+ // Approved! Post the file-capped plan
611
+ const msg = `${PREFIX} Critic approved (score: ${criticResult.score}/100)`;
612
+ console.log(`${ts()} ${taskLabel} ${chalk.green("✓")} ${msg}`);
613
+ await postLog(task.id, msg);
614
+ if (criticResult.risks.length > 0) {
615
+ const risksMsg = `${PREFIX} Critic risks (non-blocking): ${criticResult.risks.join("; ")}`;
616
+ console.log(`${ts()} ${taskLabel} ${chalk.dim(risksMsg)}`);
617
+ await postLog(task.id, risksMsg);
618
+ }
619
+ const planningDurationMs = Date.now() - startTime;
620
+ return await postValidatedPlan(task.id, plan, config.agentId, taskLabel, elapsed, criticResult.score, criticResult.risks, criticHistory, totalFileCapTruncations, planningDurationMs, iteration);
621
+ }
622
+ // 2f. Rejected — append critic feedback for next iteration
623
+ if (iteration < MAX_ITERATIONS) {
624
+ const feedback = formatCriticFeedback(criticResult);
625
+ currentPrompt = basePrompt + "\n\n" + feedback;
626
+ const msg = `${PREFIX} Critic rejected (score: ${criticResult.score}/100, threshold: ${AUTO_APPROVAL_THRESHOLD}). Re-planning with feedback...`;
627
+ console.log(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} ${msg}`);
628
+ await postLog(task.id, msg);
629
+ if (criticResult.risks.length > 0) {
630
+ const risksMsg = `${PREFIX} Critic risks: ${criticResult.risks.join("; ")}`;
631
+ console.log(`${ts()} ${taskLabel} ${chalk.dim(risksMsg)}`);
632
+ await postLog(task.id, risksMsg);
633
+ }
634
+ if (criticResult.suggestions && criticResult.suggestions.length > 0) {
635
+ const sugMsg = `${PREFIX} Critic suggestions: ${criticResult.suggestions.join("; ")}`;
636
+ console.log(`${ts()} ${taskLabel} ${chalk.dim(sugMsg)}`);
637
+ await postLog(task.id, sugMsg);
638
+ }
639
+ }
640
+ else {
641
+ // Final iteration — rejected
642
+ const msg = `${PREFIX} Critic rejected after ${MAX_ITERATIONS} iterations (best score: ${bestScore}/100, threshold: ${AUTO_APPROVAL_THRESHOLD})`;
643
+ console.error(`${ts()} ${taskLabel} ${chalk.red("✗")} ${msg}`);
644
+ await postLog(task.id, msg, "error", "error");
645
+ if (criticResult.risks.length > 0) {
646
+ const risksMsg = `${PREFIX} Final risks: ${criticResult.risks.join("; ")}`;
647
+ console.error(`${ts()} ${taskLabel} ${risksMsg}`);
648
+ await postLog(task.id, risksMsg, "error", "error");
649
+ }
650
+ if (criticResult.suggestions && criticResult.suggestions.length > 0) {
651
+ const sugMsg = `${PREFIX} Suggestions: ${criticResult.suggestions.join("; ")}`;
652
+ console.error(`${ts()} ${taskLabel} ${sugMsg}`);
653
+ await postLog(task.id, sugMsg, "error", "error");
654
+ }
410
655
  }
411
656
  }
412
- else {
413
- // Final iteration — rejected
414
- const msg = `${PREFIX} Critic rejected after ${MAX_ITERATIONS} iterations (best score: ${bestScore}/100, threshold: ${AUTO_APPROVAL_THRESHOLD})`;
415
- console.error(`${ts()} ${taskLabel} ${chalk.red("✗")} ${msg}`);
416
- await postLog(task.id, msg, "error", "error");
417
- if (criticResult.risks.length > 0) {
418
- const risksMsg = `${PREFIX} Final risks: ${criticResult.risks.join("; ")}`;
419
- console.error(`${ts()} ${taskLabel} ${risksMsg}`);
420
- await postLog(task.id, risksMsg, "error", "error");
657
+ // All iterations exhausted — fail
658
+ return false;
659
+ }
660
+ finally {
661
+ // Cleanup temp clone
662
+ if (repoPath) {
663
+ try {
664
+ execSync(`rm -rf "${repoPath}"`, { stdio: "ignore" });
421
665
  }
422
- if (criticResult.suggestions && criticResult.suggestions.length > 0) {
423
- const sugMsg = `${PREFIX} Suggestions: ${criticResult.suggestions.join("; ")}`;
424
- console.error(`${ts()} ${taskLabel} ${sugMsg}`);
425
- await postLog(task.id, sugMsg, "error", "error");
666
+ catch {
667
+ /* ignore */
426
668
  }
427
669
  }
428
670
  }
429
- // All iterations exhausted — fail
430
- return false;
431
671
  }
432
672
  /**
433
673
  * Post a validated (file-capped) plan to the cloud API.
package/dist/spawner.js CHANGED
@@ -132,17 +132,20 @@ export async function spawnWorker(task, config, orgConfig, credentials) {
132
132
  else {
133
133
  dockerArgs.push("--network", "host");
134
134
  }
135
- // Mount Claude credentials
135
+ // Mount Claude credentials (required for Anthropic workers, optional for others)
136
+ const workerProvider = task.workerProvider || "anthropic";
136
137
  const claudeConfigDir = findClaudeConfigDir();
137
- if (!claudeConfigDir) {
138
+ if (!claudeConfigDir && workerProvider === "anthropic") {
138
139
  console.error(`${ts()} ${taskLabel} ${chalk.red("✗")} Claude credentials not found. Run 'claude' and complete the sign-in flow.`);
139
140
  return;
140
141
  }
141
- // Copy credentials to a temp dir with relaxed permissions for container access
142
- // (avoids weakening permissions on the user's actual credentials file)
143
- const credFile = path.join(claudeConfigDir, ".credentials.json");
144
- const dockerClaudeDir = toDockerPath(claudeConfigDir);
145
- dockerArgs.push("-v", `${dockerClaudeDir}:/home/worker/.claude`);
142
+ if (claudeConfigDir) {
143
+ const dockerClaudeDir = toDockerPath(claudeConfigDir);
144
+ dockerArgs.push("-v", `${dockerClaudeDir}:/home/worker/.claude`);
145
+ }
146
+ else {
147
+ console.log(`${ts()} ${taskLabel} ${chalk.dim("Skipping Claude mount (non-Anthropic worker)")}`);
148
+ }
146
149
  // Build environment variables — KEY DIFFERENCE: API_BASE_URL points to cloud
147
150
  const scmProvider = (task.scmProvider || "github");
148
151
  const scmToken = getScmToken(scmProvider, config);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@workermill/agent",
3
- "version": "0.3.1",
3
+ "version": "0.4.1",
4
4
  "description": "WorkerMill Remote Agent - Run AI workers locally with your Claude Max subscription",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",