@workermill/agent 0.8.1 → 0.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
File without changes
package/dist/config.d.ts CHANGED
@@ -16,8 +16,6 @@ export interface AgentConfig {
16
16
  bitbucketToken: string;
17
17
  gitlabToken: string;
18
18
  workerImage: string;
19
- teamPlanningEnabled: boolean;
20
- analystModel?: string;
21
19
  }
22
20
  export interface FileConfig {
23
21
  apiUrl: string;
@@ -32,8 +30,6 @@ export interface FileConfig {
32
30
  gitlab: string;
33
31
  };
34
32
  workerImage: string;
35
- teamPlanningEnabled?: boolean;
36
- analystModel?: string;
37
33
  setupCompletedAt: string;
38
34
  }
39
35
  export declare function getConfigDir(): string;
package/dist/config.js CHANGED
@@ -75,8 +75,6 @@ export function loadConfigFromFile() {
75
75
  bitbucketToken: fc.tokens?.bitbucket || "",
76
76
  gitlabToken: fc.tokens?.gitlab || "",
77
77
  workerImage,
78
- teamPlanningEnabled: fc.teamPlanningEnabled ?? true,
79
- analystModel: fc.analystModel,
80
78
  };
81
79
  }
82
80
  /**
@@ -121,8 +119,6 @@ export function loadConfig() {
121
119
  bitbucketToken: process.env.BITBUCKET_TOKEN || "",
122
120
  gitlabToken: process.env.GITLAB_TOKEN || "",
123
121
  workerImage: process.env.WORKER_IMAGE || "workermill-worker:local",
124
- teamPlanningEnabled: process.env.TEAM_PLANNING_ENABLED !== "false",
125
- analystModel: process.env.ANALYST_MODEL,
126
122
  };
127
123
  }
128
124
  /**
@@ -222,6 +222,7 @@ Review this execution plan against the PRD:
222
222
  4. **Unrealistic Scope** - Any step targeting >5 files MUST score below 80 (auto-rejection threshold). Each step should modify at most 5 files. If a step needs more, split it into multiple steps first.
223
223
  5. **Missing Operational Steps** - If the PRD requires deployment, provisioning, migrations, or running commands, does the plan include operational steps? Writing code is not the same as deploying it.
224
224
  6. **Overlapping File Scope** - If two or more steps share the same targetFiles, this causes parallel merge conflicts. Steps MUST NOT overlap on targetFiles. Deduct 10 points per shared file across steps.
225
+ 7. **Serialization Bottleneck** - If more than half the stories depend on a single story that targets >5 files, the plan has a bottleneck. Deduct 15 points — split the foundation or allow more parallel work.
225
226
 
226
227
  ## Scoring Guide
227
228
 
package/dist/planner.d.ts CHANGED
@@ -28,12 +28,13 @@ export interface PlanningTask {
28
28
  *
29
29
  * Flow:
30
30
  * 1. Fetch planning prompt from cloud API
31
- * 2. Run Claude CLI to generate plan
32
- * 3. Parse plan, apply file cap (max 5 files per story)
33
- * 4. Run critic validation via Claude CLI
34
- * 5. If critic approves (score >= 80): post validated plan to API
35
- * 6. If critic rejects: re-run planner with feedback (up to MAX_ITERATIONS)
36
- * 7. After MAX_ITERATIONS without approval: post best plan if score >= 50 (fallback)
37
- * 8. If no plan scored >= 50: fail the task
31
+ * 2. Clone target repo (if available) so planner can explore with tools
32
+ * 3. Run Claude CLI to generate plan
33
+ * 4. Parse plan, apply file cap (max 5 files per story)
34
+ * 5. Run critic validation via Claude CLI
35
+ * 6. If critic approves (score >= 80): post validated plan to API
36
+ * 7. If critic rejects: re-run planner with feedback (up to MAX_ITERATIONS)
37
+ * 8. After MAX_ITERATIONS without approval: post best plan if score >= 50 (fallback)
38
+ * 9. If no plan scored >= 50: fail the task
38
39
  */
39
40
  export declare function planTask(task: PlanningTask, config: AgentConfig, credentials?: ClaimCredentials): Promise<boolean>;
package/dist/planner.js CHANGED
@@ -80,9 +80,10 @@ const logQueue = [];
80
80
  let logDrainPromise = null;
81
81
  async function drainLogQueue() {
82
82
  while (logQueue.length > 0) {
83
- const entry = logQueue.shift();
83
+ // Drain up to 50 entries per batch POST
84
+ const batch = logQueue.splice(0, 50);
84
85
  try {
85
- await api.post("/api/control-center/logs", entry, { timeout: 5_000 });
86
+ await api.post("/api/control-center/logs/batch", { entries: batch }, { timeout: 5_000 });
86
87
  }
87
88
  catch {
88
89
  // Best-effort — drop on failure
@@ -157,7 +158,7 @@ function phaseLabel(phase, elapsed) {
157
158
  * Run Claude CLI with stream-json output, posting real-time phase milestones
158
159
  * to the cloud dashboard — identical terminal experience to cloud planning.
159
160
  */
160
- function runClaudeCli(claudePath, model, prompt, env, taskId, startTime, disableTools = false) {
161
+ function runClaudeCli(claudePath, model, prompt, env, taskId, startTime, cwd) {
161
162
  const taskLabel = chalk.cyan(taskId.slice(0, 8));
162
163
  return new Promise((resolve, reject) => {
163
164
  const cliArgs = [
@@ -167,12 +168,8 @@ function runClaudeCli(claudePath, model, prompt, env, taskId, startTime, disable
167
168
  "--model", model,
168
169
  "--permission-mode", "bypassPermissions",
169
170
  ];
170
- // When analysts already explored the repo, strip tools so the planner
171
- // doesn't waste turns re-exploring — it has all context in the prompt.
172
- if (disableTools) {
173
- cliArgs.push("--allowedTools", "");
174
- }
175
171
  const proc = spawn(claudePath, cliArgs, {
172
+ cwd,
176
173
  env,
177
174
  stdio: ["pipe", "pipe", "pipe"],
178
175
  });
@@ -222,7 +219,7 @@ function runClaudeCli(claudePath, model, prompt, env, taskId, startTime, disable
222
219
  console.log(`${ts()} ${taskLabel} ${chalk.dim(msg)}`);
223
220
  }
224
221
  // Flush buffered LLM text to dashboard every 500ms (complete lines only)
225
- const textFlushInterval = setInterval(() => flushTextBuffer(), 1_000);
222
+ const textFlushInterval = setInterval(() => flushTextBuffer(), 500);
226
223
  // SSE progress updates every 2s — drives PlanningTerminalBar in dashboard
227
224
  // (same cadence as local dev's progressInterval in planning-agent-local.ts)
228
225
  const sseProgressInterval = setInterval(() => {
@@ -422,15 +419,15 @@ function buildCloneUrl(repo, token, scmProvider) {
422
419
  }
423
420
  }
424
421
  /**
425
- * Clone the target repo to a temp directory for team planning analysis.
426
- * Returns the path on success, or null on failure (fallback to single-agent).
422
+ * Clone the target repo to a temp directory so the planner can explore with tools.
423
+ * Returns the path on success, or null on failure.
427
424
  */
428
425
  async function cloneTargetRepo(repo, token, scmProvider, taskId) {
429
426
  const taskLabel = chalk.cyan(taskId.slice(0, 8));
430
427
  const tmpDir = `/tmp/workermill-planning-${taskId.slice(0, 8)}-${Date.now()}`;
431
428
  try {
432
429
  const cloneUrl = buildCloneUrl(repo, token, scmProvider);
433
- console.log(`${ts()} ${taskLabel} ${chalk.dim("Cloning repo for team planning...")}`);
430
+ console.log(`${ts()} ${taskLabel} ${chalk.dim("Cloning repo for planner...")}`);
434
431
  execSync(`git clone --depth 1 --single-branch "${cloneUrl}" "${tmpDir}"`, {
435
432
  stdio: "ignore",
436
433
  timeout: 60_000,
@@ -440,7 +437,7 @@ async function cloneTargetRepo(repo, token, scmProvider, taskId) {
440
437
  }
441
438
  catch (error) {
442
439
  const errMsg = error instanceof Error ? error.message : String(error);
443
- console.error(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} Clone failed, falling back to single-agent: ${errMsg.substring(0, 100)}`);
440
+ console.error(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} Clone failed, planner will run without repo access: ${errMsg.substring(0, 100)}`);
444
441
  // Cleanup partial clone
445
442
  try {
446
443
  execSync(`rm -rf "${tmpDir}"`, { stdio: "ignore" });
@@ -451,349 +448,19 @@ async function cloneTargetRepo(repo, token, scmProvider, taskId) {
451
448
  return null;
452
449
  }
453
450
  }
454
- /**
455
- * Run an analyst agent via Claude CLI with tool access to the cloned repo.
456
- * Returns the analyst's report text, or an empty string on failure.
457
- */
458
- function runAnalyst(name, claudePath, model, prompt, repoPath, env, timeoutMs = 900_000, taskId) {
459
- const label = chalk.blue(`[${name}]`);
460
- const modelLabel = chalk.yellow(model);
461
- return new Promise((resolve) => {
462
- console.log(`${ts()} ${label} Starting analyst using ${modelLabel}...`);
463
- if (taskId) {
464
- postLog(taskId, `${PREFIX} [${name}] Starting analyst using ${model}...`);
465
- }
466
- const proc = spawn(claudePath, [
467
- "--print",
468
- "--verbose",
469
- "--output-format", "stream-json",
470
- "--model", model,
471
- "--permission-mode", "bypassPermissions",
472
- ], {
473
- cwd: repoPath,
474
- env,
475
- stdio: ["pipe", "pipe", "pipe"],
476
- });
477
- // Write prompt via stdin (same as runClaudeCli)
478
- proc.stdin.write(prompt);
479
- proc.stdin.end();
480
- let resultText = "";
481
- let fullText = "";
482
- let stderrOutput = "";
483
- let lineBuffer = "";
484
- let toolCalls = 0;
485
- let timedOut = false;
486
- const startMs = Date.now();
487
- proc.stderr.on("data", (chunk) => {
488
- const text = chunk.toString();
489
- stderrOutput += text;
490
- // Show stderr in real-time so we can see what's happening
491
- for (const line of text.split("\n").filter((l) => l.trim())) {
492
- console.log(`${ts()} ${label} ${chalk.red("stderr:")} ${line.trim()}`);
493
- }
494
- });
495
- proc.stdout.on("data", (data) => {
496
- lineBuffer += data.toString();
497
- const lines = lineBuffer.split("\n");
498
- lineBuffer = lines.pop() || "";
499
- for (const line of lines) {
500
- const trimmed = line.trim();
501
- if (!trimmed)
502
- continue;
503
- try {
504
- const event = JSON.parse(trimmed);
505
- // Claude CLI stream-json wraps content in assistant message events
506
- if (event.type === "assistant" && event.message?.content) {
507
- const content = event.message.content;
508
- if (Array.isArray(content)) {
509
- for (const block of content) {
510
- if (block.type === "text" && block.text) {
511
- fullText += block.text;
512
- // Log analyst reasoning (first line, truncated)
513
- const thought = block.text.trim().split("\n")[0].substring(0, 120);
514
- if (thought) {
515
- console.log(`${ts()} ${label} ${chalk.dim("💭")} ${chalk.dim(thought)}`);
516
- if (taskId)
517
- postLog(taskId, `${PREFIX} [${name}] 💭 ${thought}`);
518
- }
519
- }
520
- else if (block.type === "tool_use") {
521
- toolCalls++;
522
- const toolName = block.name || "unknown";
523
- // Show tool name + input preview (file path, pattern, etc.)
524
- const inputStr = block.input ? JSON.stringify(block.input) : "";
525
- const inputPreview = inputStr.length > 80 ? inputStr.substring(0, 80) + "…" : inputStr;
526
- console.log(`${ts()} ${label} ${chalk.dim(`Tool: ${toolName}`)}${inputPreview ? chalk.dim(` ${inputPreview}`) : ""}`);
527
- if (taskId)
528
- postLog(taskId, `${PREFIX} [${name}] Tool: ${toolName} ${inputPreview}`);
529
- }
530
- }
531
- }
532
- else if (typeof content === "string") {
533
- fullText += content;
534
- }
535
- }
536
- else if (event.type === "content_block_delta" && event.delta?.text) {
537
- // Fallback: raw API streaming format (may appear in some CLI versions)
538
- fullText += event.delta.text;
539
- }
540
- else if (event.type === "content_block_start" && event.content_block?.type === "tool_use") {
541
- toolCalls++;
542
- const toolName = event.content_block?.name || "unknown";
543
- console.log(`${ts()} ${label} ${chalk.dim(`Tool: ${toolName}`)}`);
544
- if (taskId)
545
- postLog(taskId, `${PREFIX} [${name}] Tool: ${toolName}`);
546
- }
547
- else if (event.type === "result" && event.result) {
548
- resultText =
549
- typeof event.result === "string" ? event.result : "";
550
- }
551
- }
552
- catch {
553
- fullText += trimmed + "\n";
554
- }
555
- }
556
- });
557
- const timeout = setTimeout(() => {
558
- timedOut = true;
559
- proc.kill("SIGTERM");
560
- const elapsed = Math.round((Date.now() - startMs) / 1000);
561
- console.log(`${ts()} ${label} ${chalk.yellow("⚠ Timed out")} after ${formatElapsed(elapsed)} (${toolCalls} tool calls, ${fullText.length} chars)`);
562
- if (taskId)
563
- postLog(taskId, `${PREFIX} [${name}] ⚠ Timed out after ${formatElapsed(elapsed)}`);
564
- resolve(resultText || fullText || "");
565
- }, timeoutMs);
566
- proc.on("exit", (code) => {
567
- clearTimeout(timeout);
568
- const elapsed = Math.round((Date.now() - startMs) / 1000);
569
- if (timedOut)
570
- return; // already resolved
571
- const output = resultText || fullText || "";
572
- if (code === 0 && output.length > 0) {
573
- console.log(`${ts()} ${label} ${chalk.green("✓ Done")} in ${formatElapsed(elapsed)} (${toolCalls} tool calls, ${output.length} chars)`);
574
- if (taskId)
575
- postLog(taskId, `${PREFIX} [${name}] ✓ Done in ${formatElapsed(elapsed)} (${toolCalls} tool calls, ${output.length} chars)`);
576
- }
577
- else if (code !== 0) {
578
- console.log(`${ts()} ${label} ${chalk.red(`✗ Exited ${code}`)} after ${formatElapsed(elapsed)} — ${stderrOutput.substring(0, 150) || "no stderr"}`);
579
- if (taskId)
580
- postLog(taskId, `${PREFIX} [${name}] ✗ Exited ${code} after ${formatElapsed(elapsed)}`);
581
- }
582
- else {
583
- console.log(`${ts()} ${label} ${chalk.yellow("⚠ Empty output")} after ${formatElapsed(elapsed)} (${toolCalls} tool calls)`);
584
- if (taskId)
585
- postLog(taskId, `${PREFIX} [${name}] ⚠ Empty output after ${formatElapsed(elapsed)}`);
586
- }
587
- resolve(output);
588
- });
589
- proc.on("error", (err) => {
590
- clearTimeout(timeout);
591
- console.log(`${ts()} ${label} ${chalk.red("✗ Spawn failed:")} ${err.message}`);
592
- resolve("");
593
- });
594
- });
595
- }
596
- /**
597
- * Run an analyst agent via Vercel AI SDK with tool access to the cloned repo.
598
- * Used for non-Anthropic providers (OpenAI, Google, Ollama) that can't use Claude CLI.
599
- * Returns the analyst's report text, or an empty string on failure.
600
- */
601
- async function runAnalystWithSdk(name, provider, model, apiKey, prompt, repoPath, timeoutMs = 900_000, taskId) {
602
- const label = chalk.blue(`[${name}]`);
603
- const modelLabel = chalk.yellow(`${provider}/${model}`);
604
- const startMs = Date.now();
605
- console.log(`${ts()} ${label} Starting analyst using ${modelLabel} via AI SDK...`);
606
- if (taskId)
607
- postLog(taskId, `${PREFIX} [${name}] Starting analyst using ${provider}/${model} via AI SDK...`);
608
- try {
609
- const result = await generateTextWithTools({
610
- provider,
611
- model,
612
- apiKey,
613
- prompt,
614
- workingDir: repoPath,
615
- maxTokens: 16384,
616
- temperature: 0.3,
617
- timeoutMs,
618
- maxSteps: 20, // Allow thorough exploration
619
- enableTools: true,
620
- });
621
- const elapsed = Math.round((Date.now() - startMs) / 1000);
622
- if (result && result.length > 0) {
623
- console.log(`${ts()} ${label} ${chalk.green("✓ Done")} in ${elapsed}s (${result.length} chars)`);
624
- return result;
625
- }
626
- console.log(`${ts()} ${label} ${chalk.yellow("⚠ Empty output")} after ${elapsed}s`);
627
- return "";
628
- }
629
- catch (error) {
630
- const elapsed = Math.round((Date.now() - startMs) / 1000);
631
- const errMsg = error instanceof Error ? error.message : String(error);
632
- console.log(`${ts()} ${label} ${chalk.red(`✗ Failed`)} after ${elapsed}s: ${errMsg.substring(0, 150)}`);
633
- return "";
634
- }
635
- }
636
- /** Analyst prompt templates */
637
- const CODEBASE_ANALYST_PROMPT = `You are a codebase analyst. Your job is to explore this repository using tools and report what you find.
638
-
639
- IMPORTANT: You MUST use tools to explore the repository. Do NOT guess or make assumptions.
640
-
641
- Step 1: Run Glob with pattern "**/*" to see the top-level directory structure.
642
- Step 2: Read key files: package.json, tsconfig.json, README.md, .env.example, or equivalents.
643
- Step 3: Run Glob on src/ or the main source directory to understand the code layout.
644
- Step 4: Read 2-3 representative source files to understand patterns and frameworks.
645
-
646
- After exploring, write a report covering:
647
- 1. Directory structure and organization
648
- 2. Languages, frameworks, and key dependencies (from package.json, requirements.txt, etc.)
649
- 3. Existing test files and testing patterns (search for test/, __tests__, *.test.*, *.spec.*)
650
- 4. CI/CD configuration (search for .github/workflows/, Jenkinsfile, etc.)
651
- 5. Configuration files and environment setup
652
-
653
- Keep your report under 2000 words. Only report facts you verified with tools.`;
654
- function makeRequirementsAnalystPrompt(task) {
655
- return `You are a requirements analyst. Analyze the following task and the repository to identify what needs to be built.
656
-
657
- Task: ${task.summary}
658
- ${task.description ? `\nDescription:\n${task.description}` : ""}
659
-
660
- IMPORTANT: You MUST use tools to understand the existing codebase before analyzing requirements.
661
-
662
- Step 1: Run Glob with pattern "**/*" to see what already exists in the repository.
663
- Step 2: Read any existing README, docs, or configuration to understand the current state.
664
- Step 3: Search for any code related to the task requirements using Grep.
665
-
666
- After exploring, write a report covering:
667
- 1. Explicit acceptance criteria — what MUST be built based on the description
668
- 2. Implicit requirements — what's assumed but not stated (auth, error handling, etc.)
669
- 3. What already exists vs what needs to be created (based on your file exploration)
670
- 4. Ambiguities that could lead to wrong implementation
671
- 5. Suggested components/modules and which persona should own each
672
-
673
- Keep your report under 1500 words.`;
674
- }
675
- function makeRiskAssessorPrompt(task) {
676
- return `You are a risk assessor. Your job is to search this repository for potential risks and blockers for a development task.
677
-
678
- Task: ${task.summary}
679
- ${task.description ? `\nDescription:\n${task.description}` : ""}
680
-
681
- IMPORTANT: You MUST use tools to search the codebase. Do NOT guess file paths or make assumptions.
682
-
683
- Step 1: Run Glob with pattern "**/*" to see the full repository structure.
684
- Step 2: Use Grep to search for code related to the task (relevant keywords, APIs, components).
685
- Step 3: Read files that are likely to be modified or affected by this task.
686
- Step 4: Search for existing tests (Grep for "test", "spec", "describe", "it(") to find test coverage.
687
-
688
- After exploring, write a report covering:
689
- 1. Specific files that will need to be modified (exact paths from your search)
690
- 2. Files with heavy coupling or shared dependencies (imports you found)
691
- 3. Existing tests that will need updating (exact file paths)
692
- 4. Environment, config, or migration requirements
693
- 5. Deployment or infrastructure risks
694
-
695
- Keep your report under 1500 words. Only report facts you verified with tools.`;
696
- }
697
- /**
698
- * Run team planning: spawn 3 parallel analyst agents, then synthesize
699
- * their reports into an enhanced planning prompt for the final planner.
700
- *
701
- * Falls back to single-agent planning if anything goes wrong.
702
- */
703
- /**
704
- * Run team analysis: spawn 3 parallel analyst agents once, then return
705
- * an enhanced prompt with their reports appended. Returns null if all
706
- * analysts fail (caller should fall back to basePrompt).
707
- *
708
- * This runs ONCE before the planner-critic loop — analyst prompts don't
709
- * include critic feedback, so re-running them on iteration 2+ is waste.
710
- */
711
- async function runTeamAnalysis(task, basePrompt, claudePath, model, env, repoPath, taskId, startTime, provider = "anthropic", providerApiKey) {
712
- const taskLabel = chalk.cyan(taskId.slice(0, 8));
713
- console.log(`${ts()} ${taskLabel} ${chalk.magenta("◆ Team planning")} — running 3 analysts in parallel...`);
714
- await postLog(taskId, `${PREFIX} Team planning: running codebase, requirements, and risk analysts in parallel...`);
715
- await postProgress(taskId, "reading_repo", Math.round((Date.now() - startTime) / 1000), "Running parallel analysis agents...", 0, 0);
716
- const analysisModel = model;
717
- const MAX_TEAM_RETRIES = 3;
718
- const useCliAnalysts = provider === "anthropic";
719
- // Helper: dispatch analyst to Claude CLI or AI SDK based on provider
720
- const dispatchAnalyst = (name, prompt) => {
721
- if (useCliAnalysts) {
722
- return runAnalyst(name, claudePath, analysisModel, prompt, repoPath, env, 900_000, taskId);
723
- }
724
- if (!providerApiKey) {
725
- console.log(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} No API key for ${provider} analysts, skipping ${name}`);
726
- return Promise.resolve("");
727
- }
728
- return runAnalystWithSdk(name, provider, analysisModel, providerApiKey, prompt, repoPath, 900_000, taskId);
729
- };
730
- let codebaseReport = "";
731
- let requirementsReport = "";
732
- let riskReport = "";
733
- for (let attempt = 1; attempt <= MAX_TEAM_RETRIES; attempt++) {
734
- if (attempt > 1) {
735
- console.log(`${ts()} ${taskLabel} ${chalk.magenta("◆ Team planning")} — retry ${attempt}/${MAX_TEAM_RETRIES}...`);
736
- await postLog(taskId, `${PREFIX} Team analysis retry ${attempt}/${MAX_TEAM_RETRIES}...`);
737
- }
738
- const [codebaseResult, requirementsResult, riskResult] = await Promise.allSettled([
739
- codebaseReport ? Promise.resolve(codebaseReport) : dispatchAnalyst("Codebase", CODEBASE_ANALYST_PROMPT),
740
- requirementsReport ? Promise.resolve(requirementsReport) : dispatchAnalyst("Requirements", makeRequirementsAnalystPrompt(task)),
741
- riskReport ? Promise.resolve(riskReport) : dispatchAnalyst("Risk", makeRiskAssessorPrompt(task)),
742
- ]);
743
- if (!codebaseReport && codebaseResult.status === "fulfilled") {
744
- codebaseReport = codebaseResult.value;
745
- }
746
- if (!requirementsReport && requirementsResult.status === "fulfilled") {
747
- requirementsReport = requirementsResult.value;
748
- }
749
- if (!riskReport && riskResult.status === "fulfilled") {
750
- riskReport = riskResult.value;
751
- }
752
- const successCount = [codebaseReport, requirementsReport, riskReport].filter((r) => r.length > 0).length;
753
- const analysisElapsed = Math.round((Date.now() - startTime) / 1000);
754
- console.log(`${ts()} ${taskLabel} Analysis attempt ${attempt}: ${successCount}/3 reports (${analysisElapsed}s)`);
755
- if (successCount > 0) {
756
- console.log(`${ts()} ${taskLabel} ${chalk.green("✓")} Analysis complete: ${successCount}/3 reports (${analysisElapsed}s)`);
757
- await postLog(taskId, `${PREFIX} Team analysis complete: ${successCount}/3 reports in ${formatElapsed(analysisElapsed)}. Synthesizing plan...`);
758
- await postProgress(taskId, "analyzing", analysisElapsed, "Synthesizing analysis reports...", 0, 0);
759
- break;
760
- }
761
- if (attempt === MAX_TEAM_RETRIES) {
762
- console.log(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} All analysts failed after ${MAX_TEAM_RETRIES} attempts, falling back to single-agent planning`);
763
- await postLog(taskId, `${PREFIX} All analysis agents failed after ${MAX_TEAM_RETRIES} attempts — falling back to single-agent planning`);
764
- return null;
765
- }
766
- }
767
- // Build enhanced prompt with analysis reports
768
- const sections = [];
769
- if (codebaseReport) {
770
- sections.push(`## Codebase Analysis (from automated analysis)\n\n${codebaseReport}`);
771
- }
772
- if (requirementsReport) {
773
- sections.push(`## Requirements Analysis\n\n${requirementsReport}`);
774
- }
775
- if (riskReport) {
776
- sections.push(`## Risk Assessment\n\n${riskReport}`);
777
- }
778
- return (basePrompt +
779
- "\n\n" +
780
- sections.join("\n\n") +
781
- "\n\n" +
782
- "Use these analyses to produce a more accurate execution plan.\n" +
783
- "Prefer actual file paths discovered in the codebase analysis over guessed paths.");
784
- }
785
451
  /**
786
452
  * Run planning for a task with Planner-Critic validation loop.
787
453
  *
788
454
  * Flow:
789
455
  * 1. Fetch planning prompt from cloud API
790
- * 2. Run Claude CLI to generate plan
791
- * 3. Parse plan, apply file cap (max 5 files per story)
792
- * 4. Run critic validation via Claude CLI
793
- * 5. If critic approves (score >= 80): post validated plan to API
794
- * 6. If critic rejects: re-run planner with feedback (up to MAX_ITERATIONS)
795
- * 7. After MAX_ITERATIONS without approval: post best plan if score >= 50 (fallback)
796
- * 8. If no plan scored >= 50: fail the task
456
+ * 2. Clone target repo (if available) so planner can explore with tools
457
+ * 3. Run Claude CLI to generate plan
458
+ * 4. Parse plan, apply file cap (max 5 files per story)
459
+ * 5. Run critic validation via Claude CLI
460
+ * 6. If critic approves (score >= 80): post validated plan to API
461
+ * 7. If critic rejects: re-run planner with feedback (up to MAX_ITERATIONS)
462
+ * 8. After MAX_ITERATIONS without approval: post best plan if score >= 50 (fallback)
463
+ * 9. If no plan scored >= 50: fail the task
797
464
  */
798
465
  export async function planTask(task, config, credentials) {
799
466
  const taskLabel = chalk.cyan(task.id.slice(0, 8));
@@ -816,12 +483,9 @@ export async function planTask(task, config, credentials) {
816
483
  const startTime = Date.now();
817
484
  // PRD for critic validation: use task description, fall back to summary
818
485
  const prd = task.description || task.summary;
819
- // Run team analysis ONCE before the planner-critic loop.
820
- // Analyst prompts don't include critic feedback, so re-running them
821
- // on iteration 2+ wastes compute (they'd produce the same reports).
486
+ // Clone target repo so the planner can explore with tools
822
487
  let repoPath = null;
823
- let enhancedBasePrompt = basePrompt;
824
- if (config.teamPlanningEnabled && task.githubRepo) {
488
+ if (task.githubRepo) {
825
489
  const scmProvider = task.scmProvider || "github";
826
490
  const scmToken = scmProvider === "bitbucket"
827
491
  ? config.bitbucketToken
@@ -832,23 +496,11 @@ export async function planTask(task, config, credentials) {
832
496
  repoPath = await cloneTargetRepo(task.githubRepo, scmToken, scmProvider, task.id);
833
497
  }
834
498
  else {
835
- console.log(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} No SCM token for ${scmProvider}, skipping team planning`);
836
- }
837
- if (repoPath) {
838
- const analystModel = config.analystModel || cliModel;
839
- const analystBackend = isAnthropicPlanning ? "Claude CLI" : `${provider} AI SDK`;
840
- console.log(`${ts()} ${taskLabel} Analysts using model: ${chalk.yellow(analystModel)} via ${chalk.dim(analystBackend)} (planner: ${chalk.yellow(cliModel)})`);
841
- const analysisResult = await runTeamAnalysis(task, basePrompt, claudePath, analystModel, cleanEnv, repoPath, task.id, startTime, provider, providerApiKey);
842
- if (analysisResult) {
843
- enhancedBasePrompt = analysisResult;
844
- }
845
- // else: all analysts failed, fall back to basePrompt
499
+ console.log(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} No SCM token for ${scmProvider}, planner will run without repo access`);
846
500
  }
847
501
  }
848
502
  // 2. Planner-Critic iteration loop
849
- // Use enhancedBasePrompt (with analyst reports) as the base for all iterations.
850
- // Critic feedback gets appended on re-plan, but analyst reports are fixed.
851
- let currentPrompt = enhancedBasePrompt;
503
+ let currentPrompt = basePrompt;
852
504
  let bestPlan = null;
853
505
  let bestScore = 0;
854
506
  // Track critic history across iterations for analytics
@@ -870,9 +522,7 @@ export async function planTask(task, config, credentials) {
870
522
  let rawOutput;
871
523
  try {
872
524
  if (isAnthropicPlanning) {
873
- // Disable tools when analysts already provided repo context
874
- const hasAnalystContext = enhancedBasePrompt !== basePrompt;
875
- rawOutput = await runClaudeCli(claudePath, cliModel, currentPrompt, cleanEnv, task.id, startTime, hasAnalystContext);
525
+ rawOutput = await runClaudeCli(claudePath, cliModel, currentPrompt, cleanEnv, task.id, startTime, repoPath || undefined);
876
526
  }
877
527
  else {
878
528
  if (!providerApiKey) {
@@ -997,7 +647,7 @@ export async function planTask(task, config, credentials) {
997
647
  // 2f. Rejected — append critic feedback for next iteration
998
648
  if (iteration < MAX_ITERATIONS) {
999
649
  const feedback = formatCriticFeedback(criticResult);
1000
- currentPrompt = enhancedBasePrompt + "\n\n" + feedback;
650
+ currentPrompt = basePrompt + "\n\n" + feedback;
1001
651
  const msg = `${PREFIX} Critic rejected (score: ${criticResult.score}/100, threshold: ${AUTO_APPROVAL_THRESHOLD}). Re-planning with feedback...`;
1002
652
  console.log(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} ${msg}`);
1003
653
  await postLog(task.id, msg);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@workermill/agent",
3
- "version": "0.8.1",
3
+ "version": "0.8.3",
4
4
  "description": "WorkerMill Remote Agent - Run AI workers locally with your Claude Max subscription",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",