@workermill/agent 0.8.1 → 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/config.d.ts CHANGED
@@ -16,8 +16,6 @@ export interface AgentConfig {
16
16
  bitbucketToken: string;
17
17
  gitlabToken: string;
18
18
  workerImage: string;
19
- teamPlanningEnabled: boolean;
20
- analystModel?: string;
21
19
  }
22
20
  export interface FileConfig {
23
21
  apiUrl: string;
@@ -32,8 +30,6 @@ export interface FileConfig {
32
30
  gitlab: string;
33
31
  };
34
32
  workerImage: string;
35
- teamPlanningEnabled?: boolean;
36
- analystModel?: string;
37
33
  setupCompletedAt: string;
38
34
  }
39
35
  export declare function getConfigDir(): string;
package/dist/config.js CHANGED
@@ -75,8 +75,6 @@ export function loadConfigFromFile() {
75
75
  bitbucketToken: fc.tokens?.bitbucket || "",
76
76
  gitlabToken: fc.tokens?.gitlab || "",
77
77
  workerImage,
78
- teamPlanningEnabled: fc.teamPlanningEnabled ?? true,
79
- analystModel: fc.analystModel,
80
78
  };
81
79
  }
82
80
  /**
@@ -121,8 +119,6 @@ export function loadConfig() {
121
119
  bitbucketToken: process.env.BITBUCKET_TOKEN || "",
122
120
  gitlabToken: process.env.GITLAB_TOKEN || "",
123
121
  workerImage: process.env.WORKER_IMAGE || "workermill-worker:local",
124
- teamPlanningEnabled: process.env.TEAM_PLANNING_ENABLED !== "false",
125
- analystModel: process.env.ANALYST_MODEL,
126
122
  };
127
123
  }
128
124
  /**
package/dist/planner.d.ts CHANGED
@@ -28,12 +28,13 @@ export interface PlanningTask {
28
28
  *
29
29
  * Flow:
30
30
  * 1. Fetch planning prompt from cloud API
31
- * 2. Run Claude CLI to generate plan
32
- * 3. Parse plan, apply file cap (max 5 files per story)
33
- * 4. Run critic validation via Claude CLI
34
- * 5. If critic approves (score >= 80): post validated plan to API
35
- * 6. If critic rejects: re-run planner with feedback (up to MAX_ITERATIONS)
36
- * 7. After MAX_ITERATIONS without approval: post best plan if score >= 50 (fallback)
37
- * 8. If no plan scored >= 50: fail the task
31
+ * 2. Clone target repo (if available) so planner can explore with tools
32
+ * 3. Run Claude CLI to generate plan
33
+ * 4. Parse plan, apply file cap (max 5 files per story)
34
+ * 5. Run critic validation via Claude CLI
35
+ * 6. If critic approves (score >= 80): post validated plan to API
36
+ * 7. If critic rejects: re-run planner with feedback (up to MAX_ITERATIONS)
37
+ * 8. After MAX_ITERATIONS without approval: post best plan if score >= 50 (fallback)
38
+ * 9. If no plan scored >= 50: fail the task
38
39
  */
39
40
  export declare function planTask(task: PlanningTask, config: AgentConfig, credentials?: ClaimCredentials): Promise<boolean>;
package/dist/planner.js CHANGED
@@ -157,7 +157,7 @@ function phaseLabel(phase, elapsed) {
157
157
  * Run Claude CLI with stream-json output, posting real-time phase milestones
158
158
  * to the cloud dashboard — identical terminal experience to cloud planning.
159
159
  */
160
- function runClaudeCli(claudePath, model, prompt, env, taskId, startTime, disableTools = false) {
160
+ function runClaudeCli(claudePath, model, prompt, env, taskId, startTime, cwd) {
161
161
  const taskLabel = chalk.cyan(taskId.slice(0, 8));
162
162
  return new Promise((resolve, reject) => {
163
163
  const cliArgs = [
@@ -167,12 +167,8 @@ function runClaudeCli(claudePath, model, prompt, env, taskId, startTime, disable
167
167
  "--model", model,
168
168
  "--permission-mode", "bypassPermissions",
169
169
  ];
170
- // When analysts already explored the repo, strip tools so the planner
171
- // doesn't waste turns re-exploring — it has all context in the prompt.
172
- if (disableTools) {
173
- cliArgs.push("--allowedTools", "");
174
- }
175
170
  const proc = spawn(claudePath, cliArgs, {
171
+ cwd,
176
172
  env,
177
173
  stdio: ["pipe", "pipe", "pipe"],
178
174
  });
@@ -422,15 +418,15 @@ function buildCloneUrl(repo, token, scmProvider) {
422
418
  }
423
419
  }
424
420
  /**
425
- * Clone the target repo to a temp directory for team planning analysis.
426
- * Returns the path on success, or null on failure (fallback to single-agent).
421
+ * Clone the target repo to a temp directory so the planner can explore with tools.
422
+ * Returns the path on success, or null on failure.
427
423
  */
428
424
  async function cloneTargetRepo(repo, token, scmProvider, taskId) {
429
425
  const taskLabel = chalk.cyan(taskId.slice(0, 8));
430
426
  const tmpDir = `/tmp/workermill-planning-${taskId.slice(0, 8)}-${Date.now()}`;
431
427
  try {
432
428
  const cloneUrl = buildCloneUrl(repo, token, scmProvider);
433
- console.log(`${ts()} ${taskLabel} ${chalk.dim("Cloning repo for team planning...")}`);
429
+ console.log(`${ts()} ${taskLabel} ${chalk.dim("Cloning repo for planner...")}`);
434
430
  execSync(`git clone --depth 1 --single-branch "${cloneUrl}" "${tmpDir}"`, {
435
431
  stdio: "ignore",
436
432
  timeout: 60_000,
@@ -440,7 +436,7 @@ async function cloneTargetRepo(repo, token, scmProvider, taskId) {
440
436
  }
441
437
  catch (error) {
442
438
  const errMsg = error instanceof Error ? error.message : String(error);
443
- console.error(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} Clone failed, falling back to single-agent: ${errMsg.substring(0, 100)}`);
439
+ console.error(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} Clone failed, planner will run without repo access: ${errMsg.substring(0, 100)}`);
444
440
  // Cleanup partial clone
445
441
  try {
446
442
  execSync(`rm -rf "${tmpDir}"`, { stdio: "ignore" });
@@ -451,349 +447,19 @@ async function cloneTargetRepo(repo, token, scmProvider, taskId) {
451
447
  return null;
452
448
  }
453
449
  }
454
- /**
455
- * Run an analyst agent via Claude CLI with tool access to the cloned repo.
456
- * Returns the analyst's report text, or an empty string on failure.
457
- */
458
- function runAnalyst(name, claudePath, model, prompt, repoPath, env, timeoutMs = 900_000, taskId) {
459
- const label = chalk.blue(`[${name}]`);
460
- const modelLabel = chalk.yellow(model);
461
- return new Promise((resolve) => {
462
- console.log(`${ts()} ${label} Starting analyst using ${modelLabel}...`);
463
- if (taskId) {
464
- postLog(taskId, `${PREFIX} [${name}] Starting analyst using ${model}...`);
465
- }
466
- const proc = spawn(claudePath, [
467
- "--print",
468
- "--verbose",
469
- "--output-format", "stream-json",
470
- "--model", model,
471
- "--permission-mode", "bypassPermissions",
472
- ], {
473
- cwd: repoPath,
474
- env,
475
- stdio: ["pipe", "pipe", "pipe"],
476
- });
477
- // Write prompt via stdin (same as runClaudeCli)
478
- proc.stdin.write(prompt);
479
- proc.stdin.end();
480
- let resultText = "";
481
- let fullText = "";
482
- let stderrOutput = "";
483
- let lineBuffer = "";
484
- let toolCalls = 0;
485
- let timedOut = false;
486
- const startMs = Date.now();
487
- proc.stderr.on("data", (chunk) => {
488
- const text = chunk.toString();
489
- stderrOutput += text;
490
- // Show stderr in real-time so we can see what's happening
491
- for (const line of text.split("\n").filter((l) => l.trim())) {
492
- console.log(`${ts()} ${label} ${chalk.red("stderr:")} ${line.trim()}`);
493
- }
494
- });
495
- proc.stdout.on("data", (data) => {
496
- lineBuffer += data.toString();
497
- const lines = lineBuffer.split("\n");
498
- lineBuffer = lines.pop() || "";
499
- for (const line of lines) {
500
- const trimmed = line.trim();
501
- if (!trimmed)
502
- continue;
503
- try {
504
- const event = JSON.parse(trimmed);
505
- // Claude CLI stream-json wraps content in assistant message events
506
- if (event.type === "assistant" && event.message?.content) {
507
- const content = event.message.content;
508
- if (Array.isArray(content)) {
509
- for (const block of content) {
510
- if (block.type === "text" && block.text) {
511
- fullText += block.text;
512
- // Log analyst reasoning (first line, truncated)
513
- const thought = block.text.trim().split("\n")[0].substring(0, 120);
514
- if (thought) {
515
- console.log(`${ts()} ${label} ${chalk.dim("💭")} ${chalk.dim(thought)}`);
516
- if (taskId)
517
- postLog(taskId, `${PREFIX} [${name}] 💭 ${thought}`);
518
- }
519
- }
520
- else if (block.type === "tool_use") {
521
- toolCalls++;
522
- const toolName = block.name || "unknown";
523
- // Show tool name + input preview (file path, pattern, etc.)
524
- const inputStr = block.input ? JSON.stringify(block.input) : "";
525
- const inputPreview = inputStr.length > 80 ? inputStr.substring(0, 80) + "…" : inputStr;
526
- console.log(`${ts()} ${label} ${chalk.dim(`Tool: ${toolName}`)}${inputPreview ? chalk.dim(` ${inputPreview}`) : ""}`);
527
- if (taskId)
528
- postLog(taskId, `${PREFIX} [${name}] Tool: ${toolName} ${inputPreview}`);
529
- }
530
- }
531
- }
532
- else if (typeof content === "string") {
533
- fullText += content;
534
- }
535
- }
536
- else if (event.type === "content_block_delta" && event.delta?.text) {
537
- // Fallback: raw API streaming format (may appear in some CLI versions)
538
- fullText += event.delta.text;
539
- }
540
- else if (event.type === "content_block_start" && event.content_block?.type === "tool_use") {
541
- toolCalls++;
542
- const toolName = event.content_block?.name || "unknown";
543
- console.log(`${ts()} ${label} ${chalk.dim(`Tool: ${toolName}`)}`);
544
- if (taskId)
545
- postLog(taskId, `${PREFIX} [${name}] Tool: ${toolName}`);
546
- }
547
- else if (event.type === "result" && event.result) {
548
- resultText =
549
- typeof event.result === "string" ? event.result : "";
550
- }
551
- }
552
- catch {
553
- fullText += trimmed + "\n";
554
- }
555
- }
556
- });
557
- const timeout = setTimeout(() => {
558
- timedOut = true;
559
- proc.kill("SIGTERM");
560
- const elapsed = Math.round((Date.now() - startMs) / 1000);
561
- console.log(`${ts()} ${label} ${chalk.yellow("⚠ Timed out")} after ${formatElapsed(elapsed)} (${toolCalls} tool calls, ${fullText.length} chars)`);
562
- if (taskId)
563
- postLog(taskId, `${PREFIX} [${name}] ⚠ Timed out after ${formatElapsed(elapsed)}`);
564
- resolve(resultText || fullText || "");
565
- }, timeoutMs);
566
- proc.on("exit", (code) => {
567
- clearTimeout(timeout);
568
- const elapsed = Math.round((Date.now() - startMs) / 1000);
569
- if (timedOut)
570
- return; // already resolved
571
- const output = resultText || fullText || "";
572
- if (code === 0 && output.length > 0) {
573
- console.log(`${ts()} ${label} ${chalk.green("✓ Done")} in ${formatElapsed(elapsed)} (${toolCalls} tool calls, ${output.length} chars)`);
574
- if (taskId)
575
- postLog(taskId, `${PREFIX} [${name}] ✓ Done in ${formatElapsed(elapsed)} (${toolCalls} tool calls, ${output.length} chars)`);
576
- }
577
- else if (code !== 0) {
578
- console.log(`${ts()} ${label} ${chalk.red(`✗ Exited ${code}`)} after ${formatElapsed(elapsed)} — ${stderrOutput.substring(0, 150) || "no stderr"}`);
579
- if (taskId)
580
- postLog(taskId, `${PREFIX} [${name}] ✗ Exited ${code} after ${formatElapsed(elapsed)}`);
581
- }
582
- else {
583
- console.log(`${ts()} ${label} ${chalk.yellow("⚠ Empty output")} after ${formatElapsed(elapsed)} (${toolCalls} tool calls)`);
584
- if (taskId)
585
- postLog(taskId, `${PREFIX} [${name}] ⚠ Empty output after ${formatElapsed(elapsed)}`);
586
- }
587
- resolve(output);
588
- });
589
- proc.on("error", (err) => {
590
- clearTimeout(timeout);
591
- console.log(`${ts()} ${label} ${chalk.red("✗ Spawn failed:")} ${err.message}`);
592
- resolve("");
593
- });
594
- });
595
- }
596
- /**
597
- * Run an analyst agent via Vercel AI SDK with tool access to the cloned repo.
598
- * Used for non-Anthropic providers (OpenAI, Google, Ollama) that can't use Claude CLI.
599
- * Returns the analyst's report text, or an empty string on failure.
600
- */
601
- async function runAnalystWithSdk(name, provider, model, apiKey, prompt, repoPath, timeoutMs = 900_000, taskId) {
602
- const label = chalk.blue(`[${name}]`);
603
- const modelLabel = chalk.yellow(`${provider}/${model}`);
604
- const startMs = Date.now();
605
- console.log(`${ts()} ${label} Starting analyst using ${modelLabel} via AI SDK...`);
606
- if (taskId)
607
- postLog(taskId, `${PREFIX} [${name}] Starting analyst using ${provider}/${model} via AI SDK...`);
608
- try {
609
- const result = await generateTextWithTools({
610
- provider,
611
- model,
612
- apiKey,
613
- prompt,
614
- workingDir: repoPath,
615
- maxTokens: 16384,
616
- temperature: 0.3,
617
- timeoutMs,
618
- maxSteps: 20, // Allow thorough exploration
619
- enableTools: true,
620
- });
621
- const elapsed = Math.round((Date.now() - startMs) / 1000);
622
- if (result && result.length > 0) {
623
- console.log(`${ts()} ${label} ${chalk.green("✓ Done")} in ${elapsed}s (${result.length} chars)`);
624
- return result;
625
- }
626
- console.log(`${ts()} ${label} ${chalk.yellow("⚠ Empty output")} after ${elapsed}s`);
627
- return "";
628
- }
629
- catch (error) {
630
- const elapsed = Math.round((Date.now() - startMs) / 1000);
631
- const errMsg = error instanceof Error ? error.message : String(error);
632
- console.log(`${ts()} ${label} ${chalk.red(`✗ Failed`)} after ${elapsed}s: ${errMsg.substring(0, 150)}`);
633
- return "";
634
- }
635
- }
636
- /** Analyst prompt templates */
637
- const CODEBASE_ANALYST_PROMPT = `You are a codebase analyst. Your job is to explore this repository using tools and report what you find.
638
-
639
- IMPORTANT: You MUST use tools to explore the repository. Do NOT guess or make assumptions.
640
-
641
- Step 1: Run Glob with pattern "**/*" to see the top-level directory structure.
642
- Step 2: Read key files: package.json, tsconfig.json, README.md, .env.example, or equivalents.
643
- Step 3: Run Glob on src/ or the main source directory to understand the code layout.
644
- Step 4: Read 2-3 representative source files to understand patterns and frameworks.
645
-
646
- After exploring, write a report covering:
647
- 1. Directory structure and organization
648
- 2. Languages, frameworks, and key dependencies (from package.json, requirements.txt, etc.)
649
- 3. Existing test files and testing patterns (search for test/, __tests__, *.test.*, *.spec.*)
650
- 4. CI/CD configuration (search for .github/workflows/, Jenkinsfile, etc.)
651
- 5. Configuration files and environment setup
652
-
653
- Keep your report under 2000 words. Only report facts you verified with tools.`;
654
- function makeRequirementsAnalystPrompt(task) {
655
- return `You are a requirements analyst. Analyze the following task and the repository to identify what needs to be built.
656
-
657
- Task: ${task.summary}
658
- ${task.description ? `\nDescription:\n${task.description}` : ""}
659
-
660
- IMPORTANT: You MUST use tools to understand the existing codebase before analyzing requirements.
661
-
662
- Step 1: Run Glob with pattern "**/*" to see what already exists in the repository.
663
- Step 2: Read any existing README, docs, or configuration to understand the current state.
664
- Step 3: Search for any code related to the task requirements using Grep.
665
-
666
- After exploring, write a report covering:
667
- 1. Explicit acceptance criteria — what MUST be built based on the description
668
- 2. Implicit requirements — what's assumed but not stated (auth, error handling, etc.)
669
- 3. What already exists vs what needs to be created (based on your file exploration)
670
- 4. Ambiguities that could lead to wrong implementation
671
- 5. Suggested components/modules and which persona should own each
672
-
673
- Keep your report under 1500 words.`;
674
- }
675
- function makeRiskAssessorPrompt(task) {
676
- return `You are a risk assessor. Your job is to search this repository for potential risks and blockers for a development task.
677
-
678
- Task: ${task.summary}
679
- ${task.description ? `\nDescription:\n${task.description}` : ""}
680
-
681
- IMPORTANT: You MUST use tools to search the codebase. Do NOT guess file paths or make assumptions.
682
-
683
- Step 1: Run Glob with pattern "**/*" to see the full repository structure.
684
- Step 2: Use Grep to search for code related to the task (relevant keywords, APIs, components).
685
- Step 3: Read files that are likely to be modified or affected by this task.
686
- Step 4: Search for existing tests (Grep for "test", "spec", "describe", "it(") to find test coverage.
687
-
688
- After exploring, write a report covering:
689
- 1. Specific files that will need to be modified (exact paths from your search)
690
- 2. Files with heavy coupling or shared dependencies (imports you found)
691
- 3. Existing tests that will need updating (exact file paths)
692
- 4. Environment, config, or migration requirements
693
- 5. Deployment or infrastructure risks
694
-
695
- Keep your report under 1500 words. Only report facts you verified with tools.`;
696
- }
697
- /**
698
- * Run team planning: spawn 3 parallel analyst agents, then synthesize
699
- * their reports into an enhanced planning prompt for the final planner.
700
- *
701
- * Falls back to single-agent planning if anything goes wrong.
702
- */
703
- /**
704
- * Run team analysis: spawn 3 parallel analyst agents once, then return
705
- * an enhanced prompt with their reports appended. Returns null if all
706
- * analysts fail (caller should fall back to basePrompt).
707
- *
708
- * This runs ONCE before the planner-critic loop — analyst prompts don't
709
- * include critic feedback, so re-running them on iteration 2+ is waste.
710
- */
711
- async function runTeamAnalysis(task, basePrompt, claudePath, model, env, repoPath, taskId, startTime, provider = "anthropic", providerApiKey) {
712
- const taskLabel = chalk.cyan(taskId.slice(0, 8));
713
- console.log(`${ts()} ${taskLabel} ${chalk.magenta("◆ Team planning")} — running 3 analysts in parallel...`);
714
- await postLog(taskId, `${PREFIX} Team planning: running codebase, requirements, and risk analysts in parallel...`);
715
- await postProgress(taskId, "reading_repo", Math.round((Date.now() - startTime) / 1000), "Running parallel analysis agents...", 0, 0);
716
- const analysisModel = model;
717
- const MAX_TEAM_RETRIES = 3;
718
- const useCliAnalysts = provider === "anthropic";
719
- // Helper: dispatch analyst to Claude CLI or AI SDK based on provider
720
- const dispatchAnalyst = (name, prompt) => {
721
- if (useCliAnalysts) {
722
- return runAnalyst(name, claudePath, analysisModel, prompt, repoPath, env, 900_000, taskId);
723
- }
724
- if (!providerApiKey) {
725
- console.log(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} No API key for ${provider} analysts, skipping ${name}`);
726
- return Promise.resolve("");
727
- }
728
- return runAnalystWithSdk(name, provider, analysisModel, providerApiKey, prompt, repoPath, 900_000, taskId);
729
- };
730
- let codebaseReport = "";
731
- let requirementsReport = "";
732
- let riskReport = "";
733
- for (let attempt = 1; attempt <= MAX_TEAM_RETRIES; attempt++) {
734
- if (attempt > 1) {
735
- console.log(`${ts()} ${taskLabel} ${chalk.magenta("◆ Team planning")} — retry ${attempt}/${MAX_TEAM_RETRIES}...`);
736
- await postLog(taskId, `${PREFIX} Team analysis retry ${attempt}/${MAX_TEAM_RETRIES}...`);
737
- }
738
- const [codebaseResult, requirementsResult, riskResult] = await Promise.allSettled([
739
- codebaseReport ? Promise.resolve(codebaseReport) : dispatchAnalyst("Codebase", CODEBASE_ANALYST_PROMPT),
740
- requirementsReport ? Promise.resolve(requirementsReport) : dispatchAnalyst("Requirements", makeRequirementsAnalystPrompt(task)),
741
- riskReport ? Promise.resolve(riskReport) : dispatchAnalyst("Risk", makeRiskAssessorPrompt(task)),
742
- ]);
743
- if (!codebaseReport && codebaseResult.status === "fulfilled") {
744
- codebaseReport = codebaseResult.value;
745
- }
746
- if (!requirementsReport && requirementsResult.status === "fulfilled") {
747
- requirementsReport = requirementsResult.value;
748
- }
749
- if (!riskReport && riskResult.status === "fulfilled") {
750
- riskReport = riskResult.value;
751
- }
752
- const successCount = [codebaseReport, requirementsReport, riskReport].filter((r) => r.length > 0).length;
753
- const analysisElapsed = Math.round((Date.now() - startTime) / 1000);
754
- console.log(`${ts()} ${taskLabel} Analysis attempt ${attempt}: ${successCount}/3 reports (${analysisElapsed}s)`);
755
- if (successCount > 0) {
756
- console.log(`${ts()} ${taskLabel} ${chalk.green("✓")} Analysis complete: ${successCount}/3 reports (${analysisElapsed}s)`);
757
- await postLog(taskId, `${PREFIX} Team analysis complete: ${successCount}/3 reports in ${formatElapsed(analysisElapsed)}. Synthesizing plan...`);
758
- await postProgress(taskId, "analyzing", analysisElapsed, "Synthesizing analysis reports...", 0, 0);
759
- break;
760
- }
761
- if (attempt === MAX_TEAM_RETRIES) {
762
- console.log(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} All analysts failed after ${MAX_TEAM_RETRIES} attempts, falling back to single-agent planning`);
763
- await postLog(taskId, `${PREFIX} All analysis agents failed after ${MAX_TEAM_RETRIES} attempts — falling back to single-agent planning`);
764
- return null;
765
- }
766
- }
767
- // Build enhanced prompt with analysis reports
768
- const sections = [];
769
- if (codebaseReport) {
770
- sections.push(`## Codebase Analysis (from automated analysis)\n\n${codebaseReport}`);
771
- }
772
- if (requirementsReport) {
773
- sections.push(`## Requirements Analysis\n\n${requirementsReport}`);
774
- }
775
- if (riskReport) {
776
- sections.push(`## Risk Assessment\n\n${riskReport}`);
777
- }
778
- return (basePrompt +
779
- "\n\n" +
780
- sections.join("\n\n") +
781
- "\n\n" +
782
- "Use these analyses to produce a more accurate execution plan.\n" +
783
- "Prefer actual file paths discovered in the codebase analysis over guessed paths.");
784
- }
785
450
  /**
786
451
  * Run planning for a task with Planner-Critic validation loop.
787
452
  *
788
453
  * Flow:
789
454
  * 1. Fetch planning prompt from cloud API
790
- * 2. Run Claude CLI to generate plan
791
- * 3. Parse plan, apply file cap (max 5 files per story)
792
- * 4. Run critic validation via Claude CLI
793
- * 5. If critic approves (score >= 80): post validated plan to API
794
- * 6. If critic rejects: re-run planner with feedback (up to MAX_ITERATIONS)
795
- * 7. After MAX_ITERATIONS without approval: post best plan if score >= 50 (fallback)
796
- * 8. If no plan scored >= 50: fail the task
455
+ * 2. Clone target repo (if available) so planner can explore with tools
456
+ * 3. Run Claude CLI to generate plan
457
+ * 4. Parse plan, apply file cap (max 5 files per story)
458
+ * 5. Run critic validation via Claude CLI
459
+ * 6. If critic approves (score >= 80): post validated plan to API
460
+ * 7. If critic rejects: re-run planner with feedback (up to MAX_ITERATIONS)
461
+ * 8. After MAX_ITERATIONS without approval: post best plan if score >= 50 (fallback)
462
+ * 9. If no plan scored >= 50: fail the task
797
463
  */
798
464
  export async function planTask(task, config, credentials) {
799
465
  const taskLabel = chalk.cyan(task.id.slice(0, 8));
@@ -816,12 +482,9 @@ export async function planTask(task, config, credentials) {
816
482
  const startTime = Date.now();
817
483
  // PRD for critic validation: use task description, fall back to summary
818
484
  const prd = task.description || task.summary;
819
- // Run team analysis ONCE before the planner-critic loop.
820
- // Analyst prompts don't include critic feedback, so re-running them
821
- // on iteration 2+ wastes compute (they'd produce the same reports).
485
+ // Clone target repo so the planner can explore with tools
822
486
  let repoPath = null;
823
- let enhancedBasePrompt = basePrompt;
824
- if (config.teamPlanningEnabled && task.githubRepo) {
487
+ if (task.githubRepo) {
825
488
  const scmProvider = task.scmProvider || "github";
826
489
  const scmToken = scmProvider === "bitbucket"
827
490
  ? config.bitbucketToken
@@ -832,23 +495,11 @@ export async function planTask(task, config, credentials) {
832
495
  repoPath = await cloneTargetRepo(task.githubRepo, scmToken, scmProvider, task.id);
833
496
  }
834
497
  else {
835
- console.log(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} No SCM token for ${scmProvider}, skipping team planning`);
836
- }
837
- if (repoPath) {
838
- const analystModel = config.analystModel || cliModel;
839
- const analystBackend = isAnthropicPlanning ? "Claude CLI" : `${provider} AI SDK`;
840
- console.log(`${ts()} ${taskLabel} Analysts using model: ${chalk.yellow(analystModel)} via ${chalk.dim(analystBackend)} (planner: ${chalk.yellow(cliModel)})`);
841
- const analysisResult = await runTeamAnalysis(task, basePrompt, claudePath, analystModel, cleanEnv, repoPath, task.id, startTime, provider, providerApiKey);
842
- if (analysisResult) {
843
- enhancedBasePrompt = analysisResult;
844
- }
845
- // else: all analysts failed, fall back to basePrompt
498
+ console.log(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} No SCM token for ${scmProvider}, planner will run without repo access`);
846
499
  }
847
500
  }
848
501
  // 2. Planner-Critic iteration loop
849
- // Use enhancedBasePrompt (with analyst reports) as the base for all iterations.
850
- // Critic feedback gets appended on re-plan, but analyst reports are fixed.
851
- let currentPrompt = enhancedBasePrompt;
502
+ let currentPrompt = basePrompt;
852
503
  let bestPlan = null;
853
504
  let bestScore = 0;
854
505
  // Track critic history across iterations for analytics
@@ -870,9 +521,7 @@ export async function planTask(task, config, credentials) {
870
521
  let rawOutput;
871
522
  try {
872
523
  if (isAnthropicPlanning) {
873
- // Disable tools when analysts already provided repo context
874
- const hasAnalystContext = enhancedBasePrompt !== basePrompt;
875
- rawOutput = await runClaudeCli(claudePath, cliModel, currentPrompt, cleanEnv, task.id, startTime, hasAnalystContext);
524
+ rawOutput = await runClaudeCli(claudePath, cliModel, currentPrompt, cleanEnv, task.id, startTime, repoPath || undefined);
876
525
  }
877
526
  else {
878
527
  if (!providerApiKey) {
@@ -997,7 +646,7 @@ export async function planTask(task, config, credentials) {
997
646
  // 2f. Rejected — append critic feedback for next iteration
998
647
  if (iteration < MAX_ITERATIONS) {
999
648
  const feedback = formatCriticFeedback(criticResult);
1000
- currentPrompt = enhancedBasePrompt + "\n\n" + feedback;
649
+ currentPrompt = basePrompt + "\n\n" + feedback;
1001
650
  const msg = `${PREFIX} Critic rejected (score: ${criticResult.score}/100, threshold: ${AUTO_APPROVAL_THRESHOLD}). Re-planning with feedback...`;
1002
651
  console.log(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} ${msg}`);
1003
652
  await postLog(task.id, msg);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@workermill/agent",
3
- "version": "0.8.1",
3
+ "version": "0.8.2",
4
4
  "description": "WorkerMill Remote Agent - Run AI workers locally with your Claude Max subscription",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",