@workermill/agent 0.8.1 → 0.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/config.d.ts +0 -4
- package/dist/config.js +0 -4
- package/dist/planner.d.ts +8 -7
- package/dist/planner.js +20 -371
- package/package.json +1 -1
package/dist/config.d.ts
CHANGED
|
@@ -16,8 +16,6 @@ export interface AgentConfig {
|
|
|
16
16
|
bitbucketToken: string;
|
|
17
17
|
gitlabToken: string;
|
|
18
18
|
workerImage: string;
|
|
19
|
-
teamPlanningEnabled: boolean;
|
|
20
|
-
analystModel?: string;
|
|
21
19
|
}
|
|
22
20
|
export interface FileConfig {
|
|
23
21
|
apiUrl: string;
|
|
@@ -32,8 +30,6 @@ export interface FileConfig {
|
|
|
32
30
|
gitlab: string;
|
|
33
31
|
};
|
|
34
32
|
workerImage: string;
|
|
35
|
-
teamPlanningEnabled?: boolean;
|
|
36
|
-
analystModel?: string;
|
|
37
33
|
setupCompletedAt: string;
|
|
38
34
|
}
|
|
39
35
|
export declare function getConfigDir(): string;
|
package/dist/config.js
CHANGED
|
@@ -75,8 +75,6 @@ export function loadConfigFromFile() {
|
|
|
75
75
|
bitbucketToken: fc.tokens?.bitbucket || "",
|
|
76
76
|
gitlabToken: fc.tokens?.gitlab || "",
|
|
77
77
|
workerImage,
|
|
78
|
-
teamPlanningEnabled: fc.teamPlanningEnabled ?? true,
|
|
79
|
-
analystModel: fc.analystModel,
|
|
80
78
|
};
|
|
81
79
|
}
|
|
82
80
|
/**
|
|
@@ -121,8 +119,6 @@ export function loadConfig() {
|
|
|
121
119
|
bitbucketToken: process.env.BITBUCKET_TOKEN || "",
|
|
122
120
|
gitlabToken: process.env.GITLAB_TOKEN || "",
|
|
123
121
|
workerImage: process.env.WORKER_IMAGE || "workermill-worker:local",
|
|
124
|
-
teamPlanningEnabled: process.env.TEAM_PLANNING_ENABLED !== "false",
|
|
125
|
-
analystModel: process.env.ANALYST_MODEL,
|
|
126
122
|
};
|
|
127
123
|
}
|
|
128
124
|
/**
|
package/dist/planner.d.ts
CHANGED
|
@@ -28,12 +28,13 @@ export interface PlanningTask {
|
|
|
28
28
|
*
|
|
29
29
|
* Flow:
|
|
30
30
|
* 1. Fetch planning prompt from cloud API
|
|
31
|
-
* 2.
|
|
32
|
-
* 3.
|
|
33
|
-
* 4.
|
|
34
|
-
* 5.
|
|
35
|
-
* 6. If critic
|
|
36
|
-
* 7.
|
|
37
|
-
* 8.
|
|
31
|
+
* 2. Clone target repo (if available) so planner can explore with tools
|
|
32
|
+
* 3. Run Claude CLI to generate plan
|
|
33
|
+
* 4. Parse plan, apply file cap (max 5 files per story)
|
|
34
|
+
* 5. Run critic validation via Claude CLI
|
|
35
|
+
* 6. If critic approves (score >= 80): post validated plan to API
|
|
36
|
+
* 7. If critic rejects: re-run planner with feedback (up to MAX_ITERATIONS)
|
|
37
|
+
* 8. After MAX_ITERATIONS without approval: post best plan if score >= 50 (fallback)
|
|
38
|
+
* 9. If no plan scored >= 50: fail the task
|
|
38
39
|
*/
|
|
39
40
|
export declare function planTask(task: PlanningTask, config: AgentConfig, credentials?: ClaimCredentials): Promise<boolean>;
|
package/dist/planner.js
CHANGED
|
@@ -157,7 +157,7 @@ function phaseLabel(phase, elapsed) {
|
|
|
157
157
|
* Run Claude CLI with stream-json output, posting real-time phase milestones
|
|
158
158
|
* to the cloud dashboard — identical terminal experience to cloud planning.
|
|
159
159
|
*/
|
|
160
|
-
function runClaudeCli(claudePath, model, prompt, env, taskId, startTime, disableTools) {
|
|
160
|
+
function runClaudeCli(claudePath, model, prompt, env, taskId, startTime, cwd) {
|
|
161
161
|
const taskLabel = chalk.cyan(taskId.slice(0, 8));
|
|
162
162
|
return new Promise((resolve, reject) => {
|
|
163
163
|
const cliArgs = [
|
|
@@ -167,12 +167,8 @@ function runClaudeCli(claudePath, model, prompt, env, taskId, startTime, disable
|
|
|
167
167
|
"--model", model,
|
|
168
168
|
"--permission-mode", "bypassPermissions",
|
|
169
169
|
];
|
|
170
|
-
// When analysts already explored the repo, strip tools so the planner
|
|
171
|
-
// doesn't waste turns re-exploring — it has all context in the prompt.
|
|
172
|
-
if (disableTools) {
|
|
173
|
-
cliArgs.push("--allowedTools", "");
|
|
174
|
-
}
|
|
175
170
|
const proc = spawn(claudePath, cliArgs, {
|
|
171
|
+
cwd,
|
|
176
172
|
env,
|
|
177
173
|
stdio: ["pipe", "pipe", "pipe"],
|
|
178
174
|
});
|
|
@@ -422,15 +418,15 @@ function buildCloneUrl(repo, token, scmProvider) {
|
|
|
422
418
|
}
|
|
423
419
|
}
|
|
424
420
|
/**
|
|
425
|
-
* Clone the target repo to a temp directory
|
|
426
|
-
* Returns the path on success, or null on failure
|
|
421
|
+
* Clone the target repo to a temp directory so the planner can explore with tools.
|
|
422
|
+
* Returns the path on success, or null on failure.
|
|
427
423
|
*/
|
|
428
424
|
async function cloneTargetRepo(repo, token, scmProvider, taskId) {
|
|
429
425
|
const taskLabel = chalk.cyan(taskId.slice(0, 8));
|
|
430
426
|
const tmpDir = `/tmp/workermill-planning-${taskId.slice(0, 8)}-${Date.now()}`;
|
|
431
427
|
try {
|
|
432
428
|
const cloneUrl = buildCloneUrl(repo, token, scmProvider);
|
|
433
|
-
console.log(`${ts()} ${taskLabel} ${chalk.dim("Cloning repo for
|
|
429
|
+
console.log(`${ts()} ${taskLabel} ${chalk.dim("Cloning repo for planner...")}`);
|
|
434
430
|
execSync(`git clone --depth 1 --single-branch "${cloneUrl}" "${tmpDir}"`, {
|
|
435
431
|
stdio: "ignore",
|
|
436
432
|
timeout: 60_000,
|
|
@@ -440,7 +436,7 @@ async function cloneTargetRepo(repo, token, scmProvider, taskId) {
|
|
|
440
436
|
}
|
|
441
437
|
catch (error) {
|
|
442
438
|
const errMsg = error instanceof Error ? error.message : String(error);
|
|
443
|
-
console.error(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} Clone failed,
|
|
439
|
+
console.error(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} Clone failed, planner will run without repo access: ${errMsg.substring(0, 100)}`);
|
|
444
440
|
// Cleanup partial clone
|
|
445
441
|
try {
|
|
446
442
|
execSync(`rm -rf "${tmpDir}"`, { stdio: "ignore" });
|
|
@@ -451,349 +447,19 @@ async function cloneTargetRepo(repo, token, scmProvider, taskId) {
|
|
|
451
447
|
return null;
|
|
452
448
|
}
|
|
453
449
|
}
|
|
454
|
-
/**
|
|
455
|
-
* Run an analyst agent via Claude CLI with tool access to the cloned repo.
|
|
456
|
-
* Returns the analyst's report text, or an empty string on failure.
|
|
457
|
-
*/
|
|
458
|
-
function runAnalyst(name, claudePath, model, prompt, repoPath, env, timeoutMs = 900_000, taskId) {
|
|
459
|
-
const label = chalk.blue(`[${name}]`);
|
|
460
|
-
const modelLabel = chalk.yellow(model);
|
|
461
|
-
return new Promise((resolve) => {
|
|
462
|
-
console.log(`${ts()} ${label} Starting analyst using ${modelLabel}...`);
|
|
463
|
-
if (taskId) {
|
|
464
|
-
postLog(taskId, `${PREFIX} [${name}] Starting analyst using ${model}...`);
|
|
465
|
-
}
|
|
466
|
-
const proc = spawn(claudePath, [
|
|
467
|
-
"--print",
|
|
468
|
-
"--verbose",
|
|
469
|
-
"--output-format", "stream-json",
|
|
470
|
-
"--model", model,
|
|
471
|
-
"--permission-mode", "bypassPermissions",
|
|
472
|
-
], {
|
|
473
|
-
cwd: repoPath,
|
|
474
|
-
env,
|
|
475
|
-
stdio: ["pipe", "pipe", "pipe"],
|
|
476
|
-
});
|
|
477
|
-
// Write prompt via stdin (same as runClaudeCli)
|
|
478
|
-
proc.stdin.write(prompt);
|
|
479
|
-
proc.stdin.end();
|
|
480
|
-
let resultText = "";
|
|
481
|
-
let fullText = "";
|
|
482
|
-
let stderrOutput = "";
|
|
483
|
-
let lineBuffer = "";
|
|
484
|
-
let toolCalls = 0;
|
|
485
|
-
let timedOut = false;
|
|
486
|
-
const startMs = Date.now();
|
|
487
|
-
proc.stderr.on("data", (chunk) => {
|
|
488
|
-
const text = chunk.toString();
|
|
489
|
-
stderrOutput += text;
|
|
490
|
-
// Show stderr in real-time so we can see what's happening
|
|
491
|
-
for (const line of text.split("\n").filter((l) => l.trim())) {
|
|
492
|
-
console.log(`${ts()} ${label} ${chalk.red("stderr:")} ${line.trim()}`);
|
|
493
|
-
}
|
|
494
|
-
});
|
|
495
|
-
proc.stdout.on("data", (data) => {
|
|
496
|
-
lineBuffer += data.toString();
|
|
497
|
-
const lines = lineBuffer.split("\n");
|
|
498
|
-
lineBuffer = lines.pop() || "";
|
|
499
|
-
for (const line of lines) {
|
|
500
|
-
const trimmed = line.trim();
|
|
501
|
-
if (!trimmed)
|
|
502
|
-
continue;
|
|
503
|
-
try {
|
|
504
|
-
const event = JSON.parse(trimmed);
|
|
505
|
-
// Claude CLI stream-json wraps content in assistant message events
|
|
506
|
-
if (event.type === "assistant" && event.message?.content) {
|
|
507
|
-
const content = event.message.content;
|
|
508
|
-
if (Array.isArray(content)) {
|
|
509
|
-
for (const block of content) {
|
|
510
|
-
if (block.type === "text" && block.text) {
|
|
511
|
-
fullText += block.text;
|
|
512
|
-
// Log analyst reasoning (first line, truncated)
|
|
513
|
-
const thought = block.text.trim().split("\n")[0].substring(0, 120);
|
|
514
|
-
if (thought) {
|
|
515
|
-
console.log(`${ts()} ${label} ${chalk.dim("💭")} ${chalk.dim(thought)}`);
|
|
516
|
-
if (taskId)
|
|
517
|
-
postLog(taskId, `${PREFIX} [${name}] 💭 ${thought}`);
|
|
518
|
-
}
|
|
519
|
-
}
|
|
520
|
-
else if (block.type === "tool_use") {
|
|
521
|
-
toolCalls++;
|
|
522
|
-
const toolName = block.name || "unknown";
|
|
523
|
-
// Show tool name + input preview (file path, pattern, etc.)
|
|
524
|
-
const inputStr = block.input ? JSON.stringify(block.input) : "";
|
|
525
|
-
const inputPreview = inputStr.length > 80 ? inputStr.substring(0, 80) + "…" : inputStr;
|
|
526
|
-
console.log(`${ts()} ${label} ${chalk.dim(`Tool: ${toolName}`)}${inputPreview ? chalk.dim(` ${inputPreview}`) : ""}`);
|
|
527
|
-
if (taskId)
|
|
528
|
-
postLog(taskId, `${PREFIX} [${name}] Tool: ${toolName} ${inputPreview}`);
|
|
529
|
-
}
|
|
530
|
-
}
|
|
531
|
-
}
|
|
532
|
-
else if (typeof content === "string") {
|
|
533
|
-
fullText += content;
|
|
534
|
-
}
|
|
535
|
-
}
|
|
536
|
-
else if (event.type === "content_block_delta" && event.delta?.text) {
|
|
537
|
-
// Fallback: raw API streaming format (may appear in some CLI versions)
|
|
538
|
-
fullText += event.delta.text;
|
|
539
|
-
}
|
|
540
|
-
else if (event.type === "content_block_start" && event.content_block?.type === "tool_use") {
|
|
541
|
-
toolCalls++;
|
|
542
|
-
const toolName = event.content_block?.name || "unknown";
|
|
543
|
-
console.log(`${ts()} ${label} ${chalk.dim(`Tool: ${toolName}`)}`);
|
|
544
|
-
if (taskId)
|
|
545
|
-
postLog(taskId, `${PREFIX} [${name}] Tool: ${toolName}`);
|
|
546
|
-
}
|
|
547
|
-
else if (event.type === "result" && event.result) {
|
|
548
|
-
resultText =
|
|
549
|
-
typeof event.result === "string" ? event.result : "";
|
|
550
|
-
}
|
|
551
|
-
}
|
|
552
|
-
catch {
|
|
553
|
-
fullText += trimmed + "\n";
|
|
554
|
-
}
|
|
555
|
-
}
|
|
556
|
-
});
|
|
557
|
-
const timeout = setTimeout(() => {
|
|
558
|
-
timedOut = true;
|
|
559
|
-
proc.kill("SIGTERM");
|
|
560
|
-
const elapsed = Math.round((Date.now() - startMs) / 1000);
|
|
561
|
-
console.log(`${ts()} ${label} ${chalk.yellow("⚠ Timed out")} after ${formatElapsed(elapsed)} (${toolCalls} tool calls, ${fullText.length} chars)`);
|
|
562
|
-
if (taskId)
|
|
563
|
-
postLog(taskId, `${PREFIX} [${name}] ⚠ Timed out after ${formatElapsed(elapsed)}`);
|
|
564
|
-
resolve(resultText || fullText || "");
|
|
565
|
-
}, timeoutMs);
|
|
566
|
-
proc.on("exit", (code) => {
|
|
567
|
-
clearTimeout(timeout);
|
|
568
|
-
const elapsed = Math.round((Date.now() - startMs) / 1000);
|
|
569
|
-
if (timedOut)
|
|
570
|
-
return; // already resolved
|
|
571
|
-
const output = resultText || fullText || "";
|
|
572
|
-
if (code === 0 && output.length > 0) {
|
|
573
|
-
console.log(`${ts()} ${label} ${chalk.green("✓ Done")} in ${formatElapsed(elapsed)} (${toolCalls} tool calls, ${output.length} chars)`);
|
|
574
|
-
if (taskId)
|
|
575
|
-
postLog(taskId, `${PREFIX} [${name}] ✓ Done in ${formatElapsed(elapsed)} (${toolCalls} tool calls, ${output.length} chars)`);
|
|
576
|
-
}
|
|
577
|
-
else if (code !== 0) {
|
|
578
|
-
console.log(`${ts()} ${label} ${chalk.red(`✗ Exited ${code}`)} after ${formatElapsed(elapsed)} — ${stderrOutput.substring(0, 150) || "no stderr"}`);
|
|
579
|
-
if (taskId)
|
|
580
|
-
postLog(taskId, `${PREFIX} [${name}] ✗ Exited ${code} after ${formatElapsed(elapsed)}`);
|
|
581
|
-
}
|
|
582
|
-
else {
|
|
583
|
-
console.log(`${ts()} ${label} ${chalk.yellow("⚠ Empty output")} after ${formatElapsed(elapsed)} (${toolCalls} tool calls)`);
|
|
584
|
-
if (taskId)
|
|
585
|
-
postLog(taskId, `${PREFIX} [${name}] ⚠ Empty output after ${formatElapsed(elapsed)}`);
|
|
586
|
-
}
|
|
587
|
-
resolve(output);
|
|
588
|
-
});
|
|
589
|
-
proc.on("error", (err) => {
|
|
590
|
-
clearTimeout(timeout);
|
|
591
|
-
console.log(`${ts()} ${label} ${chalk.red("✗ Spawn failed:")} ${err.message}`);
|
|
592
|
-
resolve("");
|
|
593
|
-
});
|
|
594
|
-
});
|
|
595
|
-
}
|
|
596
|
-
/**
|
|
597
|
-
* Run an analyst agent via Vercel AI SDK with tool access to the cloned repo.
|
|
598
|
-
* Used for non-Anthropic providers (OpenAI, Google, Ollama) that can't use Claude CLI.
|
|
599
|
-
* Returns the analyst's report text, or an empty string on failure.
|
|
600
|
-
*/
|
|
601
|
-
async function runAnalystWithSdk(name, provider, model, apiKey, prompt, repoPath, timeoutMs = 900_000, taskId) {
|
|
602
|
-
const label = chalk.blue(`[${name}]`);
|
|
603
|
-
const modelLabel = chalk.yellow(`${provider}/${model}`);
|
|
604
|
-
const startMs = Date.now();
|
|
605
|
-
console.log(`${ts()} ${label} Starting analyst using ${modelLabel} via AI SDK...`);
|
|
606
|
-
if (taskId)
|
|
607
|
-
postLog(taskId, `${PREFIX} [${name}] Starting analyst using ${provider}/${model} via AI SDK...`);
|
|
608
|
-
try {
|
|
609
|
-
const result = await generateTextWithTools({
|
|
610
|
-
provider,
|
|
611
|
-
model,
|
|
612
|
-
apiKey,
|
|
613
|
-
prompt,
|
|
614
|
-
workingDir: repoPath,
|
|
615
|
-
maxTokens: 16384,
|
|
616
|
-
temperature: 0.3,
|
|
617
|
-
timeoutMs,
|
|
618
|
-
maxSteps: 20, // Allow thorough exploration
|
|
619
|
-
enableTools: true,
|
|
620
|
-
});
|
|
621
|
-
const elapsed = Math.round((Date.now() - startMs) / 1000);
|
|
622
|
-
if (result && result.length > 0) {
|
|
623
|
-
console.log(`${ts()} ${label} ${chalk.green("✓ Done")} in ${elapsed}s (${result.length} chars)`);
|
|
624
|
-
return result;
|
|
625
|
-
}
|
|
626
|
-
console.log(`${ts()} ${label} ${chalk.yellow("⚠ Empty output")} after ${elapsed}s`);
|
|
627
|
-
return "";
|
|
628
|
-
}
|
|
629
|
-
catch (error) {
|
|
630
|
-
const elapsed = Math.round((Date.now() - startMs) / 1000);
|
|
631
|
-
const errMsg = error instanceof Error ? error.message : String(error);
|
|
632
|
-
console.log(`${ts()} ${label} ${chalk.red(`✗ Failed`)} after ${elapsed}s: ${errMsg.substring(0, 150)}`);
|
|
633
|
-
return "";
|
|
634
|
-
}
|
|
635
|
-
}
|
|
636
|
-
/** Analyst prompt templates */
|
|
637
|
-
const CODEBASE_ANALYST_PROMPT = `You are a codebase analyst. Your job is to explore this repository using tools and report what you find.
|
|
638
|
-
|
|
639
|
-
IMPORTANT: You MUST use tools to explore the repository. Do NOT guess or make assumptions.
|
|
640
|
-
|
|
641
|
-
Step 1: Run Glob with pattern "**/*" to see the top-level directory structure.
|
|
642
|
-
Step 2: Read key files: package.json, tsconfig.json, README.md, .env.example, or equivalents.
|
|
643
|
-
Step 3: Run Glob on src/ or the main source directory to understand the code layout.
|
|
644
|
-
Step 4: Read 2-3 representative source files to understand patterns and frameworks.
|
|
645
|
-
|
|
646
|
-
After exploring, write a report covering:
|
|
647
|
-
1. Directory structure and organization
|
|
648
|
-
2. Languages, frameworks, and key dependencies (from package.json, requirements.txt, etc.)
|
|
649
|
-
3. Existing test files and testing patterns (search for test/, __tests__, *.test.*, *.spec.*)
|
|
650
|
-
4. CI/CD configuration (search for .github/workflows/, Jenkinsfile, etc.)
|
|
651
|
-
5. Configuration files and environment setup
|
|
652
|
-
|
|
653
|
-
Keep your report under 2000 words. Only report facts you verified with tools.`;
|
|
654
|
-
function makeRequirementsAnalystPrompt(task) {
|
|
655
|
-
return `You are a requirements analyst. Analyze the following task and the repository to identify what needs to be built.
|
|
656
|
-
|
|
657
|
-
Task: ${task.summary}
|
|
658
|
-
${task.description ? `\nDescription:\n${task.description}` : ""}
|
|
659
|
-
|
|
660
|
-
IMPORTANT: You MUST use tools to understand the existing codebase before analyzing requirements.
|
|
661
|
-
|
|
662
|
-
Step 1: Run Glob with pattern "**/*" to see what already exists in the repository.
|
|
663
|
-
Step 2: Read any existing README, docs, or configuration to understand the current state.
|
|
664
|
-
Step 3: Search for any code related to the task requirements using Grep.
|
|
665
|
-
|
|
666
|
-
After exploring, write a report covering:
|
|
667
|
-
1. Explicit acceptance criteria — what MUST be built based on the description
|
|
668
|
-
2. Implicit requirements — what's assumed but not stated (auth, error handling, etc.)
|
|
669
|
-
3. What already exists vs what needs to be created (based on your file exploration)
|
|
670
|
-
4. Ambiguities that could lead to wrong implementation
|
|
671
|
-
5. Suggested components/modules and which persona should own each
|
|
672
|
-
|
|
673
|
-
Keep your report under 1500 words.`;
|
|
674
|
-
}
|
|
675
|
-
function makeRiskAssessorPrompt(task) {
|
|
676
|
-
return `You are a risk assessor. Your job is to search this repository for potential risks and blockers for a development task.
|
|
677
|
-
|
|
678
|
-
Task: ${task.summary}
|
|
679
|
-
${task.description ? `\nDescription:\n${task.description}` : ""}
|
|
680
|
-
|
|
681
|
-
IMPORTANT: You MUST use tools to search the codebase. Do NOT guess file paths or make assumptions.
|
|
682
|
-
|
|
683
|
-
Step 1: Run Glob with pattern "**/*" to see the full repository structure.
|
|
684
|
-
Step 2: Use Grep to search for code related to the task (relevant keywords, APIs, components).
|
|
685
|
-
Step 3: Read files that are likely to be modified or affected by this task.
|
|
686
|
-
Step 4: Search for existing tests (Grep for "test", "spec", "describe", "it(") to find test coverage.
|
|
687
|
-
|
|
688
|
-
After exploring, write a report covering:
|
|
689
|
-
1. Specific files that will need to be modified (exact paths from your search)
|
|
690
|
-
2. Files with heavy coupling or shared dependencies (imports you found)
|
|
691
|
-
3. Existing tests that will need updating (exact file paths)
|
|
692
|
-
4. Environment, config, or migration requirements
|
|
693
|
-
5. Deployment or infrastructure risks
|
|
694
|
-
|
|
695
|
-
Keep your report under 1500 words. Only report facts you verified with tools.`;
|
|
696
|
-
}
|
|
697
|
-
/**
|
|
698
|
-
* Run team planning: spawn 3 parallel analyst agents, then synthesize
|
|
699
|
-
* their reports into an enhanced planning prompt for the final planner.
|
|
700
|
-
*
|
|
701
|
-
* Falls back to single-agent planning if anything goes wrong.
|
|
702
|
-
*/
|
|
703
|
-
/**
|
|
704
|
-
* Run team analysis: spawn 3 parallel analyst agents once, then return
|
|
705
|
-
* an enhanced prompt with their reports appended. Returns null if all
|
|
706
|
-
* analysts fail (caller should fall back to basePrompt).
|
|
707
|
-
*
|
|
708
|
-
* This runs ONCE before the planner-critic loop — analyst prompts don't
|
|
709
|
-
* include critic feedback, so re-running them on iteration 2+ is waste.
|
|
710
|
-
*/
|
|
711
|
-
async function runTeamAnalysis(task, basePrompt, claudePath, model, env, repoPath, taskId, startTime, provider = "anthropic", providerApiKey) {
|
|
712
|
-
const taskLabel = chalk.cyan(taskId.slice(0, 8));
|
|
713
|
-
console.log(`${ts()} ${taskLabel} ${chalk.magenta("◆ Team planning")} — running 3 analysts in parallel...`);
|
|
714
|
-
await postLog(taskId, `${PREFIX} Team planning: running codebase, requirements, and risk analysts in parallel...`);
|
|
715
|
-
await postProgress(taskId, "reading_repo", Math.round((Date.now() - startTime) / 1000), "Running parallel analysis agents...", 0, 0);
|
|
716
|
-
const analysisModel = model;
|
|
717
|
-
const MAX_TEAM_RETRIES = 3;
|
|
718
|
-
const useCliAnalysts = provider === "anthropic";
|
|
719
|
-
// Helper: dispatch analyst to Claude CLI or AI SDK based on provider
|
|
720
|
-
const dispatchAnalyst = (name, prompt) => {
|
|
721
|
-
if (useCliAnalysts) {
|
|
722
|
-
return runAnalyst(name, claudePath, analysisModel, prompt, repoPath, env, 900_000, taskId);
|
|
723
|
-
}
|
|
724
|
-
if (!providerApiKey) {
|
|
725
|
-
console.log(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} No API key for ${provider} analysts, skipping ${name}`);
|
|
726
|
-
return Promise.resolve("");
|
|
727
|
-
}
|
|
728
|
-
return runAnalystWithSdk(name, provider, analysisModel, providerApiKey, prompt, repoPath, 900_000, taskId);
|
|
729
|
-
};
|
|
730
|
-
let codebaseReport = "";
|
|
731
|
-
let requirementsReport = "";
|
|
732
|
-
let riskReport = "";
|
|
733
|
-
for (let attempt = 1; attempt <= MAX_TEAM_RETRIES; attempt++) {
|
|
734
|
-
if (attempt > 1) {
|
|
735
|
-
console.log(`${ts()} ${taskLabel} ${chalk.magenta("◆ Team planning")} — retry ${attempt}/${MAX_TEAM_RETRIES}...`);
|
|
736
|
-
await postLog(taskId, `${PREFIX} Team analysis retry ${attempt}/${MAX_TEAM_RETRIES}...`);
|
|
737
|
-
}
|
|
738
|
-
const [codebaseResult, requirementsResult, riskResult] = await Promise.allSettled([
|
|
739
|
-
codebaseReport ? Promise.resolve(codebaseReport) : dispatchAnalyst("Codebase", CODEBASE_ANALYST_PROMPT),
|
|
740
|
-
requirementsReport ? Promise.resolve(requirementsReport) : dispatchAnalyst("Requirements", makeRequirementsAnalystPrompt(task)),
|
|
741
|
-
riskReport ? Promise.resolve(riskReport) : dispatchAnalyst("Risk", makeRiskAssessorPrompt(task)),
|
|
742
|
-
]);
|
|
743
|
-
if (!codebaseReport && codebaseResult.status === "fulfilled") {
|
|
744
|
-
codebaseReport = codebaseResult.value;
|
|
745
|
-
}
|
|
746
|
-
if (!requirementsReport && requirementsResult.status === "fulfilled") {
|
|
747
|
-
requirementsReport = requirementsResult.value;
|
|
748
|
-
}
|
|
749
|
-
if (!riskReport && riskResult.status === "fulfilled") {
|
|
750
|
-
riskReport = riskResult.value;
|
|
751
|
-
}
|
|
752
|
-
const successCount = [codebaseReport, requirementsReport, riskReport].filter((r) => r.length > 0).length;
|
|
753
|
-
const analysisElapsed = Math.round((Date.now() - startTime) / 1000);
|
|
754
|
-
console.log(`${ts()} ${taskLabel} Analysis attempt ${attempt}: ${successCount}/3 reports (${analysisElapsed}s)`);
|
|
755
|
-
if (successCount > 0) {
|
|
756
|
-
console.log(`${ts()} ${taskLabel} ${chalk.green("✓")} Analysis complete: ${successCount}/3 reports (${analysisElapsed}s)`);
|
|
757
|
-
await postLog(taskId, `${PREFIX} Team analysis complete: ${successCount}/3 reports in ${formatElapsed(analysisElapsed)}. Synthesizing plan...`);
|
|
758
|
-
await postProgress(taskId, "analyzing", analysisElapsed, "Synthesizing analysis reports...", 0, 0);
|
|
759
|
-
break;
|
|
760
|
-
}
|
|
761
|
-
if (attempt === MAX_TEAM_RETRIES) {
|
|
762
|
-
console.log(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} All analysts failed after ${MAX_TEAM_RETRIES} attempts, falling back to single-agent planning`);
|
|
763
|
-
await postLog(taskId, `${PREFIX} All analysis agents failed after ${MAX_TEAM_RETRIES} attempts — falling back to single-agent planning`);
|
|
764
|
-
return null;
|
|
765
|
-
}
|
|
766
|
-
}
|
|
767
|
-
// Build enhanced prompt with analysis reports
|
|
768
|
-
const sections = [];
|
|
769
|
-
if (codebaseReport) {
|
|
770
|
-
sections.push(`## Codebase Analysis (from automated analysis)\n\n${codebaseReport}`);
|
|
771
|
-
}
|
|
772
|
-
if (requirementsReport) {
|
|
773
|
-
sections.push(`## Requirements Analysis\n\n${requirementsReport}`);
|
|
774
|
-
}
|
|
775
|
-
if (riskReport) {
|
|
776
|
-
sections.push(`## Risk Assessment\n\n${riskReport}`);
|
|
777
|
-
}
|
|
778
|
-
return (basePrompt +
|
|
779
|
-
"\n\n" +
|
|
780
|
-
sections.join("\n\n") +
|
|
781
|
-
"\n\n" +
|
|
782
|
-
"Use these analyses to produce a more accurate execution plan.\n" +
|
|
783
|
-
"Prefer actual file paths discovered in the codebase analysis over guessed paths.");
|
|
784
|
-
}
|
|
785
450
|
/**
|
|
786
451
|
* Run planning for a task with Planner-Critic validation loop.
|
|
787
452
|
*
|
|
788
453
|
* Flow:
|
|
789
454
|
* 1. Fetch planning prompt from cloud API
|
|
790
|
-
* 2.
|
|
791
|
-
* 3.
|
|
792
|
-
* 4.
|
|
793
|
-
* 5.
|
|
794
|
-
* 6. If critic
|
|
795
|
-
* 7.
|
|
796
|
-
* 8.
|
|
455
|
+
* 2. Clone target repo (if available) so planner can explore with tools
|
|
456
|
+
* 3. Run Claude CLI to generate plan
|
|
457
|
+
* 4. Parse plan, apply file cap (max 5 files per story)
|
|
458
|
+
* 5. Run critic validation via Claude CLI
|
|
459
|
+
* 6. If critic approves (score >= 80): post validated plan to API
|
|
460
|
+
* 7. If critic rejects: re-run planner with feedback (up to MAX_ITERATIONS)
|
|
461
|
+
* 8. After MAX_ITERATIONS without approval: post best plan if score >= 50 (fallback)
|
|
462
|
+
* 9. If no plan scored >= 50: fail the task
|
|
797
463
|
*/
|
|
798
464
|
export async function planTask(task, config, credentials) {
|
|
799
465
|
const taskLabel = chalk.cyan(task.id.slice(0, 8));
|
|
@@ -816,12 +482,9 @@ export async function planTask(task, config, credentials) {
|
|
|
816
482
|
const startTime = Date.now();
|
|
817
483
|
// PRD for critic validation: use task description, fall back to summary
|
|
818
484
|
const prd = task.description || task.summary;
|
|
819
|
-
//
|
|
820
|
-
// Analyst prompts don't include critic feedback, so re-running them
|
|
821
|
-
// on iteration 2+ wastes compute (they'd produce the same reports).
|
|
485
|
+
// Clone target repo so the planner can explore with tools
|
|
822
486
|
let repoPath = null;
|
|
823
|
-
|
|
824
|
-
if (config.teamPlanningEnabled && task.githubRepo) {
|
|
487
|
+
if (task.githubRepo) {
|
|
825
488
|
const scmProvider = task.scmProvider || "github";
|
|
826
489
|
const scmToken = scmProvider === "bitbucket"
|
|
827
490
|
? config.bitbucketToken
|
|
@@ -832,23 +495,11 @@ export async function planTask(task, config, credentials) {
|
|
|
832
495
|
repoPath = await cloneTargetRepo(task.githubRepo, scmToken, scmProvider, task.id);
|
|
833
496
|
}
|
|
834
497
|
else {
|
|
835
|
-
console.log(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} No SCM token for ${scmProvider},
|
|
836
|
-
}
|
|
837
|
-
if (repoPath) {
|
|
838
|
-
const analystModel = config.analystModel || cliModel;
|
|
839
|
-
const analystBackend = isAnthropicPlanning ? "Claude CLI" : `${provider} AI SDK`;
|
|
840
|
-
console.log(`${ts()} ${taskLabel} Analysts using model: ${chalk.yellow(analystModel)} via ${chalk.dim(analystBackend)} (planner: ${chalk.yellow(cliModel)})`);
|
|
841
|
-
const analysisResult = await runTeamAnalysis(task, basePrompt, claudePath, analystModel, cleanEnv, repoPath, task.id, startTime, provider, providerApiKey);
|
|
842
|
-
if (analysisResult) {
|
|
843
|
-
enhancedBasePrompt = analysisResult;
|
|
844
|
-
}
|
|
845
|
-
// else: all analysts failed, fall back to basePrompt
|
|
498
|
+
console.log(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} No SCM token for ${scmProvider}, planner will run without repo access`);
|
|
846
499
|
}
|
|
847
500
|
}
|
|
848
501
|
// 2. Planner-Critic iteration loop
|
|
849
|
-
|
|
850
|
-
// Critic feedback gets appended on re-plan, but analyst reports are fixed.
|
|
851
|
-
let currentPrompt = enhancedBasePrompt;
|
|
502
|
+
let currentPrompt = basePrompt;
|
|
852
503
|
let bestPlan = null;
|
|
853
504
|
let bestScore = 0;
|
|
854
505
|
// Track critic history across iterations for analytics
|
|
@@ -870,9 +521,7 @@ export async function planTask(task, config, credentials) {
|
|
|
870
521
|
let rawOutput;
|
|
871
522
|
try {
|
|
872
523
|
if (isAnthropicPlanning) {
|
|
873
|
-
|
|
874
|
-
const hasAnalystContext = enhancedBasePrompt !== basePrompt;
|
|
875
|
-
rawOutput = await runClaudeCli(claudePath, cliModel, currentPrompt, cleanEnv, task.id, startTime, hasAnalystContext);
|
|
524
|
+
rawOutput = await runClaudeCli(claudePath, cliModel, currentPrompt, cleanEnv, task.id, startTime, repoPath || undefined);
|
|
876
525
|
}
|
|
877
526
|
else {
|
|
878
527
|
if (!providerApiKey) {
|
|
@@ -997,7 +646,7 @@ export async function planTask(task, config, credentials) {
|
|
|
997
646
|
// 2f. Rejected — append critic feedback for next iteration
|
|
998
647
|
if (iteration < MAX_ITERATIONS) {
|
|
999
648
|
const feedback = formatCriticFeedback(criticResult);
|
|
1000
|
-
currentPrompt = enhancedBasePrompt + "\n\n" + feedback;
|
|
649
|
+
currentPrompt = basePrompt + "\n\n" + feedback;
|
|
1001
650
|
const msg = `${PREFIX} Critic rejected (score: ${criticResult.score}/100, threshold: ${AUTO_APPROVAL_THRESHOLD}). Re-planning with feedback...`;
|
|
1002
651
|
console.log(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} ${msg}`);
|
|
1003
652
|
await postLog(task.id, msg);
|