@workermill/agent 0.8.1 → 0.8.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +0 -0
- package/dist/config.d.ts +0 -4
- package/dist/config.js +0 -4
- package/dist/plan-validator.js +1 -0
- package/dist/planner.d.ts +8 -7
- package/dist/planner.js +24 -374
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
File without changes
|
package/dist/config.d.ts
CHANGED
|
@@ -16,8 +16,6 @@ export interface AgentConfig {
|
|
|
16
16
|
bitbucketToken: string;
|
|
17
17
|
gitlabToken: string;
|
|
18
18
|
workerImage: string;
|
|
19
|
-
teamPlanningEnabled: boolean;
|
|
20
|
-
analystModel?: string;
|
|
21
19
|
}
|
|
22
20
|
export interface FileConfig {
|
|
23
21
|
apiUrl: string;
|
|
@@ -32,8 +30,6 @@ export interface FileConfig {
|
|
|
32
30
|
gitlab: string;
|
|
33
31
|
};
|
|
34
32
|
workerImage: string;
|
|
35
|
-
teamPlanningEnabled?: boolean;
|
|
36
|
-
analystModel?: string;
|
|
37
33
|
setupCompletedAt: string;
|
|
38
34
|
}
|
|
39
35
|
export declare function getConfigDir(): string;
|
package/dist/config.js
CHANGED
|
@@ -75,8 +75,6 @@ export function loadConfigFromFile() {
|
|
|
75
75
|
bitbucketToken: fc.tokens?.bitbucket || "",
|
|
76
76
|
gitlabToken: fc.tokens?.gitlab || "",
|
|
77
77
|
workerImage,
|
|
78
|
-
teamPlanningEnabled: fc.teamPlanningEnabled ?? true,
|
|
79
|
-
analystModel: fc.analystModel,
|
|
80
78
|
};
|
|
81
79
|
}
|
|
82
80
|
/**
|
|
@@ -121,8 +119,6 @@ export function loadConfig() {
|
|
|
121
119
|
bitbucketToken: process.env.BITBUCKET_TOKEN || "",
|
|
122
120
|
gitlabToken: process.env.GITLAB_TOKEN || "",
|
|
123
121
|
workerImage: process.env.WORKER_IMAGE || "workermill-worker:local",
|
|
124
|
-
teamPlanningEnabled: process.env.TEAM_PLANNING_ENABLED !== "false",
|
|
125
|
-
analystModel: process.env.ANALYST_MODEL,
|
|
126
122
|
};
|
|
127
123
|
}
|
|
128
124
|
/**
|
package/dist/plan-validator.js
CHANGED
|
@@ -222,6 +222,7 @@ Review this execution plan against the PRD:
|
|
|
222
222
|
4. **Unrealistic Scope** - Any step targeting >5 files MUST score below 80 (auto-rejection threshold). Each step should modify at most 5 files. If a step needs more, split it into multiple steps first.
|
|
223
223
|
5. **Missing Operational Steps** - If the PRD requires deployment, provisioning, migrations, or running commands, does the plan include operational steps? Writing code is not the same as deploying it.
|
|
224
224
|
6. **Overlapping File Scope** - If two or more steps share the same targetFiles, this causes parallel merge conflicts. Steps MUST NOT overlap on targetFiles. Deduct 10 points per shared file across steps.
|
|
225
|
+
7. **Serialization Bottleneck** - If more than half the stories depend on a single story that targets >5 files, the plan has a bottleneck. Deduct 15 points — split the foundation or allow more parallel work.
|
|
225
226
|
|
|
226
227
|
## Scoring Guide
|
|
227
228
|
|
package/dist/planner.d.ts
CHANGED
|
@@ -28,12 +28,13 @@ export interface PlanningTask {
|
|
|
28
28
|
*
|
|
29
29
|
* Flow:
|
|
30
30
|
* 1. Fetch planning prompt from cloud API
|
|
31
|
-
* 2.
|
|
32
|
-
* 3.
|
|
33
|
-
* 4.
|
|
34
|
-
* 5.
|
|
35
|
-
* 6. If critic
|
|
36
|
-
* 7.
|
|
37
|
-
* 8.
|
|
31
|
+
* 2. Clone target repo (if available) so planner can explore with tools
|
|
32
|
+
* 3. Run Claude CLI to generate plan
|
|
33
|
+
* 4. Parse plan, apply file cap (max 5 files per story)
|
|
34
|
+
* 5. Run critic validation via Claude CLI
|
|
35
|
+
* 6. If critic approves (score >= 80): post validated plan to API
|
|
36
|
+
* 7. If critic rejects: re-run planner with feedback (up to MAX_ITERATIONS)
|
|
37
|
+
* 8. After MAX_ITERATIONS without approval: post best plan if score >= 50 (fallback)
|
|
38
|
+
* 9. If no plan scored >= 50: fail the task
|
|
38
39
|
*/
|
|
39
40
|
export declare function planTask(task: PlanningTask, config: AgentConfig, credentials?: ClaimCredentials): Promise<boolean>;
|
package/dist/planner.js
CHANGED
|
@@ -80,9 +80,10 @@ const logQueue = [];
|
|
|
80
80
|
let logDrainPromise = null;
|
|
81
81
|
async function drainLogQueue() {
|
|
82
82
|
while (logQueue.length > 0) {
|
|
83
|
-
|
|
83
|
+
// Drain up to 50 entries per batch POST
|
|
84
|
+
const batch = logQueue.splice(0, 50);
|
|
84
85
|
try {
|
|
85
|
-
await api.post("/api/control-center/logs",
|
|
86
|
+
await api.post("/api/control-center/logs/batch", { entries: batch }, { timeout: 5_000 });
|
|
86
87
|
}
|
|
87
88
|
catch {
|
|
88
89
|
// Best-effort — drop on failure
|
|
@@ -157,7 +158,7 @@ function phaseLabel(phase, elapsed) {
|
|
|
157
158
|
* Run Claude CLI with stream-json output, posting real-time phase milestones
|
|
158
159
|
* to the cloud dashboard — identical terminal experience to cloud planning.
|
|
159
160
|
*/
|
|
160
|
-
function runClaudeCli(claudePath, model, prompt, env, taskId, startTime,
|
|
161
|
+
function runClaudeCli(claudePath, model, prompt, env, taskId, startTime, cwd) {
|
|
161
162
|
const taskLabel = chalk.cyan(taskId.slice(0, 8));
|
|
162
163
|
return new Promise((resolve, reject) => {
|
|
163
164
|
const cliArgs = [
|
|
@@ -167,12 +168,8 @@ function runClaudeCli(claudePath, model, prompt, env, taskId, startTime, disable
|
|
|
167
168
|
"--model", model,
|
|
168
169
|
"--permission-mode", "bypassPermissions",
|
|
169
170
|
];
|
|
170
|
-
// When analysts already explored the repo, strip tools so the planner
|
|
171
|
-
// doesn't waste turns re-exploring — it has all context in the prompt.
|
|
172
|
-
if (disableTools) {
|
|
173
|
-
cliArgs.push("--allowedTools", "");
|
|
174
|
-
}
|
|
175
171
|
const proc = spawn(claudePath, cliArgs, {
|
|
172
|
+
cwd,
|
|
176
173
|
env,
|
|
177
174
|
stdio: ["pipe", "pipe", "pipe"],
|
|
178
175
|
});
|
|
@@ -222,7 +219,7 @@ function runClaudeCli(claudePath, model, prompt, env, taskId, startTime, disable
|
|
|
222
219
|
console.log(`${ts()} ${taskLabel} ${chalk.dim(msg)}`);
|
|
223
220
|
}
|
|
224
221
|
// Flush buffered LLM text to dashboard every 500ms (complete lines only)
|
|
225
|
-
const textFlushInterval = setInterval(() => flushTextBuffer(),
|
|
222
|
+
const textFlushInterval = setInterval(() => flushTextBuffer(), 500);
|
|
226
223
|
// SSE progress updates every 2s — drives PlanningTerminalBar in dashboard
|
|
227
224
|
// (same cadence as local dev's progressInterval in planning-agent-local.ts)
|
|
228
225
|
const sseProgressInterval = setInterval(() => {
|
|
@@ -422,15 +419,15 @@ function buildCloneUrl(repo, token, scmProvider) {
|
|
|
422
419
|
}
|
|
423
420
|
}
|
|
424
421
|
/**
|
|
425
|
-
* Clone the target repo to a temp directory
|
|
426
|
-
* Returns the path on success, or null on failure
|
|
422
|
+
* Clone the target repo to a temp directory so the planner can explore with tools.
|
|
423
|
+
* Returns the path on success, or null on failure.
|
|
427
424
|
*/
|
|
428
425
|
async function cloneTargetRepo(repo, token, scmProvider, taskId) {
|
|
429
426
|
const taskLabel = chalk.cyan(taskId.slice(0, 8));
|
|
430
427
|
const tmpDir = `/tmp/workermill-planning-${taskId.slice(0, 8)}-${Date.now()}`;
|
|
431
428
|
try {
|
|
432
429
|
const cloneUrl = buildCloneUrl(repo, token, scmProvider);
|
|
433
|
-
console.log(`${ts()} ${taskLabel} ${chalk.dim("Cloning repo for
|
|
430
|
+
console.log(`${ts()} ${taskLabel} ${chalk.dim("Cloning repo for planner...")}`);
|
|
434
431
|
execSync(`git clone --depth 1 --single-branch "${cloneUrl}" "${tmpDir}"`, {
|
|
435
432
|
stdio: "ignore",
|
|
436
433
|
timeout: 60_000,
|
|
@@ -440,7 +437,7 @@ async function cloneTargetRepo(repo, token, scmProvider, taskId) {
|
|
|
440
437
|
}
|
|
441
438
|
catch (error) {
|
|
442
439
|
const errMsg = error instanceof Error ? error.message : String(error);
|
|
443
|
-
console.error(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} Clone failed,
|
|
440
|
+
console.error(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} Clone failed, planner will run without repo access: ${errMsg.substring(0, 100)}`);
|
|
444
441
|
// Cleanup partial clone
|
|
445
442
|
try {
|
|
446
443
|
execSync(`rm -rf "${tmpDir}"`, { stdio: "ignore" });
|
|
@@ -451,349 +448,19 @@ async function cloneTargetRepo(repo, token, scmProvider, taskId) {
|
|
|
451
448
|
return null;
|
|
452
449
|
}
|
|
453
450
|
}
|
|
454
|
-
/**
|
|
455
|
-
* Run an analyst agent via Claude CLI with tool access to the cloned repo.
|
|
456
|
-
* Returns the analyst's report text, or an empty string on failure.
|
|
457
|
-
*/
|
|
458
|
-
function runAnalyst(name, claudePath, model, prompt, repoPath, env, timeoutMs = 900_000, taskId) {
|
|
459
|
-
const label = chalk.blue(`[${name}]`);
|
|
460
|
-
const modelLabel = chalk.yellow(model);
|
|
461
|
-
return new Promise((resolve) => {
|
|
462
|
-
console.log(`${ts()} ${label} Starting analyst using ${modelLabel}...`);
|
|
463
|
-
if (taskId) {
|
|
464
|
-
postLog(taskId, `${PREFIX} [${name}] Starting analyst using ${model}...`);
|
|
465
|
-
}
|
|
466
|
-
const proc = spawn(claudePath, [
|
|
467
|
-
"--print",
|
|
468
|
-
"--verbose",
|
|
469
|
-
"--output-format", "stream-json",
|
|
470
|
-
"--model", model,
|
|
471
|
-
"--permission-mode", "bypassPermissions",
|
|
472
|
-
], {
|
|
473
|
-
cwd: repoPath,
|
|
474
|
-
env,
|
|
475
|
-
stdio: ["pipe", "pipe", "pipe"],
|
|
476
|
-
});
|
|
477
|
-
// Write prompt via stdin (same as runClaudeCli)
|
|
478
|
-
proc.stdin.write(prompt);
|
|
479
|
-
proc.stdin.end();
|
|
480
|
-
let resultText = "";
|
|
481
|
-
let fullText = "";
|
|
482
|
-
let stderrOutput = "";
|
|
483
|
-
let lineBuffer = "";
|
|
484
|
-
let toolCalls = 0;
|
|
485
|
-
let timedOut = false;
|
|
486
|
-
const startMs = Date.now();
|
|
487
|
-
proc.stderr.on("data", (chunk) => {
|
|
488
|
-
const text = chunk.toString();
|
|
489
|
-
stderrOutput += text;
|
|
490
|
-
// Show stderr in real-time so we can see what's happening
|
|
491
|
-
for (const line of text.split("\n").filter((l) => l.trim())) {
|
|
492
|
-
console.log(`${ts()} ${label} ${chalk.red("stderr:")} ${line.trim()}`);
|
|
493
|
-
}
|
|
494
|
-
});
|
|
495
|
-
proc.stdout.on("data", (data) => {
|
|
496
|
-
lineBuffer += data.toString();
|
|
497
|
-
const lines = lineBuffer.split("\n");
|
|
498
|
-
lineBuffer = lines.pop() || "";
|
|
499
|
-
for (const line of lines) {
|
|
500
|
-
const trimmed = line.trim();
|
|
501
|
-
if (!trimmed)
|
|
502
|
-
continue;
|
|
503
|
-
try {
|
|
504
|
-
const event = JSON.parse(trimmed);
|
|
505
|
-
// Claude CLI stream-json wraps content in assistant message events
|
|
506
|
-
if (event.type === "assistant" && event.message?.content) {
|
|
507
|
-
const content = event.message.content;
|
|
508
|
-
if (Array.isArray(content)) {
|
|
509
|
-
for (const block of content) {
|
|
510
|
-
if (block.type === "text" && block.text) {
|
|
511
|
-
fullText += block.text;
|
|
512
|
-
// Log analyst reasoning (first line, truncated)
|
|
513
|
-
const thought = block.text.trim().split("\n")[0].substring(0, 120);
|
|
514
|
-
if (thought) {
|
|
515
|
-
console.log(`${ts()} ${label} ${chalk.dim("💭")} ${chalk.dim(thought)}`);
|
|
516
|
-
if (taskId)
|
|
517
|
-
postLog(taskId, `${PREFIX} [${name}] 💭 ${thought}`);
|
|
518
|
-
}
|
|
519
|
-
}
|
|
520
|
-
else if (block.type === "tool_use") {
|
|
521
|
-
toolCalls++;
|
|
522
|
-
const toolName = block.name || "unknown";
|
|
523
|
-
// Show tool name + input preview (file path, pattern, etc.)
|
|
524
|
-
const inputStr = block.input ? JSON.stringify(block.input) : "";
|
|
525
|
-
const inputPreview = inputStr.length > 80 ? inputStr.substring(0, 80) + "…" : inputStr;
|
|
526
|
-
console.log(`${ts()} ${label} ${chalk.dim(`Tool: ${toolName}`)}${inputPreview ? chalk.dim(` ${inputPreview}`) : ""}`);
|
|
527
|
-
if (taskId)
|
|
528
|
-
postLog(taskId, `${PREFIX} [${name}] Tool: ${toolName} ${inputPreview}`);
|
|
529
|
-
}
|
|
530
|
-
}
|
|
531
|
-
}
|
|
532
|
-
else if (typeof content === "string") {
|
|
533
|
-
fullText += content;
|
|
534
|
-
}
|
|
535
|
-
}
|
|
536
|
-
else if (event.type === "content_block_delta" && event.delta?.text) {
|
|
537
|
-
// Fallback: raw API streaming format (may appear in some CLI versions)
|
|
538
|
-
fullText += event.delta.text;
|
|
539
|
-
}
|
|
540
|
-
else if (event.type === "content_block_start" && event.content_block?.type === "tool_use") {
|
|
541
|
-
toolCalls++;
|
|
542
|
-
const toolName = event.content_block?.name || "unknown";
|
|
543
|
-
console.log(`${ts()} ${label} ${chalk.dim(`Tool: ${toolName}`)}`);
|
|
544
|
-
if (taskId)
|
|
545
|
-
postLog(taskId, `${PREFIX} [${name}] Tool: ${toolName}`);
|
|
546
|
-
}
|
|
547
|
-
else if (event.type === "result" && event.result) {
|
|
548
|
-
resultText =
|
|
549
|
-
typeof event.result === "string" ? event.result : "";
|
|
550
|
-
}
|
|
551
|
-
}
|
|
552
|
-
catch {
|
|
553
|
-
fullText += trimmed + "\n";
|
|
554
|
-
}
|
|
555
|
-
}
|
|
556
|
-
});
|
|
557
|
-
const timeout = setTimeout(() => {
|
|
558
|
-
timedOut = true;
|
|
559
|
-
proc.kill("SIGTERM");
|
|
560
|
-
const elapsed = Math.round((Date.now() - startMs) / 1000);
|
|
561
|
-
console.log(`${ts()} ${label} ${chalk.yellow("⚠ Timed out")} after ${formatElapsed(elapsed)} (${toolCalls} tool calls, ${fullText.length} chars)`);
|
|
562
|
-
if (taskId)
|
|
563
|
-
postLog(taskId, `${PREFIX} [${name}] ⚠ Timed out after ${formatElapsed(elapsed)}`);
|
|
564
|
-
resolve(resultText || fullText || "");
|
|
565
|
-
}, timeoutMs);
|
|
566
|
-
proc.on("exit", (code) => {
|
|
567
|
-
clearTimeout(timeout);
|
|
568
|
-
const elapsed = Math.round((Date.now() - startMs) / 1000);
|
|
569
|
-
if (timedOut)
|
|
570
|
-
return; // already resolved
|
|
571
|
-
const output = resultText || fullText || "";
|
|
572
|
-
if (code === 0 && output.length > 0) {
|
|
573
|
-
console.log(`${ts()} ${label} ${chalk.green("✓ Done")} in ${formatElapsed(elapsed)} (${toolCalls} tool calls, ${output.length} chars)`);
|
|
574
|
-
if (taskId)
|
|
575
|
-
postLog(taskId, `${PREFIX} [${name}] ✓ Done in ${formatElapsed(elapsed)} (${toolCalls} tool calls, ${output.length} chars)`);
|
|
576
|
-
}
|
|
577
|
-
else if (code !== 0) {
|
|
578
|
-
console.log(`${ts()} ${label} ${chalk.red(`✗ Exited ${code}`)} after ${formatElapsed(elapsed)} — ${stderrOutput.substring(0, 150) || "no stderr"}`);
|
|
579
|
-
if (taskId)
|
|
580
|
-
postLog(taskId, `${PREFIX} [${name}] ✗ Exited ${code} after ${formatElapsed(elapsed)}`);
|
|
581
|
-
}
|
|
582
|
-
else {
|
|
583
|
-
console.log(`${ts()} ${label} ${chalk.yellow("⚠ Empty output")} after ${formatElapsed(elapsed)} (${toolCalls} tool calls)`);
|
|
584
|
-
if (taskId)
|
|
585
|
-
postLog(taskId, `${PREFIX} [${name}] ⚠ Empty output after ${formatElapsed(elapsed)}`);
|
|
586
|
-
}
|
|
587
|
-
resolve(output);
|
|
588
|
-
});
|
|
589
|
-
proc.on("error", (err) => {
|
|
590
|
-
clearTimeout(timeout);
|
|
591
|
-
console.log(`${ts()} ${label} ${chalk.red("✗ Spawn failed:")} ${err.message}`);
|
|
592
|
-
resolve("");
|
|
593
|
-
});
|
|
594
|
-
});
|
|
595
|
-
}
|
|
596
|
-
/**
|
|
597
|
-
* Run an analyst agent via Vercel AI SDK with tool access to the cloned repo.
|
|
598
|
-
* Used for non-Anthropic providers (OpenAI, Google, Ollama) that can't use Claude CLI.
|
|
599
|
-
* Returns the analyst's report text, or an empty string on failure.
|
|
600
|
-
*/
|
|
601
|
-
async function runAnalystWithSdk(name, provider, model, apiKey, prompt, repoPath, timeoutMs = 900_000, taskId) {
|
|
602
|
-
const label = chalk.blue(`[${name}]`);
|
|
603
|
-
const modelLabel = chalk.yellow(`${provider}/${model}`);
|
|
604
|
-
const startMs = Date.now();
|
|
605
|
-
console.log(`${ts()} ${label} Starting analyst using ${modelLabel} via AI SDK...`);
|
|
606
|
-
if (taskId)
|
|
607
|
-
postLog(taskId, `${PREFIX} [${name}] Starting analyst using ${provider}/${model} via AI SDK...`);
|
|
608
|
-
try {
|
|
609
|
-
const result = await generateTextWithTools({
|
|
610
|
-
provider,
|
|
611
|
-
model,
|
|
612
|
-
apiKey,
|
|
613
|
-
prompt,
|
|
614
|
-
workingDir: repoPath,
|
|
615
|
-
maxTokens: 16384,
|
|
616
|
-
temperature: 0.3,
|
|
617
|
-
timeoutMs,
|
|
618
|
-
maxSteps: 20, // Allow thorough exploration
|
|
619
|
-
enableTools: true,
|
|
620
|
-
});
|
|
621
|
-
const elapsed = Math.round((Date.now() - startMs) / 1000);
|
|
622
|
-
if (result && result.length > 0) {
|
|
623
|
-
console.log(`${ts()} ${label} ${chalk.green("✓ Done")} in ${elapsed}s (${result.length} chars)`);
|
|
624
|
-
return result;
|
|
625
|
-
}
|
|
626
|
-
console.log(`${ts()} ${label} ${chalk.yellow("⚠ Empty output")} after ${elapsed}s`);
|
|
627
|
-
return "";
|
|
628
|
-
}
|
|
629
|
-
catch (error) {
|
|
630
|
-
const elapsed = Math.round((Date.now() - startMs) / 1000);
|
|
631
|
-
const errMsg = error instanceof Error ? error.message : String(error);
|
|
632
|
-
console.log(`${ts()} ${label} ${chalk.red(`✗ Failed`)} after ${elapsed}s: ${errMsg.substring(0, 150)}`);
|
|
633
|
-
return "";
|
|
634
|
-
}
|
|
635
|
-
}
|
|
636
|
-
/** Analyst prompt templates */
|
|
637
|
-
const CODEBASE_ANALYST_PROMPT = `You are a codebase analyst. Your job is to explore this repository using tools and report what you find.
|
|
638
|
-
|
|
639
|
-
IMPORTANT: You MUST use tools to explore the repository. Do NOT guess or make assumptions.
|
|
640
|
-
|
|
641
|
-
Step 1: Run Glob with pattern "**/*" to see the top-level directory structure.
|
|
642
|
-
Step 2: Read key files: package.json, tsconfig.json, README.md, .env.example, or equivalents.
|
|
643
|
-
Step 3: Run Glob on src/ or the main source directory to understand the code layout.
|
|
644
|
-
Step 4: Read 2-3 representative source files to understand patterns and frameworks.
|
|
645
|
-
|
|
646
|
-
After exploring, write a report covering:
|
|
647
|
-
1. Directory structure and organization
|
|
648
|
-
2. Languages, frameworks, and key dependencies (from package.json, requirements.txt, etc.)
|
|
649
|
-
3. Existing test files and testing patterns (search for test/, __tests__, *.test.*, *.spec.*)
|
|
650
|
-
4. CI/CD configuration (search for .github/workflows/, Jenkinsfile, etc.)
|
|
651
|
-
5. Configuration files and environment setup
|
|
652
|
-
|
|
653
|
-
Keep your report under 2000 words. Only report facts you verified with tools.`;
|
|
654
|
-
function makeRequirementsAnalystPrompt(task) {
|
|
655
|
-
return `You are a requirements analyst. Analyze the following task and the repository to identify what needs to be built.
|
|
656
|
-
|
|
657
|
-
Task: ${task.summary}
|
|
658
|
-
${task.description ? `\nDescription:\n${task.description}` : ""}
|
|
659
|
-
|
|
660
|
-
IMPORTANT: You MUST use tools to understand the existing codebase before analyzing requirements.
|
|
661
|
-
|
|
662
|
-
Step 1: Run Glob with pattern "**/*" to see what already exists in the repository.
|
|
663
|
-
Step 2: Read any existing README, docs, or configuration to understand the current state.
|
|
664
|
-
Step 3: Search for any code related to the task requirements using Grep.
|
|
665
|
-
|
|
666
|
-
After exploring, write a report covering:
|
|
667
|
-
1. Explicit acceptance criteria — what MUST be built based on the description
|
|
668
|
-
2. Implicit requirements — what's assumed but not stated (auth, error handling, etc.)
|
|
669
|
-
3. What already exists vs what needs to be created (based on your file exploration)
|
|
670
|
-
4. Ambiguities that could lead to wrong implementation
|
|
671
|
-
5. Suggested components/modules and which persona should own each
|
|
672
|
-
|
|
673
|
-
Keep your report under 1500 words.`;
|
|
674
|
-
}
|
|
675
|
-
function makeRiskAssessorPrompt(task) {
|
|
676
|
-
return `You are a risk assessor. Your job is to search this repository for potential risks and blockers for a development task.
|
|
677
|
-
|
|
678
|
-
Task: ${task.summary}
|
|
679
|
-
${task.description ? `\nDescription:\n${task.description}` : ""}
|
|
680
|
-
|
|
681
|
-
IMPORTANT: You MUST use tools to search the codebase. Do NOT guess file paths or make assumptions.
|
|
682
|
-
|
|
683
|
-
Step 1: Run Glob with pattern "**/*" to see the full repository structure.
|
|
684
|
-
Step 2: Use Grep to search for code related to the task (relevant keywords, APIs, components).
|
|
685
|
-
Step 3: Read files that are likely to be modified or affected by this task.
|
|
686
|
-
Step 4: Search for existing tests (Grep for "test", "spec", "describe", "it(") to find test coverage.
|
|
687
|
-
|
|
688
|
-
After exploring, write a report covering:
|
|
689
|
-
1. Specific files that will need to be modified (exact paths from your search)
|
|
690
|
-
2. Files with heavy coupling or shared dependencies (imports you found)
|
|
691
|
-
3. Existing tests that will need updating (exact file paths)
|
|
692
|
-
4. Environment, config, or migration requirements
|
|
693
|
-
5. Deployment or infrastructure risks
|
|
694
|
-
|
|
695
|
-
Keep your report under 1500 words. Only report facts you verified with tools.`;
|
|
696
|
-
}
|
|
697
|
-
/**
|
|
698
|
-
* Run team planning: spawn 3 parallel analyst agents, then synthesize
|
|
699
|
-
* their reports into an enhanced planning prompt for the final planner.
|
|
700
|
-
*
|
|
701
|
-
* Falls back to single-agent planning if anything goes wrong.
|
|
702
|
-
*/
|
|
703
|
-
/**
|
|
704
|
-
* Run team analysis: spawn 3 parallel analyst agents once, then return
|
|
705
|
-
* an enhanced prompt with their reports appended. Returns null if all
|
|
706
|
-
* analysts fail (caller should fall back to basePrompt).
|
|
707
|
-
*
|
|
708
|
-
* This runs ONCE before the planner-critic loop — analyst prompts don't
|
|
709
|
-
* include critic feedback, so re-running them on iteration 2+ is waste.
|
|
710
|
-
*/
|
|
711
|
-
async function runTeamAnalysis(task, basePrompt, claudePath, model, env, repoPath, taskId, startTime, provider = "anthropic", providerApiKey) {
|
|
712
|
-
const taskLabel = chalk.cyan(taskId.slice(0, 8));
|
|
713
|
-
console.log(`${ts()} ${taskLabel} ${chalk.magenta("◆ Team planning")} — running 3 analysts in parallel...`);
|
|
714
|
-
await postLog(taskId, `${PREFIX} Team planning: running codebase, requirements, and risk analysts in parallel...`);
|
|
715
|
-
await postProgress(taskId, "reading_repo", Math.round((Date.now() - startTime) / 1000), "Running parallel analysis agents...", 0, 0);
|
|
716
|
-
const analysisModel = model;
|
|
717
|
-
const MAX_TEAM_RETRIES = 3;
|
|
718
|
-
const useCliAnalysts = provider === "anthropic";
|
|
719
|
-
// Helper: dispatch analyst to Claude CLI or AI SDK based on provider
|
|
720
|
-
const dispatchAnalyst = (name, prompt) => {
|
|
721
|
-
if (useCliAnalysts) {
|
|
722
|
-
return runAnalyst(name, claudePath, analysisModel, prompt, repoPath, env, 900_000, taskId);
|
|
723
|
-
}
|
|
724
|
-
if (!providerApiKey) {
|
|
725
|
-
console.log(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} No API key for ${provider} analysts, skipping ${name}`);
|
|
726
|
-
return Promise.resolve("");
|
|
727
|
-
}
|
|
728
|
-
return runAnalystWithSdk(name, provider, analysisModel, providerApiKey, prompt, repoPath, 900_000, taskId);
|
|
729
|
-
};
|
|
730
|
-
let codebaseReport = "";
|
|
731
|
-
let requirementsReport = "";
|
|
732
|
-
let riskReport = "";
|
|
733
|
-
for (let attempt = 1; attempt <= MAX_TEAM_RETRIES; attempt++) {
|
|
734
|
-
if (attempt > 1) {
|
|
735
|
-
console.log(`${ts()} ${taskLabel} ${chalk.magenta("◆ Team planning")} — retry ${attempt}/${MAX_TEAM_RETRIES}...`);
|
|
736
|
-
await postLog(taskId, `${PREFIX} Team analysis retry ${attempt}/${MAX_TEAM_RETRIES}...`);
|
|
737
|
-
}
|
|
738
|
-
const [codebaseResult, requirementsResult, riskResult] = await Promise.allSettled([
|
|
739
|
-
codebaseReport ? Promise.resolve(codebaseReport) : dispatchAnalyst("Codebase", CODEBASE_ANALYST_PROMPT),
|
|
740
|
-
requirementsReport ? Promise.resolve(requirementsReport) : dispatchAnalyst("Requirements", makeRequirementsAnalystPrompt(task)),
|
|
741
|
-
riskReport ? Promise.resolve(riskReport) : dispatchAnalyst("Risk", makeRiskAssessorPrompt(task)),
|
|
742
|
-
]);
|
|
743
|
-
if (!codebaseReport && codebaseResult.status === "fulfilled") {
|
|
744
|
-
codebaseReport = codebaseResult.value;
|
|
745
|
-
}
|
|
746
|
-
if (!requirementsReport && requirementsResult.status === "fulfilled") {
|
|
747
|
-
requirementsReport = requirementsResult.value;
|
|
748
|
-
}
|
|
749
|
-
if (!riskReport && riskResult.status === "fulfilled") {
|
|
750
|
-
riskReport = riskResult.value;
|
|
751
|
-
}
|
|
752
|
-
const successCount = [codebaseReport, requirementsReport, riskReport].filter((r) => r.length > 0).length;
|
|
753
|
-
const analysisElapsed = Math.round((Date.now() - startTime) / 1000);
|
|
754
|
-
console.log(`${ts()} ${taskLabel} Analysis attempt ${attempt}: ${successCount}/3 reports (${analysisElapsed}s)`);
|
|
755
|
-
if (successCount > 0) {
|
|
756
|
-
console.log(`${ts()} ${taskLabel} ${chalk.green("✓")} Analysis complete: ${successCount}/3 reports (${analysisElapsed}s)`);
|
|
757
|
-
await postLog(taskId, `${PREFIX} Team analysis complete: ${successCount}/3 reports in ${formatElapsed(analysisElapsed)}. Synthesizing plan...`);
|
|
758
|
-
await postProgress(taskId, "analyzing", analysisElapsed, "Synthesizing analysis reports...", 0, 0);
|
|
759
|
-
break;
|
|
760
|
-
}
|
|
761
|
-
if (attempt === MAX_TEAM_RETRIES) {
|
|
762
|
-
console.log(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} All analysts failed after ${MAX_TEAM_RETRIES} attempts, falling back to single-agent planning`);
|
|
763
|
-
await postLog(taskId, `${PREFIX} All analysis agents failed after ${MAX_TEAM_RETRIES} attempts — falling back to single-agent planning`);
|
|
764
|
-
return null;
|
|
765
|
-
}
|
|
766
|
-
}
|
|
767
|
-
// Build enhanced prompt with analysis reports
|
|
768
|
-
const sections = [];
|
|
769
|
-
if (codebaseReport) {
|
|
770
|
-
sections.push(`## Codebase Analysis (from automated analysis)\n\n${codebaseReport}`);
|
|
771
|
-
}
|
|
772
|
-
if (requirementsReport) {
|
|
773
|
-
sections.push(`## Requirements Analysis\n\n${requirementsReport}`);
|
|
774
|
-
}
|
|
775
|
-
if (riskReport) {
|
|
776
|
-
sections.push(`## Risk Assessment\n\n${riskReport}`);
|
|
777
|
-
}
|
|
778
|
-
return (basePrompt +
|
|
779
|
-
"\n\n" +
|
|
780
|
-
sections.join("\n\n") +
|
|
781
|
-
"\n\n" +
|
|
782
|
-
"Use these analyses to produce a more accurate execution plan.\n" +
|
|
783
|
-
"Prefer actual file paths discovered in the codebase analysis over guessed paths.");
|
|
784
|
-
}
|
|
785
451
|
/**
|
|
786
452
|
* Run planning for a task with Planner-Critic validation loop.
|
|
787
453
|
*
|
|
788
454
|
* Flow:
|
|
789
455
|
* 1. Fetch planning prompt from cloud API
|
|
790
|
-
* 2.
|
|
791
|
-
* 3.
|
|
792
|
-
* 4.
|
|
793
|
-
* 5.
|
|
794
|
-
* 6. If critic
|
|
795
|
-
* 7.
|
|
796
|
-
* 8.
|
|
456
|
+
* 2. Clone target repo (if available) so planner can explore with tools
|
|
457
|
+
* 3. Run Claude CLI to generate plan
|
|
458
|
+
* 4. Parse plan, apply file cap (max 5 files per story)
|
|
459
|
+
* 5. Run critic validation via Claude CLI
|
|
460
|
+
* 6. If critic approves (score >= 80): post validated plan to API
|
|
461
|
+
* 7. If critic rejects: re-run planner with feedback (up to MAX_ITERATIONS)
|
|
462
|
+
* 8. After MAX_ITERATIONS without approval: post best plan if score >= 50 (fallback)
|
|
463
|
+
* 9. If no plan scored >= 50: fail the task
|
|
797
464
|
*/
|
|
798
465
|
export async function planTask(task, config, credentials) {
|
|
799
466
|
const taskLabel = chalk.cyan(task.id.slice(0, 8));
|
|
@@ -816,12 +483,9 @@ export async function planTask(task, config, credentials) {
|
|
|
816
483
|
const startTime = Date.now();
|
|
817
484
|
// PRD for critic validation: use task description, fall back to summary
|
|
818
485
|
const prd = task.description || task.summary;
|
|
819
|
-
//
|
|
820
|
-
// Analyst prompts don't include critic feedback, so re-running them
|
|
821
|
-
// on iteration 2+ wastes compute (they'd produce the same reports).
|
|
486
|
+
// Clone target repo so the planner can explore with tools
|
|
822
487
|
let repoPath = null;
|
|
823
|
-
|
|
824
|
-
if (config.teamPlanningEnabled && task.githubRepo) {
|
|
488
|
+
if (task.githubRepo) {
|
|
825
489
|
const scmProvider = task.scmProvider || "github";
|
|
826
490
|
const scmToken = scmProvider === "bitbucket"
|
|
827
491
|
? config.bitbucketToken
|
|
@@ -832,23 +496,11 @@ export async function planTask(task, config, credentials) {
|
|
|
832
496
|
repoPath = await cloneTargetRepo(task.githubRepo, scmToken, scmProvider, task.id);
|
|
833
497
|
}
|
|
834
498
|
else {
|
|
835
|
-
console.log(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} No SCM token for ${scmProvider},
|
|
836
|
-
}
|
|
837
|
-
if (repoPath) {
|
|
838
|
-
const analystModel = config.analystModel || cliModel;
|
|
839
|
-
const analystBackend = isAnthropicPlanning ? "Claude CLI" : `${provider} AI SDK`;
|
|
840
|
-
console.log(`${ts()} ${taskLabel} Analysts using model: ${chalk.yellow(analystModel)} via ${chalk.dim(analystBackend)} (planner: ${chalk.yellow(cliModel)})`);
|
|
841
|
-
const analysisResult = await runTeamAnalysis(task, basePrompt, claudePath, analystModel, cleanEnv, repoPath, task.id, startTime, provider, providerApiKey);
|
|
842
|
-
if (analysisResult) {
|
|
843
|
-
enhancedBasePrompt = analysisResult;
|
|
844
|
-
}
|
|
845
|
-
// else: all analysts failed, fall back to basePrompt
|
|
499
|
+
console.log(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} No SCM token for ${scmProvider}, planner will run without repo access`);
|
|
846
500
|
}
|
|
847
501
|
}
|
|
848
502
|
// 2. Planner-Critic iteration loop
|
|
849
|
-
|
|
850
|
-
// Critic feedback gets appended on re-plan, but analyst reports are fixed.
|
|
851
|
-
let currentPrompt = enhancedBasePrompt;
|
|
503
|
+
let currentPrompt = basePrompt;
|
|
852
504
|
let bestPlan = null;
|
|
853
505
|
let bestScore = 0;
|
|
854
506
|
// Track critic history across iterations for analytics
|
|
@@ -870,9 +522,7 @@ export async function planTask(task, config, credentials) {
|
|
|
870
522
|
let rawOutput;
|
|
871
523
|
try {
|
|
872
524
|
if (isAnthropicPlanning) {
|
|
873
|
-
|
|
874
|
-
const hasAnalystContext = enhancedBasePrompt !== basePrompt;
|
|
875
|
-
rawOutput = await runClaudeCli(claudePath, cliModel, currentPrompt, cleanEnv, task.id, startTime, hasAnalystContext);
|
|
525
|
+
rawOutput = await runClaudeCli(claudePath, cliModel, currentPrompt, cleanEnv, task.id, startTime, repoPath || undefined);
|
|
876
526
|
}
|
|
877
527
|
else {
|
|
878
528
|
if (!providerApiKey) {
|
|
@@ -997,7 +647,7 @@ export async function planTask(task, config, credentials) {
|
|
|
997
647
|
// 2f. Rejected — append critic feedback for next iteration
|
|
998
648
|
if (iteration < MAX_ITERATIONS) {
|
|
999
649
|
const feedback = formatCriticFeedback(criticResult);
|
|
1000
|
-
currentPrompt =
|
|
650
|
+
currentPrompt = basePrompt + "\n\n" + feedback;
|
|
1001
651
|
const msg = `${PREFIX} Critic rejected (score: ${criticResult.score}/100, threshold: ${AUTO_APPROVAL_THRESHOLD}). Re-planning with feedback...`;
|
|
1002
652
|
console.log(`${ts()} ${taskLabel} ${chalk.yellow("⚠")} ${msg}`);
|
|
1003
653
|
await postLog(task.id, msg);
|