@workermill/agent 0.7.17 → 0.7.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/plan-validator.d.ts +2 -2
- package/dist/plan-validator.js +40 -4
- package/dist/planner.js +81 -5
- package/dist/spawner.js +11 -0
- package/package.json +1 -1
package/dist/plan-validator.d.ts
CHANGED
|
@@ -90,7 +90,7 @@ export declare function parseCriticResponse(text: string): CriticResult;
|
|
|
90
90
|
* Run the critic via Claude CLI (lightweight — no tools, just reasoning).
|
|
91
91
|
* Returns the raw text output.
|
|
92
92
|
*/
|
|
93
|
-
export declare function runCriticCli(claudePath: string, model: string, prompt: string, env: Record<string, string | undefined>): Promise<string>;
|
|
93
|
+
export declare function runCriticCli(claudePath: string, model: string, prompt: string, env: Record<string, string | undefined>, taskId?: string): Promise<string>;
|
|
94
94
|
/**
|
|
95
95
|
* Format critic feedback for appending to the planner prompt on re-run.
|
|
96
96
|
*/
|
|
@@ -100,5 +100,5 @@ export declare function formatCriticFeedback(critic: CriticResult): string;
|
|
|
100
100
|
* Routes to Claude CLI (Anthropic) or HTTP API (other providers).
|
|
101
101
|
* Returns the critic result, or null if critic fails (non-blocking).
|
|
102
102
|
*/
|
|
103
|
-
export declare function runCriticValidation(claudePath: string, model: string, prd: string, plan: ExecutionPlan, env: Record<string, string | undefined>, taskLabel: string, provider?: AIProvider, providerApiKey?: string): Promise<CriticResult | null>;
|
|
103
|
+
export declare function runCriticValidation(claudePath: string, model: string, prd: string, plan: ExecutionPlan, env: Record<string, string | undefined>, taskLabel: string, provider?: AIProvider, providerApiKey?: string, taskId?: string): Promise<CriticResult | null>;
|
|
104
104
|
export { AUTO_APPROVAL_THRESHOLD };
|
package/dist/plan-validator.js
CHANGED
|
@@ -12,6 +12,7 @@
|
|
|
12
12
|
import { spawn } from "child_process";
|
|
13
13
|
import chalk from "chalk";
|
|
14
14
|
import { generateText } from "./providers.js";
|
|
15
|
+
import { api } from "./api.js";
|
|
15
16
|
// ============================================================================
|
|
16
17
|
// CONSTANTS
|
|
17
18
|
// ============================================================================
|
|
@@ -277,7 +278,7 @@ export function parseCriticResponse(text) {
|
|
|
277
278
|
* Run the critic via Claude CLI (lightweight — no tools, just reasoning).
|
|
278
279
|
* Returns the raw text output.
|
|
279
280
|
*/
|
|
280
|
-
export function runCriticCli(claudePath, model, prompt, env) {
|
|
281
|
+
export function runCriticCli(claudePath, model, prompt, env, taskId) {
|
|
281
282
|
return new Promise((resolve, reject) => {
|
|
282
283
|
const proc = spawn(claudePath, [
|
|
283
284
|
"--print",
|
|
@@ -294,7 +295,21 @@ export function runCriticCli(claudePath, model, prompt, env) {
|
|
|
294
295
|
let stdout = "";
|
|
295
296
|
let stderr = "";
|
|
296
297
|
proc.stdout.on("data", (data) => {
|
|
297
|
-
stdout += data.toString();
|
|
298
|
+
const chunk = data.toString();
|
|
299
|
+
stdout += chunk;
|
|
300
|
+
// Stream critic reasoning to dashboard in real-time
|
|
301
|
+
const lines = chunk.split("\n").filter((l) => l.trim());
|
|
302
|
+
for (const line of lines) {
|
|
303
|
+
const trimmed = line.trim().length > 200
|
|
304
|
+
? line.trim().substring(0, 200) + "…"
|
|
305
|
+
: line.trim();
|
|
306
|
+
if (trimmed) {
|
|
307
|
+
if (taskId) {
|
|
308
|
+
postLog(taskId, `${PREFIX} [critic] ${trimmed}`, "output");
|
|
309
|
+
}
|
|
310
|
+
console.log(`${ts()} ${chalk.dim("🔍")} ${chalk.dim(trimmed)}`);
|
|
311
|
+
}
|
|
312
|
+
}
|
|
298
313
|
});
|
|
299
314
|
proc.stderr.on("data", (data) => {
|
|
300
315
|
stderr += data.toString();
|
|
@@ -358,23 +373,44 @@ export function formatCriticFeedback(critic) {
|
|
|
358
373
|
lines.push("**You MUST address ALL feedback above.** Each story must target at most 5 files.", "Stories MUST NOT overlap on targetFiles. Generate a revised plan.");
|
|
359
374
|
return lines.join("\n");
|
|
360
375
|
}
|
|
376
|
+
/** Consistent prefix matching planner dashboard format */
|
|
377
|
+
const PREFIX = "[🗺️ planning_agent 🤖]";
|
|
361
378
|
/** Timestamp prefix for console logs */
|
|
362
379
|
function ts() {
|
|
363
380
|
return chalk.dim(new Date().toLocaleTimeString());
|
|
364
381
|
}
|
|
382
|
+
/**
|
|
383
|
+
* Post a log message to the cloud dashboard for real-time visibility.
|
|
384
|
+
*/
|
|
385
|
+
async function postLog(taskId, message, type = "system", severity = "info") {
|
|
386
|
+
try {
|
|
387
|
+
await api.post("/api/control-center/logs", {
|
|
388
|
+
taskId,
|
|
389
|
+
type,
|
|
390
|
+
message,
|
|
391
|
+
severity,
|
|
392
|
+
});
|
|
393
|
+
}
|
|
394
|
+
catch {
|
|
395
|
+
// Fire and forget — don't block critic on log failures
|
|
396
|
+
}
|
|
397
|
+
}
|
|
365
398
|
/**
|
|
366
399
|
* Run critic validation on a parsed plan.
|
|
367
400
|
* Routes to Claude CLI (Anthropic) or HTTP API (other providers).
|
|
368
401
|
* Returns the critic result, or null if critic fails (non-blocking).
|
|
369
402
|
*/
|
|
370
|
-
export async function runCriticValidation(claudePath, model, prd, plan, env, taskLabel, provider, providerApiKey) {
|
|
403
|
+
export async function runCriticValidation(claudePath, model, prd, plan, env, taskLabel, provider, providerApiKey, taskId) {
|
|
371
404
|
const criticPrompt = buildCriticPrompt(prd, plan);
|
|
372
405
|
const effectiveProvider = provider || "anthropic";
|
|
373
406
|
console.log(`${ts()} ${taskLabel} ${chalk.dim(`Running critic validation (${effectiveProvider})...`)}`);
|
|
407
|
+
if (taskId) {
|
|
408
|
+
postLog(taskId, `${PREFIX} Running critic validation (${effectiveProvider})...`);
|
|
409
|
+
}
|
|
374
410
|
try {
|
|
375
411
|
let rawCriticOutput;
|
|
376
412
|
if (effectiveProvider === "anthropic") {
|
|
377
|
-
rawCriticOutput = await runCriticCli(claudePath, model, criticPrompt, env);
|
|
413
|
+
rawCriticOutput = await runCriticCli(claudePath, model, criticPrompt, env, taskId);
|
|
378
414
|
}
|
|
379
415
|
else {
|
|
380
416
|
if (!providerApiKey) {
|
package/dist/planner.js
CHANGED
|
@@ -20,6 +20,50 @@ import { findClaudePath } from "./config.js";
|
|
|
20
20
|
import { api } from "./api.js";
|
|
21
21
|
import { parseExecutionPlan, applyFileCap, applyStoryCap, resolveFileOverlaps, serializePlan, runCriticValidation, formatCriticFeedback, AUTO_APPROVAL_THRESHOLD, } from "./plan-validator.js";
|
|
22
22
|
import { generateTextWithTools } from "./ai-sdk-generate.js";
|
|
23
|
+
/**
|
|
24
|
+
* Extract token usage from a stream-json event.
|
|
25
|
+
* Claude reports cumulative tokens, so we use Math.max to track the highest values.
|
|
26
|
+
*/
|
|
27
|
+
function extractTokenUsage(event, usage) {
|
|
28
|
+
const paths = [
|
|
29
|
+
event.usage,
|
|
30
|
+
event.message?.usage,
|
|
31
|
+
event.result?.usage,
|
|
32
|
+
];
|
|
33
|
+
for (const u of paths) {
|
|
34
|
+
if (u && typeof u === "object") {
|
|
35
|
+
const d = u;
|
|
36
|
+
if (typeof d.input_tokens === "number")
|
|
37
|
+
usage.inputTokens = Math.max(usage.inputTokens, d.input_tokens);
|
|
38
|
+
if (typeof d.output_tokens === "number")
|
|
39
|
+
usage.outputTokens = Math.max(usage.outputTokens, d.output_tokens);
|
|
40
|
+
if (typeof d.cache_creation_input_tokens === "number")
|
|
41
|
+
usage.cacheCreationTokens = Math.max(usage.cacheCreationTokens, d.cache_creation_input_tokens);
|
|
42
|
+
if (typeof d.cache_read_input_tokens === "number")
|
|
43
|
+
usage.cacheReadTokens = Math.max(usage.cacheReadTokens, d.cache_read_input_tokens);
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
/**
|
|
48
|
+
* Report partial token usage to the cloud API.
|
|
49
|
+
*/
|
|
50
|
+
async function reportPlanningUsage(taskId, usage, model, mode) {
|
|
51
|
+
if (usage.inputTokens === 0 && usage.outputTokens === 0)
|
|
52
|
+
return;
|
|
53
|
+
try {
|
|
54
|
+
await api.post(`/api/tasks/${taskId}/usage/partial`, {
|
|
55
|
+
inputTokens: usage.inputTokens,
|
|
56
|
+
outputTokens: usage.outputTokens,
|
|
57
|
+
cacheCreationTokens: usage.cacheCreationTokens,
|
|
58
|
+
cacheReadTokens: usage.cacheReadTokens,
|
|
59
|
+
model,
|
|
60
|
+
mode,
|
|
61
|
+
});
|
|
62
|
+
}
|
|
63
|
+
catch {
|
|
64
|
+
// Fire and forget
|
|
65
|
+
}
|
|
66
|
+
}
|
|
23
67
|
/** Max Planner-Critic iterations before giving up */
|
|
24
68
|
const MAX_ITERATIONS = 3;
|
|
25
69
|
/** Timestamp prefix */
|
|
@@ -83,16 +127,22 @@ function phaseLabel(phase, elapsed) {
|
|
|
83
127
|
* Run Claude CLI with stream-json output, posting real-time phase milestones
|
|
84
128
|
* to the cloud dashboard — identical terminal experience to cloud planning.
|
|
85
129
|
*/
|
|
86
|
-
function runClaudeCli(claudePath, model, prompt, env, taskId, startTime) {
|
|
130
|
+
function runClaudeCli(claudePath, model, prompt, env, taskId, startTime, disableTools = false) {
|
|
87
131
|
const taskLabel = chalk.cyan(taskId.slice(0, 8));
|
|
88
132
|
return new Promise((resolve, reject) => {
|
|
89
|
-
const proc = spawn(claudePath, [
|
|
133
|
+
const cliArgs = [
|
|
90
134
|
"--print",
|
|
91
135
|
"--verbose",
|
|
92
136
|
"--output-format", "stream-json",
|
|
93
137
|
"--model", model,
|
|
94
138
|
"--permission-mode", "bypassPermissions",
|
|
95
|
-
], {
|
|
139
|
+
];
|
|
140
|
+
// When analysts already explored the repo, strip tools so the planner
|
|
141
|
+
// doesn't waste turns re-exploring — it has all context in the prompt.
|
|
142
|
+
if (disableTools) {
|
|
143
|
+
cliArgs.push("--allowedTools", "");
|
|
144
|
+
}
|
|
145
|
+
const proc = spawn(claudePath, cliArgs, {
|
|
96
146
|
env,
|
|
97
147
|
stdio: ["pipe", "pipe", "pipe"],
|
|
98
148
|
});
|
|
@@ -103,6 +153,9 @@ function runClaudeCli(claudePath, model, prompt, env, taskId, startTime) {
|
|
|
103
153
|
let stderrOutput = "";
|
|
104
154
|
let charsReceived = 0;
|
|
105
155
|
let toolCallCount = 0;
|
|
156
|
+
// Token usage accumulator — extract from stream events using Math.max
|
|
157
|
+
const tokenUsage = { inputTokens: 0, outputTokens: 0, cacheCreationTokens: 0, cacheReadTokens: 0 };
|
|
158
|
+
let resultModel = model;
|
|
106
159
|
// Buffered text streaming — flush complete lines to dashboard every 1s.
|
|
107
160
|
// LLM deltas are tiny fragments; we accumulate until we see '\n', then
|
|
108
161
|
// a 1s interval flushes all complete lines as log entries. On exit we
|
|
@@ -185,6 +238,7 @@ function runClaudeCli(claudePath, model, prompt, env, taskId, startTime) {
|
|
|
185
238
|
if (block.type === "text" && block.text) {
|
|
186
239
|
fullText += block.text;
|
|
187
240
|
charsReceived += block.text.length;
|
|
241
|
+
textBuffer += block.text;
|
|
188
242
|
if (!firstTextSeen) {
|
|
189
243
|
firstTextSeen = true;
|
|
190
244
|
if (toolCallCount > 0 && !milestoneSent.analyzing) {
|
|
@@ -210,6 +264,7 @@ function runClaudeCli(claudePath, model, prompt, env, taskId, startTime) {
|
|
|
210
264
|
else if (typeof content === "string" && content) {
|
|
211
265
|
fullText += content;
|
|
212
266
|
charsReceived += content.length;
|
|
267
|
+
textBuffer += content;
|
|
213
268
|
}
|
|
214
269
|
}
|
|
215
270
|
else if (event.type === "content_block_delta" && event.delta?.text) {
|
|
@@ -240,6 +295,16 @@ function runClaudeCli(claudePath, model, prompt, env, taskId, startTime) {
|
|
|
240
295
|
else if (event.type === "result" && event.result) {
|
|
241
296
|
resultText = typeof event.result === "string" ? event.result : "";
|
|
242
297
|
}
|
|
298
|
+
// Extract token usage from any event that carries it
|
|
299
|
+
extractTokenUsage(event, tokenUsage);
|
|
300
|
+
if (event.type === "result" && event.total_cost_usd !== undefined) {
|
|
301
|
+
// Result event also carries model info
|
|
302
|
+
if (event.modelUsage && typeof event.modelUsage === "object") {
|
|
303
|
+
const models = Object.keys(event.modelUsage);
|
|
304
|
+
if (models.length > 0)
|
|
305
|
+
resultModel = models[0];
|
|
306
|
+
}
|
|
307
|
+
}
|
|
243
308
|
}
|
|
244
309
|
catch {
|
|
245
310
|
// Not valid JSON — raw text, accumulate
|
|
@@ -251,10 +316,17 @@ function runClaudeCli(claudePath, model, prompt, env, taskId, startTime) {
|
|
|
251
316
|
proc.stderr.on("data", (chunk) => {
|
|
252
317
|
stderrOutput += chunk.toString();
|
|
253
318
|
});
|
|
319
|
+
// Report partial token usage every 30s during planning
|
|
320
|
+
const usageReportInterval = setInterval(() => {
|
|
321
|
+
if (tokenUsage.inputTokens > 0 || tokenUsage.outputTokens > 0) {
|
|
322
|
+
reportPlanningUsage(taskId, tokenUsage, resultModel, "greatest").catch(() => { });
|
|
323
|
+
}
|
|
324
|
+
}, 30_000);
|
|
254
325
|
function cleanupAll() {
|
|
255
326
|
clearInterval(progressInterval);
|
|
256
327
|
clearInterval(sseProgressInterval);
|
|
257
328
|
clearInterval(textFlushInterval);
|
|
329
|
+
clearInterval(usageReportInterval);
|
|
258
330
|
flushTextBuffer(true);
|
|
259
331
|
}
|
|
260
332
|
const timeout = setTimeout(() => {
|
|
@@ -268,6 +340,8 @@ function runClaudeCli(claudePath, model, prompt, env, taskId, startTime) {
|
|
|
268
340
|
// Emit final "validating" phase to dashboard
|
|
269
341
|
const elapsedAtClose = Math.round((Date.now() - startTime) / 1000);
|
|
270
342
|
postProgress(taskId, "validating", elapsedAtClose, "Validating plan...", charsReceived, toolCallCount);
|
|
343
|
+
// Final usage report
|
|
344
|
+
reportPlanningUsage(taskId, tokenUsage, resultModel, "greatest").catch(() => { });
|
|
271
345
|
if (code !== 0) {
|
|
272
346
|
reject(new Error(`Claude CLI failed (exit ${code}): ${stderrOutput.substring(0, 300)}`));
|
|
273
347
|
}
|
|
@@ -766,7 +840,9 @@ export async function planTask(task, config, credentials) {
|
|
|
766
840
|
let rawOutput;
|
|
767
841
|
try {
|
|
768
842
|
if (isAnthropicPlanning) {
|
|
769
|
-
rawOutput = await runClaudeCli(claudePath, cliModel, currentPrompt, cleanEnv, task.id, startTime);
|
|
843
|
+
// Disable tools when analysts already provided repo context
|
|
844
|
+
const hasAnalystContext = enhancedBasePrompt !== basePrompt;
|
|
845
|
+
rawOutput = await runClaudeCli(claudePath, cliModel, currentPrompt, cleanEnv, task.id, startTime, hasAnalystContext);
|
|
770
846
|
}
|
|
771
847
|
else {
|
|
772
848
|
if (!providerApiKey) {
|
|
@@ -845,7 +921,7 @@ export async function planTask(task, config, credentials) {
|
|
|
845
921
|
console.log(`${ts()} ${taskLabel} Plan: ${chalk.bold(plan.stories.length)} stories (max ${maxStories})`);
|
|
846
922
|
await postLog(task.id, `${PREFIX} Plan generated: ${plan.stories.length} stories (${formatElapsed(elapsed)}). Running critic validation...`);
|
|
847
923
|
// 2d. Run critic validation
|
|
848
|
-
const criticResult = await runCriticValidation(claudePath, cliModel, prd, plan, cleanEnv, taskLabel, provider, providerApiKey);
|
|
924
|
+
const criticResult = await runCriticValidation(claudePath, cliModel, prd, plan, cleanEnv, taskLabel, provider, providerApiKey, task.id);
|
|
849
925
|
// Track best plan across iterations
|
|
850
926
|
if (criticResult && criticResult.score > bestScore) {
|
|
851
927
|
bestPlan = plan;
|
package/dist/spawner.js
CHANGED
|
@@ -140,6 +140,17 @@ export async function spawnWorker(task, config, orgConfig, credentials) {
|
|
|
140
140
|
return;
|
|
141
141
|
}
|
|
142
142
|
if (claudeConfigDir) {
|
|
143
|
+
// Ensure credentials file is readable AND writable inside container.
|
|
144
|
+
// Claude CLI creates .credentials.json with 600 permissions, but the container
|
|
145
|
+
// runs as UID 1001 (worker) while the host user is UID 1000. Without this chmod,
|
|
146
|
+
// the mounted file is unreadable inside the container → "Invalid API key" errors.
|
|
147
|
+
const credFile = path.join(claudeConfigDir, ".credentials.json");
|
|
148
|
+
try {
|
|
149
|
+
fs.chmodSync(credFile, 0o666);
|
|
150
|
+
}
|
|
151
|
+
catch {
|
|
152
|
+
// Ignore - file may not exist yet
|
|
153
|
+
}
|
|
143
154
|
const dockerClaudeDir = toDockerPath(claudeConfigDir);
|
|
144
155
|
dockerArgs.push("-v", `${dockerClaudeDir}:/home/worker/.claude`);
|
|
145
156
|
}
|