@workermill/agent 0.7.17 → 0.7.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -90,7 +90,7 @@ export declare function parseCriticResponse(text: string): CriticResult;
90
90
  * Run the critic via Claude CLI (lightweight — no tools, just reasoning).
91
91
  * Returns the raw text output.
92
92
  */
93
- export declare function runCriticCli(claudePath: string, model: string, prompt: string, env: Record<string, string | undefined>): Promise<string>;
93
+ export declare function runCriticCli(claudePath: string, model: string, prompt: string, env: Record<string, string | undefined>, taskId?: string): Promise<string>;
94
94
  /**
95
95
  * Format critic feedback for appending to the planner prompt on re-run.
96
96
  */
@@ -100,5 +100,5 @@ export declare function formatCriticFeedback(critic: CriticResult): string;
100
100
  * Routes to Claude CLI (Anthropic) or HTTP API (other providers).
101
101
  * Returns the critic result, or null if critic fails (non-blocking).
102
102
  */
103
- export declare function runCriticValidation(claudePath: string, model: string, prd: string, plan: ExecutionPlan, env: Record<string, string | undefined>, taskLabel: string, provider?: AIProvider, providerApiKey?: string): Promise<CriticResult | null>;
103
+ export declare function runCriticValidation(claudePath: string, model: string, prd: string, plan: ExecutionPlan, env: Record<string, string | undefined>, taskLabel: string, provider?: AIProvider, providerApiKey?: string, taskId?: string): Promise<CriticResult | null>;
104
104
  export { AUTO_APPROVAL_THRESHOLD };
@@ -12,6 +12,7 @@
12
12
  import { spawn } from "child_process";
13
13
  import chalk from "chalk";
14
14
  import { generateText } from "./providers.js";
15
+ import { api } from "./api.js";
15
16
  // ============================================================================
16
17
  // CONSTANTS
17
18
  // ============================================================================
@@ -277,7 +278,7 @@ export function parseCriticResponse(text) {
277
278
  * Run the critic via Claude CLI (lightweight — no tools, just reasoning).
278
279
  * Returns the raw text output.
279
280
  */
280
- export function runCriticCli(claudePath, model, prompt, env) {
281
+ export function runCriticCli(claudePath, model, prompt, env, taskId) {
281
282
  return new Promise((resolve, reject) => {
282
283
  const proc = spawn(claudePath, [
283
284
  "--print",
@@ -294,7 +295,21 @@ export function runCriticCli(claudePath, model, prompt, env) {
294
295
  let stdout = "";
295
296
  let stderr = "";
296
297
  proc.stdout.on("data", (data) => {
297
- stdout += data.toString();
298
+ const chunk = data.toString();
299
+ stdout += chunk;
300
+ // Stream critic reasoning to dashboard in real-time
301
+ const lines = chunk.split("\n").filter((l) => l.trim());
302
+ for (const line of lines) {
303
+ const trimmed = line.trim().length > 200
304
+ ? line.trim().substring(0, 200) + "…"
305
+ : line.trim();
306
+ if (trimmed) {
307
+ if (taskId) {
308
+ postLog(taskId, `${PREFIX} [critic] ${trimmed}`, "output");
309
+ }
310
+ console.log(`${ts()} ${chalk.dim("🔍")} ${chalk.dim(trimmed)}`);
311
+ }
312
+ }
298
313
  });
299
314
  proc.stderr.on("data", (data) => {
300
315
  stderr += data.toString();
@@ -358,23 +373,44 @@ export function formatCriticFeedback(critic) {
358
373
  lines.push("**You MUST address ALL feedback above.** Each story must target at most 5 files.", "Stories MUST NOT overlap on targetFiles. Generate a revised plan.");
359
374
  return lines.join("\n");
360
375
  }
376
+ /** Consistent prefix matching planner dashboard format */
377
+ const PREFIX = "[🗺️ planning_agent 🤖]";
361
378
  /** Timestamp prefix for console logs */
362
379
  function ts() {
363
380
  return chalk.dim(new Date().toLocaleTimeString());
364
381
  }
382
+ /**
383
+ * Post a log message to the cloud dashboard for real-time visibility.
384
+ */
385
+ async function postLog(taskId, message, type = "system", severity = "info") {
386
+ try {
387
+ await api.post("/api/control-center/logs", {
388
+ taskId,
389
+ type,
390
+ message,
391
+ severity,
392
+ });
393
+ }
394
+ catch {
395
+ // Fire and forget — don't block critic on log failures
396
+ }
397
+ }
365
398
  /**
366
399
  * Run critic validation on a parsed plan.
367
400
  * Routes to Claude CLI (Anthropic) or HTTP API (other providers).
368
401
  * Returns the critic result, or null if critic fails (non-blocking).
369
402
  */
370
- export async function runCriticValidation(claudePath, model, prd, plan, env, taskLabel, provider, providerApiKey) {
403
+ export async function runCriticValidation(claudePath, model, prd, plan, env, taskLabel, provider, providerApiKey, taskId) {
371
404
  const criticPrompt = buildCriticPrompt(prd, plan);
372
405
  const effectiveProvider = provider || "anthropic";
373
406
  console.log(`${ts()} ${taskLabel} ${chalk.dim(`Running critic validation (${effectiveProvider})...`)}`);
407
+ if (taskId) {
408
+ postLog(taskId, `${PREFIX} Running critic validation (${effectiveProvider})...`);
409
+ }
374
410
  try {
375
411
  let rawCriticOutput;
376
412
  if (effectiveProvider === "anthropic") {
377
- rawCriticOutput = await runCriticCli(claudePath, model, criticPrompt, env);
413
+ rawCriticOutput = await runCriticCli(claudePath, model, criticPrompt, env, taskId);
378
414
  }
379
415
  else {
380
416
  if (!providerApiKey) {
package/dist/planner.js CHANGED
@@ -20,6 +20,50 @@ import { findClaudePath } from "./config.js";
20
20
  import { api } from "./api.js";
21
21
  import { parseExecutionPlan, applyFileCap, applyStoryCap, resolveFileOverlaps, serializePlan, runCriticValidation, formatCriticFeedback, AUTO_APPROVAL_THRESHOLD, } from "./plan-validator.js";
22
22
  import { generateTextWithTools } from "./ai-sdk-generate.js";
23
+ /**
24
+ * Extract token usage from a stream-json event.
25
+ * Claude reports cumulative tokens, so we use Math.max to track the highest values.
26
+ */
27
+ function extractTokenUsage(event, usage) {
28
+ const paths = [
29
+ event.usage,
30
+ event.message?.usage,
31
+ event.result?.usage,
32
+ ];
33
+ for (const u of paths) {
34
+ if (u && typeof u === "object") {
35
+ const d = u;
36
+ if (typeof d.input_tokens === "number")
37
+ usage.inputTokens = Math.max(usage.inputTokens, d.input_tokens);
38
+ if (typeof d.output_tokens === "number")
39
+ usage.outputTokens = Math.max(usage.outputTokens, d.output_tokens);
40
+ if (typeof d.cache_creation_input_tokens === "number")
41
+ usage.cacheCreationTokens = Math.max(usage.cacheCreationTokens, d.cache_creation_input_tokens);
42
+ if (typeof d.cache_read_input_tokens === "number")
43
+ usage.cacheReadTokens = Math.max(usage.cacheReadTokens, d.cache_read_input_tokens);
44
+ }
45
+ }
46
+ }
47
+ /**
48
+ * Report partial token usage to the cloud API.
49
+ */
50
+ async function reportPlanningUsage(taskId, usage, model, mode) {
51
+ if (usage.inputTokens === 0 && usage.outputTokens === 0)
52
+ return;
53
+ try {
54
+ await api.post(`/api/tasks/${taskId}/usage/partial`, {
55
+ inputTokens: usage.inputTokens,
56
+ outputTokens: usage.outputTokens,
57
+ cacheCreationTokens: usage.cacheCreationTokens,
58
+ cacheReadTokens: usage.cacheReadTokens,
59
+ model,
60
+ mode,
61
+ });
62
+ }
63
+ catch {
64
+ // Fire and forget
65
+ }
66
+ }
23
67
  /** Max Planner-Critic iterations before giving up */
24
68
  const MAX_ITERATIONS = 3;
25
69
  /** Timestamp prefix */
@@ -83,16 +127,22 @@ function phaseLabel(phase, elapsed) {
83
127
  * Run Claude CLI with stream-json output, posting real-time phase milestones
84
128
  * to the cloud dashboard — identical terminal experience to cloud planning.
85
129
  */
86
- function runClaudeCli(claudePath, model, prompt, env, taskId, startTime) {
130
+ function runClaudeCli(claudePath, model, prompt, env, taskId, startTime, disableTools = false) {
87
131
  const taskLabel = chalk.cyan(taskId.slice(0, 8));
88
132
  return new Promise((resolve, reject) => {
89
- const proc = spawn(claudePath, [
133
+ const cliArgs = [
90
134
  "--print",
91
135
  "--verbose",
92
136
  "--output-format", "stream-json",
93
137
  "--model", model,
94
138
  "--permission-mode", "bypassPermissions",
95
- ], {
139
+ ];
140
+ // When analysts already explored the repo, strip tools so the planner
141
+ // doesn't waste turns re-exploring — it has all context in the prompt.
142
+ if (disableTools) {
143
+ cliArgs.push("--allowedTools", "");
144
+ }
145
+ const proc = spawn(claudePath, cliArgs, {
96
146
  env,
97
147
  stdio: ["pipe", "pipe", "pipe"],
98
148
  });
@@ -103,6 +153,9 @@ function runClaudeCli(claudePath, model, prompt, env, taskId, startTime) {
103
153
  let stderrOutput = "";
104
154
  let charsReceived = 0;
105
155
  let toolCallCount = 0;
156
+ // Token usage accumulator — extract from stream events using Math.max
157
+ const tokenUsage = { inputTokens: 0, outputTokens: 0, cacheCreationTokens: 0, cacheReadTokens: 0 };
158
+ let resultModel = model;
106
159
  // Buffered text streaming — flush complete lines to dashboard every 1s.
107
160
  // LLM deltas are tiny fragments; we accumulate until we see '\n', then
108
161
  // a 1s interval flushes all complete lines as log entries. On exit we
@@ -185,6 +238,7 @@ function runClaudeCli(claudePath, model, prompt, env, taskId, startTime) {
185
238
  if (block.type === "text" && block.text) {
186
239
  fullText += block.text;
187
240
  charsReceived += block.text.length;
241
+ textBuffer += block.text;
188
242
  if (!firstTextSeen) {
189
243
  firstTextSeen = true;
190
244
  if (toolCallCount > 0 && !milestoneSent.analyzing) {
@@ -210,6 +264,7 @@ function runClaudeCli(claudePath, model, prompt, env, taskId, startTime) {
210
264
  else if (typeof content === "string" && content) {
211
265
  fullText += content;
212
266
  charsReceived += content.length;
267
+ textBuffer += content;
213
268
  }
214
269
  }
215
270
  else if (event.type === "content_block_delta" && event.delta?.text) {
@@ -240,6 +295,16 @@ function runClaudeCli(claudePath, model, prompt, env, taskId, startTime) {
240
295
  else if (event.type === "result" && event.result) {
241
296
  resultText = typeof event.result === "string" ? event.result : "";
242
297
  }
298
+ // Extract token usage from any event that carries it
299
+ extractTokenUsage(event, tokenUsage);
300
+ if (event.type === "result" && event.total_cost_usd !== undefined) {
301
+ // Result event also carries model info
302
+ if (event.modelUsage && typeof event.modelUsage === "object") {
303
+ const models = Object.keys(event.modelUsage);
304
+ if (models.length > 0)
305
+ resultModel = models[0];
306
+ }
307
+ }
243
308
  }
244
309
  catch {
245
310
  // Not valid JSON — raw text, accumulate
@@ -251,10 +316,17 @@ function runClaudeCli(claudePath, model, prompt, env, taskId, startTime) {
251
316
  proc.stderr.on("data", (chunk) => {
252
317
  stderrOutput += chunk.toString();
253
318
  });
319
+ // Report partial token usage every 30s during planning
320
+ const usageReportInterval = setInterval(() => {
321
+ if (tokenUsage.inputTokens > 0 || tokenUsage.outputTokens > 0) {
322
+ reportPlanningUsage(taskId, tokenUsage, resultModel, "greatest").catch(() => { });
323
+ }
324
+ }, 30_000);
254
325
  function cleanupAll() {
255
326
  clearInterval(progressInterval);
256
327
  clearInterval(sseProgressInterval);
257
328
  clearInterval(textFlushInterval);
329
+ clearInterval(usageReportInterval);
258
330
  flushTextBuffer(true);
259
331
  }
260
332
  const timeout = setTimeout(() => {
@@ -268,6 +340,8 @@ function runClaudeCli(claudePath, model, prompt, env, taskId, startTime) {
268
340
  // Emit final "validating" phase to dashboard
269
341
  const elapsedAtClose = Math.round((Date.now() - startTime) / 1000);
270
342
  postProgress(taskId, "validating", elapsedAtClose, "Validating plan...", charsReceived, toolCallCount);
343
+ // Final usage report
344
+ reportPlanningUsage(taskId, tokenUsage, resultModel, "greatest").catch(() => { });
271
345
  if (code !== 0) {
272
346
  reject(new Error(`Claude CLI failed (exit ${code}): ${stderrOutput.substring(0, 300)}`));
273
347
  }
@@ -766,7 +840,9 @@ export async function planTask(task, config, credentials) {
766
840
  let rawOutput;
767
841
  try {
768
842
  if (isAnthropicPlanning) {
769
- rawOutput = await runClaudeCli(claudePath, cliModel, currentPrompt, cleanEnv, task.id, startTime);
843
+ // Disable tools when analysts already provided repo context
844
+ const hasAnalystContext = enhancedBasePrompt !== basePrompt;
845
+ rawOutput = await runClaudeCli(claudePath, cliModel, currentPrompt, cleanEnv, task.id, startTime, hasAnalystContext);
770
846
  }
771
847
  else {
772
848
  if (!providerApiKey) {
@@ -845,7 +921,7 @@ export async function planTask(task, config, credentials) {
845
921
  console.log(`${ts()} ${taskLabel} Plan: ${chalk.bold(plan.stories.length)} stories (max ${maxStories})`);
846
922
  await postLog(task.id, `${PREFIX} Plan generated: ${plan.stories.length} stories (${formatElapsed(elapsed)}). Running critic validation...`);
847
923
  // 2d. Run critic validation
848
- const criticResult = await runCriticValidation(claudePath, cliModel, prd, plan, cleanEnv, taskLabel, provider, providerApiKey);
924
+ const criticResult = await runCriticValidation(claudePath, cliModel, prd, plan, cleanEnv, taskLabel, provider, providerApiKey, task.id);
849
925
  // Track best plan across iterations
850
926
  if (criticResult && criticResult.score > bestScore) {
851
927
  bestPlan = plan;
package/dist/spawner.js CHANGED
@@ -140,6 +140,17 @@ export async function spawnWorker(task, config, orgConfig, credentials) {
140
140
  return;
141
141
  }
142
142
  if (claudeConfigDir) {
143
+ // Ensure credentials file is readable AND writable inside container.
144
+ // Claude CLI creates .credentials.json with 600 permissions, but the container
145
+ // runs as UID 1001 (worker) while the host user is UID 1000. Without this chmod,
146
+ // the mounted file is unreadable inside the container → "Invalid API key" errors.
147
+ const credFile = path.join(claudeConfigDir, ".credentials.json");
148
+ try {
149
+ fs.chmodSync(credFile, 0o666);
150
+ }
151
+ catch {
152
+ // Ignore - file may not exist yet
153
+ }
143
154
  const dockerClaudeDir = toDockerPath(claudeConfigDir);
144
155
  dockerArgs.push("-v", `${dockerClaudeDir}:/home/worker/.claude`);
145
156
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@workermill/agent",
3
- "version": "0.7.17",
3
+ "version": "0.7.19",
4
4
  "description": "WorkerMill Remote Agent - Run AI workers locally with your Claude Max subscription",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",