npm - @workermill/agent - Versions diffs - 0.7.17 → 0.7.19 - Mend

@workermill/agent 0.7.17 → 0.7.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/dist/plan-validator.d.ts CHANGED Viewed

@@ -90,7 +90,7 @@ export declare function parseCriticResponse(text: string): CriticResult;
  * Run the critic via Claude CLI (lightweight — no tools, just reasoning).
  * Returns the raw text output.
  */
-export declare function runCriticCli(claudePath: string, model: string, prompt: string, env: Record<string, string | undefined>): Promise<string>;
+export declare function runCriticCli(claudePath: string, model: string, prompt: string, env: Record<string, string | undefined>, taskId?: string): Promise<string>;
 /**
  * Format critic feedback for appending to the planner prompt on re-run.
  */
@@ -100,5 +100,5 @@ export declare function formatCriticFeedback(critic: CriticResult): string;
  * Routes to Claude CLI (Anthropic) or HTTP API (other providers).
  * Returns the critic result, or null if critic fails (non-blocking).
  */
-export declare function runCriticValidation(claudePath: string, model: string, prd: string, plan: ExecutionPlan, env: Record<string, string | undefined>, taskLabel: string, provider?: AIProvider, providerApiKey?: string): Promise<CriticResult | null>;
+export declare function runCriticValidation(claudePath: string, model: string, prd: string, plan: ExecutionPlan, env: Record<string, string | undefined>, taskLabel: string, provider?: AIProvider, providerApiKey?: string, taskId?: string): Promise<CriticResult | null>;
 export { AUTO_APPROVAL_THRESHOLD };

package/dist/plan-validator.js CHANGED Viewed

@@ -12,6 +12,7 @@
 import { spawn } from "child_process";
 import chalk from "chalk";
 import { generateText } from "./providers.js";
+import { api } from "./api.js";
 // ============================================================================
 // CONSTANTS
 // ============================================================================
@@ -277,7 +278,7 @@ export function parseCriticResponse(text) {
  * Run the critic via Claude CLI (lightweight — no tools, just reasoning).
  * Returns the raw text output.
  */
-export function runCriticCli(claudePath, model, prompt, env) {
+export function runCriticCli(claudePath, model, prompt, env, taskId) {
     return new Promise((resolve, reject) => {
         const proc = spawn(claudePath, [
             "--print",
@@ -294,7 +295,21 @@ export function runCriticCli(claudePath, model, prompt, env) {
         let stdout = "";
         let stderr = "";
         proc.stdout.on("data", (data) => {
-            stdout += data.toString();
+            const chunk = data.toString();
+            stdout += chunk;
+            // Stream critic reasoning to dashboard in real-time
+            const lines = chunk.split("\n").filter((l) => l.trim());
+            for (const line of lines) {
+                const trimmed = line.trim().length > 200
+                    ? line.trim().substring(0, 200) + "…"
+                    : line.trim();
+                if (trimmed) {
+                    if (taskId) {
+                        postLog(taskId, `${PREFIX} [critic] ${trimmed}`, "output");
+                    }
+                    console.log(`${ts()} ${chalk.dim("🔍")} ${chalk.dim(trimmed)}`);
+                }
+            }
         });
         proc.stderr.on("data", (data) => {
             stderr += data.toString();
@@ -358,23 +373,44 @@ export function formatCriticFeedback(critic) {
     lines.push("**You MUST address ALL feedback above.** Each story must target at most 5 files.", "Stories MUST NOT overlap on targetFiles. Generate a revised plan.");
     return lines.join("\n");
 }
+/** Consistent prefix matching planner dashboard format */
+const PREFIX = "[🗺️ planning_agent 🤖]";
 /** Timestamp prefix for console logs */
 function ts() {
     return chalk.dim(new Date().toLocaleTimeString());
 }
+/**
+ * Post a log message to the cloud dashboard for real-time visibility.
+ */
+async function postLog(taskId, message, type = "system", severity = "info") {
+    try {
+        await api.post("/api/control-center/logs", {
+            taskId,
+            type,
+            message,
+            severity,
+        });
+    }
+    catch {
+        // Fire and forget — don't block critic on log failures
+    }
+}
 /**
  * Run critic validation on a parsed plan.
  * Routes to Claude CLI (Anthropic) or HTTP API (other providers).
  * Returns the critic result, or null if critic fails (non-blocking).
  */
-export async function runCriticValidation(claudePath, model, prd, plan, env, taskLabel, provider, providerApiKey) {
+export async function runCriticValidation(claudePath, model, prd, plan, env, taskLabel, provider, providerApiKey, taskId) {
     const criticPrompt = buildCriticPrompt(prd, plan);
     const effectiveProvider = provider || "anthropic";
     console.log(`${ts()} ${taskLabel} ${chalk.dim(`Running critic validation (${effectiveProvider})...`)}`);
+    if (taskId) {
+        postLog(taskId, `${PREFIX} Running critic validation (${effectiveProvider})...`);
+    }
     try {
         let rawCriticOutput;
         if (effectiveProvider === "anthropic") {
-            rawCriticOutput = await runCriticCli(claudePath, model, criticPrompt, env);
+            rawCriticOutput = await runCriticCli(claudePath, model, criticPrompt, env, taskId);
         }
         else {
             if (!providerApiKey) {

package/dist/planner.js CHANGED Viewed

@@ -20,6 +20,50 @@ import { findClaudePath } from "./config.js";
 import { api } from "./api.js";
 import { parseExecutionPlan, applyFileCap, applyStoryCap, resolveFileOverlaps, serializePlan, runCriticValidation, formatCriticFeedback, AUTO_APPROVAL_THRESHOLD, } from "./plan-validator.js";
 import { generateTextWithTools } from "./ai-sdk-generate.js";
+/**
+ * Extract token usage from a stream-json event.
+ * Claude reports cumulative tokens, so we use Math.max to track the highest values.
+ */
+function extractTokenUsage(event, usage) {
+    const paths = [
+        event.usage,
+        event.message?.usage,
+        event.result?.usage,
+    ];
+    for (const u of paths) {
+        if (u && typeof u === "object") {
+            const d = u;
+            if (typeof d.input_tokens === "number")
+                usage.inputTokens = Math.max(usage.inputTokens, d.input_tokens);
+            if (typeof d.output_tokens === "number")
+                usage.outputTokens = Math.max(usage.outputTokens, d.output_tokens);
+            if (typeof d.cache_creation_input_tokens === "number")
+                usage.cacheCreationTokens = Math.max(usage.cacheCreationTokens, d.cache_creation_input_tokens);
+            if (typeof d.cache_read_input_tokens === "number")
+                usage.cacheReadTokens = Math.max(usage.cacheReadTokens, d.cache_read_input_tokens);
+        }
+    }
+}
+/**
+ * Report partial token usage to the cloud API.
+ */
+async function reportPlanningUsage(taskId, usage, model, mode) {
+    if (usage.inputTokens === 0 && usage.outputTokens === 0)
+        return;
+    try {
+        await api.post(`/api/tasks/${taskId}/usage/partial`, {
+            inputTokens: usage.inputTokens,
+            outputTokens: usage.outputTokens,
+            cacheCreationTokens: usage.cacheCreationTokens,
+            cacheReadTokens: usage.cacheReadTokens,
+            model,
+            mode,
+        });
+    }
+    catch {
+        // Fire and forget
+    }
+}
 /** Max Planner-Critic iterations before giving up */
 const MAX_ITERATIONS = 3;
 /** Timestamp prefix */
@@ -83,16 +127,22 @@ function phaseLabel(phase, elapsed) {
  * Run Claude CLI with stream-json output, posting real-time phase milestones
  * to the cloud dashboard — identical terminal experience to cloud planning.
  */
-function runClaudeCli(claudePath, model, prompt, env, taskId, startTime) {
+function runClaudeCli(claudePath, model, prompt, env, taskId, startTime, disableTools = false) {
     const taskLabel = chalk.cyan(taskId.slice(0, 8));
     return new Promise((resolve, reject) => {
-        const proc = spawn(claudePath, [
+        const cliArgs = [
             "--print",
             "--verbose",
             "--output-format", "stream-json",
             "--model", model,
             "--permission-mode", "bypassPermissions",
-        ], {
+        ];
+        // When analysts already explored the repo, strip tools so the planner
+        // doesn't waste turns re-exploring — it has all context in the prompt.
+        if (disableTools) {
+            cliArgs.push("--allowedTools", "");
+        }
+        const proc = spawn(claudePath, cliArgs, {
             env,
             stdio: ["pipe", "pipe", "pipe"],
         });
@@ -103,6 +153,9 @@ function runClaudeCli(claudePath, model, prompt, env, taskId, startTime) {
         let stderrOutput = "";
         let charsReceived = 0;
         let toolCallCount = 0;
+        // Token usage accumulator — extract from stream events using Math.max
+        const tokenUsage = { inputTokens: 0, outputTokens: 0, cacheCreationTokens: 0, cacheReadTokens: 0 };
+        let resultModel = model;
         // Buffered text streaming — flush complete lines to dashboard every 1s.
         // LLM deltas are tiny fragments; we accumulate until we see '\n', then
         // a 1s interval flushes all complete lines as log entries.  On exit we
@@ -185,6 +238,7 @@ function runClaudeCli(claudePath, model, prompt, env, taskId, startTime) {
                                 if (block.type === "text" && block.text) {
                                     fullText += block.text;
                                     charsReceived += block.text.length;
+                                    textBuffer += block.text;
                                     if (!firstTextSeen) {
                                         firstTextSeen = true;
                                         if (toolCallCount > 0 && !milestoneSent.analyzing) {
@@ -210,6 +264,7 @@ function runClaudeCli(claudePath, model, prompt, env, taskId, startTime) {
                         else if (typeof content === "string" && content) {
                             fullText += content;
                             charsReceived += content.length;
+                            textBuffer += content;
                         }
                     }
                     else if (event.type === "content_block_delta" && event.delta?.text) {
@@ -240,6 +295,16 @@ function runClaudeCli(claudePath, model, prompt, env, taskId, startTime) {
                     else if (event.type === "result" && event.result) {
                         resultText = typeof event.result === "string" ? event.result : "";
                     }
+                    // Extract token usage from any event that carries it
+                    extractTokenUsage(event, tokenUsage);
+                    if (event.type === "result" && event.total_cost_usd !== undefined) {
+                        // Result event also carries model info
+                        if (event.modelUsage && typeof event.modelUsage === "object") {
+                            const models = Object.keys(event.modelUsage);
+                            if (models.length > 0)
+                                resultModel = models[0];
+                        }
+                    }
                 }
                 catch {
                     // Not valid JSON — raw text, accumulate
@@ -251,10 +316,17 @@ function runClaudeCli(claudePath, model, prompt, env, taskId, startTime) {
         proc.stderr.on("data", (chunk) => {
             stderrOutput += chunk.toString();
         });
+        // Report partial token usage every 30s during planning
+        const usageReportInterval = setInterval(() => {
+            if (tokenUsage.inputTokens > 0 || tokenUsage.outputTokens > 0) {
+                reportPlanningUsage(taskId, tokenUsage, resultModel, "greatest").catch(() => { });
+            }
+        }, 30_000);
         function cleanupAll() {
             clearInterval(progressInterval);
             clearInterval(sseProgressInterval);
             clearInterval(textFlushInterval);
+            clearInterval(usageReportInterval);
             flushTextBuffer(true);
         }
         const timeout = setTimeout(() => {
@@ -268,6 +340,8 @@ function runClaudeCli(claudePath, model, prompt, env, taskId, startTime) {
             // Emit final "validating" phase to dashboard
             const elapsedAtClose = Math.round((Date.now() - startTime) / 1000);
             postProgress(taskId, "validating", elapsedAtClose, "Validating plan...", charsReceived, toolCallCount);
+            // Final usage report
+            reportPlanningUsage(taskId, tokenUsage, resultModel, "greatest").catch(() => { });
             if (code !== 0) {
                 reject(new Error(`Claude CLI failed (exit ${code}): ${stderrOutput.substring(0, 300)}`));
             }
@@ -766,7 +840,9 @@ export async function planTask(task, config, credentials) {
             let rawOutput;
             try {
                 if (isAnthropicPlanning) {
-                    rawOutput = await runClaudeCli(claudePath, cliModel, currentPrompt, cleanEnv, task.id, startTime);
+                    // Disable tools when analysts already provided repo context
+                    const hasAnalystContext = enhancedBasePrompt !== basePrompt;
+                    rawOutput = await runClaudeCli(claudePath, cliModel, currentPrompt, cleanEnv, task.id, startTime, hasAnalystContext);
                 }
                 else {
                     if (!providerApiKey) {
@@ -845,7 +921,7 @@ export async function planTask(task, config, credentials) {
             console.log(`${ts()} ${taskLabel} Plan: ${chalk.bold(plan.stories.length)} stories (max ${maxStories})`);
             await postLog(task.id, `${PREFIX} Plan generated: ${plan.stories.length} stories (${formatElapsed(elapsed)}). Running critic validation...`);
             // 2d. Run critic validation
-            const criticResult = await runCriticValidation(claudePath, cliModel, prd, plan, cleanEnv, taskLabel, provider, providerApiKey);
+            const criticResult = await runCriticValidation(claudePath, cliModel, prd, plan, cleanEnv, taskLabel, provider, providerApiKey, task.id);
             // Track best plan across iterations
             if (criticResult && criticResult.score > bestScore) {
                 bestPlan = plan;

package/dist/spawner.js CHANGED Viewed

@@ -140,6 +140,17 @@ export async function spawnWorker(task, config, orgConfig, credentials) {
         return;
     }
     if (claudeConfigDir) {
+        // Ensure credentials file is readable AND writable inside container.
+        // Claude CLI creates .credentials.json with 600 permissions, but the container
+        // runs as UID 1001 (worker) while the host user is UID 1000. Without this chmod,
+        // the mounted file is unreadable inside the container → "Invalid API key" errors.
+        const credFile = path.join(claudeConfigDir, ".credentials.json");
+        try {
+            fs.chmodSync(credFile, 0o666);
+        }
+        catch {
+            // Ignore - file may not exist yet
+        }
         const dockerClaudeDir = toDockerPath(claudeConfigDir);
         dockerArgs.push("-v", `${dockerClaudeDir}:/home/worker/.claude`);
     }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@workermill/agent",
-  "version": "0.7.17",
+  "version": "0.7.19",
   "description": "WorkerMill Remote Agent - Run AI workers locally with your Claude Max subscription",
   "type": "module",
   "main": "./dist/index.js",