npm - agent-state-machine - Versions diffs - 2.3.0 → 2.5.0 - Mend

agent-state-machine 2.3.0 → 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22)

package/bin/cli.js +65 -9
package/lib/llm.js +134 -22
package/lib/remote/client.js +19 -6
package/lib/runtime/agent.js +127 -3
package/lib/runtime/runtime.js +127 -5
package/package.json +1 -1
package/templates/project-builder/config.js +4 -4
package/vercel-server/api/config/[token].js +76 -0
package/vercel-server/api/history/[token].js +1 -0
package/vercel-server/api/ws/cli.js +39 -20
package/vercel-server/local-server.js +98 -11
package/vercel-server/public/remote/assets/index-BHvHkNOe.css +1 -0
package/vercel-server/public/remote/assets/index-BSL55rdk.js +188 -0
package/vercel-server/public/remote/index.html +2 -2
package/vercel-server/ui/src/App.jsx +36 -1
package/vercel-server/ui/src/components/ContentCard.jsx +350 -19
package/vercel-server/ui/src/components/Footer.jsx +1 -6
package/vercel-server/ui/src/components/Header.jsx +59 -11
package/vercel-server/ui/src/components/SettingsModal.jsx +130 -0
package/vercel-server/ui/src/index.css +53 -0
package/vercel-server/public/remote/assets/index-BTLc1QSv.js +0 -168
package/vercel-server/public/remote/assets/index-DLa4X08t.css +0 -1

package/bin/cli.js CHANGED

@@ -99,6 +99,7 @@ Options:
   --new, -n       Generate a new remote follow path
   --full-auto, -a Auto-select first option for choice interactions (no blocking)
   --delay, -d     Seconds to wait before auto-select in full-auto mode (default: 20)
+  --non-verbose, -q  Suppress per-agent token usage display (show only final summary)
   -reset          Reset workflow state before running
   -reset-hard     Hard reset workflow before running
   --help, -h      Show help
@@ -190,6 +191,50 @@ function summarizeStatus(state) {
   return state.status ? ` [${state.status}]` : '';
 }
+/**
+ * Display usage summary after workflow completion
+ */
+function displayUsageSummary(runtime) {
+  const u = runtime._usageTotals;
+  if (!u || (!u.totalInputTokens && !u.totalOutputTokens)) return;
+  const C = {
+    bold: '\x1b[1m',
+    dim: '\x1b[2m',
+    cyan: '\x1b[36m',
+    reset: '\x1b[0m'
+  };
+  const formatTokens = (count) => {
+    if (count >= 1000000) return `${(count / 1000000).toFixed(1)}M`;
+    if (count >= 10000) return `${Math.round(count / 1000)}k`;
+    if (count >= 1000) return `${(count / 1000).toFixed(1)}k`;
+    return count.toString();
+  };
+  console.log(`\n${C.bold}Token Usage Summary${C.reset}`);
+  console.log(`${C.dim}${'─'.repeat(40)}${C.reset}`);
+  console.log(`  Input:  ${formatTokens(u.totalInputTokens)}`);
+  console.log(`  Output: ${formatTokens(u.totalOutputTokens)}`);
+  if (u.totalCachedTokens > 0) {
+    console.log(`  Cached: ${formatTokens(u.totalCachedTokens)}`);
+  }
+  console.log(`  ${C.bold}Total:  ${formatTokens(u.totalInputTokens + u.totalOutputTokens)}${C.reset}`);
+  if (u.totalCost > 0) {
+    console.log(`  ${C.cyan}Cost:   $${u.totalCost.toFixed(4)}${C.reset}`);
+  }
+  // Show per-model breakdown if multiple models used
+  const models = Object.keys(u.modelUsage || {});
+  if (models.length > 1) {
+    console.log(`\n${C.dim}By Model:${C.reset}`);
+    for (const model of models) {
+      const m = u.modelUsage[model];
+      console.log(`  ${model}: ${formatTokens(m.inputTokens)} in / ${formatTokens(m.outputTokens)} out`);
+    }
+  }
+}
 function listWorkflows() {
   const root = workflowsRoot();
@@ -242,7 +287,8 @@ async function runOrResume(
     preReset = false,
     preResetHard = false,
     fullAuto = false,
-    autoSelectDelay = null
+    autoSelectDelay = null,
+    nonVerbose = false
   } = {}
 ) {
   const workflowDir = resolveWorkflowDir(workflowName);
@@ -292,13 +338,7 @@ async function runOrResume(
     remoteUrl = process.env.STATE_MACHINE_REMOTE_URL || DEFAULT_REMOTE_URL;
   }
-  // Enable remote follow mode if we have a URL
-  if (remoteUrl) {
-    const sessionToken = ensureRemotePath(configFile, { forceNew: forceNewRemotePath });
-    await runtime.enableRemote(remoteUrl, { sessionToken, uiBaseUrl: useLocalServer });
-  }
-  // Set full-auto mode from CLI flag (will be merged with config.js during runWorkflow)
+  // Set full-auto mode from CLI flag BEFORE enabling remote (so session_init includes correct config)
   if (fullAuto) {
     runtime.workflowConfig.fullAuto = true;
     if (autoSelectDelay !== null) {
@@ -308,6 +348,17 @@ async function runOrResume(
     console.log(`\n\x1b[36m\x1b[1m⚡ Full-auto mode enabled\x1b[0m - Agent will auto-select recommended options after ${delay}s countdown`);
   }
+  // Enable remote follow mode if we have a URL
+  if (remoteUrl) {
+    const sessionToken = ensureRemotePath(configFile, { forceNew: forceNewRemotePath });
+    await runtime.enableRemote(remoteUrl, { sessionToken, uiBaseUrl: useLocalServer });
+  }
+  // Set non-verbose mode from CLI flag
+  if (nonVerbose) {
+    runtime.workflowConfig.nonVerbose = true;
+  }
   // Prevent system sleep while workflow runs (macOS only)
   // Display can still sleep, but system stays awake for remote follow
   const stopCaffeinate = preventSleep();
@@ -317,6 +368,9 @@ async function runOrResume(
   try {
     await runtime.runWorkflow(workflowUrl);
+    // Display usage summary after workflow completion
+    displayUsageSummary(runtime);
   } finally {
     // Allow sleep again
     if (stopCaffeinate) {
@@ -385,6 +439,7 @@ async function main() {
         const preReset = args.includes('-reset');
         const preResetHard = args.includes('-reset-hard');
         const fullAuto = args.includes('--full-auto') || args.includes('-a');
+        const nonVerbose = args.includes('--non-verbose') || args.includes('-q') || args.includes('--quiet');
         const remoteEnabled = !useLocalServer; // Use Vercel if not local
         // Parse --delay or -d flag
@@ -405,7 +460,8 @@ async function main() {
             preReset,
             preResetHard,
             fullAuto,
-            autoSelectDelay
+            autoSelectDelay,
+            nonVerbose
           });
         } catch (err) {
           console.error('Error:', err.message || String(err));

package/lib/llm.js CHANGED

@@ -12,6 +12,111 @@ import { resolveUnknownModel } from './runtime/model-resolution.js';
 const require = createRequire(import.meta.url);
+/**
+ * Parse Claude CLI JSON output
+ * @param {string} output - Raw JSON output from claude --output-format json
+ * @returns {{ text: string, model: string|null, usage: object|null }}
+ */
+function parseClaudeOutput(output) {
+  try {
+    const json = JSON.parse(output);
+    const modelUsage = json.modelUsage || {};
+    const modelName = Object.keys(modelUsage)[0] || null;
+    const usage = json.usage ? {
+      inputTokens: json.usage.input_tokens || 0,
+      outputTokens: json.usage.output_tokens || 0,
+      cacheReadInputTokens: json.usage.cache_read_input_tokens || 0,
+      cacheCreationInputTokens: json.usage.cache_creation_input_tokens || 0,
+      cost: json.total_cost_usd || null
+    } : null;
+    return {
+      text: json.result || output,
+      model: modelName,
+      usage
+    };
+  } catch {
+    return { text: output, model: null, usage: null };
+  }
+}
+/**
+ * Parse Gemini CLI JSON output
+ * @param {string} output - Raw JSON output from gemini --output-format json
+ * @returns {{ text: string, model: string|null, usage: object|null }}
+ */
+function parseGeminiOutput(output) {
+  try {
+    const json = JSON.parse(output);
+    const stats = json.stats?.models || {};
+    const modelName = Object.keys(stats)[0] || null;
+    const tokens = modelName ? stats[modelName]?.tokens || {} : {};
+    const usage = {
+      inputTokens: tokens.input || tokens.prompt || 0,
+      outputTokens: tokens.candidates || 0,
+      cachedTokens: tokens.cached || 0,
+      thoughtTokens: tokens.thoughts || 0
+    };
+    return {
+      text: json.response || output,
+      model: modelName,
+      usage
+    };
+  } catch {
+    return { text: output, model: null, usage: null };
+  }
+}
+/**
+ * Parse Codex CLI JSON output (NDJSON format)
+ * @param {string} output - Raw NDJSON output from codex --json
+ * @returns {{ text: string, model: string|null, usage: object|null }}
+ */
+function parseCodexOutput(output) {
+  const lines = output.trim().split('\n');
+  let text = '';
+  let usage = null;
+  for (const line of lines) {
+    try {
+      const json = JSON.parse(line);
+      // Extract text from agent_message items
+      if (json.type === 'item.completed' && json.item?.type === 'agent_message') {
+        text = json.item.text || text;
+      }
+      // Extract usage from turn.completed event
+      if (json.type === 'turn.completed' && json.usage) {
+        usage = {
+          inputTokens: json.usage.input_tokens || 0,
+          outputTokens: json.usage.output_tokens || 0,
+          cachedInputTokens: json.usage.cached_input_tokens || 0
+        };
+      }
+    } catch {
+      // Non-JSON line - might be the actual response text
+      if (!text && line.trim()) text = line;
+    }
+  }
+  return { text, model: null, usage };
+}
+/**
+ * Parse CLI output based on tool type
+ * @param {string} output - Raw CLI output
+ * @param {string} baseCmd - Base command (claude, gemini, codex)
+ * @returns {{ text: string, model: string|null, usage: object|null }}
+ */
+function parseCLIOutput(output, baseCmd) {
+  if (baseCmd === 'claude') return parseClaudeOutput(output);
+  if (baseCmd === 'gemini') return parseGeminiOutput(output);
+  if (baseCmd === 'codex') return parseCodexOutput(output);
+  return { text: output.trim(), model: null, usage: null };
+}
 /**
  * LLM Helper Module
  *
@@ -250,17 +355,15 @@ async function executeCLI(command, promptText, options = {}, apiKeys = {}) {
     if (baseCmd === 'claude') {
       args.push('--print');
       args.push('--permission-mode', 'acceptEdits');
+      args.push('--output-format', 'json');
       // Input via stdin
     } else if (baseCmd === 'gemini') {
       args.push('--approval-mode', 'auto_edit');
+      args.push('--output-format', 'json');
       // Input via stdin
     } else if (baseCmd === 'codex') {
       ensureCodexExec();
-      const lastMessageFile = path.join(
-        os.tmpdir(),
-        `codex-last-message-${process.pid}-${Date.now()}.txt`
-      );
-      args.push('--output-last-message', lastMessageFile);
+      args.push('--json');
       args.push('-'); // Explicitly read from stdin
     } else {
       // Generic CLI: Fallback to temp file if not a known stdin consumer
@@ -310,24 +413,23 @@ async function executeCLI(command, promptText, options = {}, apiKeys = {}) {
       }
       if (code === 0) {
-        if (baseCmd === 'codex') {
-          const outputFlagIndex = args.findIndex(a => a === '--output-last-message' || a === '-o');
-          const outputFile = outputFlagIndex >= 0 ? args[outputFlagIndex + 1] : null;
-          if (outputFile && fs.existsSync(outputFile)) {
-            try {
-              stdout = fs.readFileSync(outputFile, 'utf-8');
-            } finally {
-              try { fs.unlinkSync(outputFile); } catch {}
-            }
-          }
+        // Parse JSON output for standard CLI tools
+        if (isStandardCLI) {
+          const parsed = parseCLIOutput(stdout, baseCmd);
+          resolve({
+            text: parsed.text,
+            model: parsed.model || command,
+            provider: 'cli',
+            usage: parsed.usage
+          });
+        } else {
+          resolve({
+            text: stdout.trim(),
+            model: command,
+            provider: 'cli',
+            usage: null
+          });
         }
-        resolve({
-          text: stdout.trim(),
-          model: command,
-          provider: 'cli',
-          usage: null
-        });
       } else {
         reject(new Error(`CLI command failed (exit ${code}): ${stderr || stdout}`));
       }
@@ -482,6 +584,16 @@ export async function llm(context, options) {
     result = await executeCLI(modelConfig, fullPrompt, options, apiKeys);
   }
+  // Record usage in agent tracker (if active)
+  if (result.usage) {
+    try {
+      const { recordLLMUsage } = await import('./runtime/agent.js');
+      recordLLMUsage(result.usage, result.model, result.provider);
+    } catch {
+      // Agent tracking not available (outside agent context)
+    }
+  }
   return { ...result, fullPrompt };
 }

package/lib/remote/client.js CHANGED

@@ -89,6 +89,7 @@ export class RemoteClient {
    * @param {string} options.serverUrl - Base URL of remote server (e.g., https://example.vercel.app)
    * @param {string} options.workflowName - Name of the workflow
    * @param {function} options.onInteractionResponse - Callback when interaction response received
+   * @param {function} [options.onConfigUpdate] - Callback when config update received from browser
    * @param {function} [options.onStatusChange] - Callback when connection status changes
    * @param {string} [options.sessionToken] - Optional session token to reuse
    * @param {boolean} [options.uiBaseUrl] - If true, return base URL for UI instead of /s/{token}
@@ -97,6 +98,7 @@ export class RemoteClient {
     this.serverUrl = options.serverUrl.replace(/\/$/, ''); // Remove trailing slash
     this.workflowName = options.workflowName;
     this.onInteractionResponse = options.onInteractionResponse;
+    this.onConfigUpdate = options.onConfigUpdate || (() => {});
     this.onStatusChange = options.onStatusChange || (() => {});
     this.uiBaseUrl = Boolean(options.uiBaseUrl);
@@ -166,16 +168,18 @@ export class RemoteClient {
   }
   /**
-   * Send initial session info with history
+   * Send initial session info with history and config
    * @param {Array} history - Array of history entries
+   * @param {object} [config] - Optional workflow config (fullAuto, autoSelectDelay)
    */
-  async sendSessionInit(history = []) {
+  async sendSessionInit(history = [], config = null) {
     this.initialHistorySent = true;
     await this.send({
       type: 'session_init',
       sessionToken: this.sessionToken,
       workflowName: this.workflowName,
       history,
+      config,
     });
   }
@@ -231,7 +235,7 @@ export class RemoteClient {
   }
   /**
-   * Poll for interaction responses
+   * Poll for interaction responses and config updates
    * Uses 35s timeout to stay under Vercel's 50s limit with buffer
    */
   async poll() {
@@ -246,20 +250,29 @@ export class RemoteClient {
         consecutiveErrors = 0; // Reset on success
         if (response.status === 200 && response.data) {
-          const { type, slug, targetKey, response: interactionResponse } = response.data;
+          const { type, slug, targetKey, response: interactionResponse, fullAuto, autoSelectDelay, stop } = response.data;
           if (type === 'interaction_response' && this.onInteractionResponse) {
             // Confirm receipt BEFORE processing - removes from Redis pending queue
-            // This ensures we don't lose the interaction if processing fails
             try {
               const confirmUrl = `${this.serverUrl}/api/ws/cli?token=${this.sessionToken}`;
               await makeRequest(confirmUrl, { method: 'DELETE' }, null, 10000);
             } catch (err) {
-              // Non-fatal - interaction will be re-delivered on next poll
               console.error(`${C.dim}Remote: Failed to confirm receipt: ${err.message}${C.reset}`);
             }
             this.onInteractionResponse(slug, targetKey, interactionResponse);
+          } else if (type === 'config_update') {
+            // Confirm receipt of config update
+            try {
+              const confirmUrl = `${this.serverUrl}/api/ws/cli?token=${this.sessionToken}&type=config`;
+              await makeRequest(confirmUrl, { method: 'DELETE' }, null, 10000);
+            } catch (err) {
+              console.error(`${C.dim}Remote: Failed to confirm config receipt: ${err.message}${C.reset}`);
+            }
+            // Call config update callback
+            this.onConfigUpdate({ fullAuto, autoSelectDelay, stop });
           }
         }

package/lib/runtime/agent.js CHANGED

@@ -13,9 +13,79 @@ import { pathToFileURL } from 'url';
 import { getCurrentRuntime } from './runtime.js';
 import { formatInteractionPrompt } from './interaction.js';
 import { withChangeTracking } from './track-changes.js';
+import { resolveUnknownModel } from './model-resolution.js';
+import { detectAvailableCLIs } from '../llm.js';
 const require = createRequire(import.meta.url);
+/**
+ * Token Usage Tracking
+ *
+ * Tracks LLM token usage across all calls within a single agent execution.
+ * The tracker is cleared before each agent runs and aggregated after completion.
+ */
+const AGENT_USAGE_KEY = Symbol.for('agent-state-machine.agent-usage');
+function getAgentUsageTracker() {
+  return globalThis[AGENT_USAGE_KEY] || (globalThis[AGENT_USAGE_KEY] = []);
+}
+export function clearAgentUsageTracker() {
+  globalThis[AGENT_USAGE_KEY] = [];
+}
+/**
+ * Record usage from an LLM call (called from llm.js)
+ */
+export function recordLLMUsage(usage, model, provider) {
+  if (!usage) return;
+  const tracker = getAgentUsageTracker();
+  tracker.push({ usage, model, provider, timestamp: new Date().toISOString() });
+}
+/**
+ * Aggregate all recorded usage into a summary
+ */
+export function aggregateUsage() {
+  const tracker = getAgentUsageTracker();
+  if (tracker.length === 0) return null;
+  const agg = {
+    inputTokens: 0,
+    outputTokens: 0,
+    cachedTokens: 0,
+    cost: 0,
+    calls: tracker.length,
+    models: {}
+  };
+  for (const { usage, model } of tracker) {
+    agg.inputTokens += usage.inputTokens || 0;
+    agg.outputTokens += usage.outputTokens || 0;
+    agg.cachedTokens += usage.cachedTokens || usage.cacheReadInputTokens || usage.cachedInputTokens || 0;
+    if (usage.cost) agg.cost += usage.cost;
+    const m = model || 'unknown';
+    if (!agg.models[m]) {
+      agg.models[m] = { inputTokens: 0, outputTokens: 0 };
+    }
+    agg.models[m].inputTokens += usage.inputTokens || 0;
+    agg.models[m].outputTokens += usage.outputTokens || 0;
+  }
+  return agg;
+}
+/**
+ * Format token count for display
+ */
+function formatTokens(count) {
+  if (count >= 1000000) return `${(count / 1000000).toFixed(1)}M`;
+  if (count >= 10000) return `${Math.round(count / 1000)}k`;
+  if (count >= 1000) return `${(count / 1000).toFixed(1)}k`;
+  return count.toString();
+}
 /**
  * Run an agent with context
  * @param {string} name - Agent name (file basename)
@@ -43,20 +113,49 @@ export async function agent(name, params = {}, options = {}) {
         console.log(`  [Agent: ${name}] Starting...`);
       }
+      // Clear usage tracker before each attempt
+      clearAgentUsageTracker();
       const result = await executeAgent(runtime, name, params, options);
       if (result && typeof result === 'object' && result._debug_prompt) {
         delete result._debug_prompt;
       }
+      // Aggregate token usage from all LLM calls in this agent
+      const usage = aggregateUsage();
       console.log(`  [Agent: ${name}] Completed`);
+      // Display token usage (unless non-verbose mode)
+      if (usage && !runtime.workflowConfig?.nonVerbose) {
+        let usageLine = `    Tokens: ${formatTokens(usage.inputTokens)} in / ${formatTokens(usage.outputTokens)} out`;
+        if (usage.cachedTokens > 0) {
+          usageLine += ` (${formatTokens(usage.cachedTokens)} cached)`;
+        }
+        if (usage.cost) {
+          usageLine += ` $${usage.cost.toFixed(4)}`;
+        }
+        console.log(usageLine);
+      }
+      // Get primary model from usage
+      const primaryModel = usage?.models ? Object.keys(usage.models)[0] : null;
       await runtime.prependHistory({
         event: 'AGENT_COMPLETED',
         agent: name,
         output: result,
-        attempts: attempt + 1
+        attempts: attempt + 1,
+        usage: usage,
+        model: primaryModel
       });
+      // Update running totals
+      if (usage && runtime.updateUsageTotals) {
+        runtime.updateUsageTotals(name, usage);
+      }
       return result;
     } catch (error) {
       lastError = error;
@@ -277,6 +376,23 @@ async function executeMDAgent(runtime, agentPath, name, params, options = {}) {
       const model = config.model || 'fast';
+      // Resolve model alias to actual model config for display
+      let resolvedModel = baseConfig.models?.[model];
+      if (!resolvedModel) {
+        // Auto-resolve unknown model (same logic as llm.js)
+        try {
+          resolvedModel = await resolveUnknownModel(model, baseConfig, runtime.workflowDir, {
+            availableCLIs: detectAvailableCLIs()
+          });
+          // Cache it for future use
+          if (!baseConfig.models) baseConfig.models = {};
+          baseConfig.models[model] = resolvedModel;
+          runtime.workflowConfig.models[model] = resolvedModel;
+        } catch {
+          resolvedModel = model; // Fallback to alias if resolution fails
+        }
+      }
       const fullPrompt = buildPrompt(context, {
         model,
         prompt: interpolatedPrompt,
@@ -284,7 +400,7 @@ async function executeMDAgent(runtime, agentPath, name, params, options = {}) {
         responseType: config.response
       });
-      await logAgentStart(runtime, name, fullPrompt);
+      await logAgentStart(runtime, name, fullPrompt, resolvedModel, model);
       console.log(`    Using model: ${model}`);
@@ -550,7 +666,7 @@ ${content}
   return response;
 }
-async function logAgentStart(runtime, name, prompt) {
+async function logAgentStart(runtime, name, prompt, model = null, modelAlias = null) {
   if (runtime._agentResumeFlags?.has(name)) {
     runtime._agentResumeFlags.delete(name);
     await runtime.prependHistory({
@@ -569,5 +685,13 @@ async function logAgentStart(runtime, name, prompt) {
     entry.prompt = prompt;
   }
+  if (model) {
+    entry.model = model;
+  }
+  if (modelAlias && modelAlias !== model) {
+    entry.modelAlias = modelAlias;
+  }
   await runtime.prependHistory(entry);
 }