npm - superghost - Versions diffs - 0.1.0 → 0.2.0 - Mend

superghost 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

package/package.json +3 -1
package/src/agent/agent-runner.ts +22 -0
package/src/agent/types.ts +0 -17
package/src/cache/cache-manager.ts +49 -3
package/src/cache/step-replayer.ts +9 -1
package/src/cache/types.ts +1 -1
package/src/cli.ts +120 -12
package/src/infra/preflight.ts +13 -0
package/src/output/banner.ts +70 -0
package/src/output/reporter.ts +46 -16
package/src/output/tool-name-map.ts +71 -0
package/src/output/types.ts +26 -0
package/src/runner/test-executor.ts +26 -15
package/src/runner/test-runner.ts +1 -0
package/src/runner/types.ts +1 -0

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "superghost",
-  "version": "0.1.0",
+  "version": "0.2.0",
   "description": "Plain English test cases with AI execution and instant cached replay for CI/CD",
   "type": "module",
   "bin": {
@@ -60,10 +60,12 @@
     "@ai-sdk/openai": "^3.0.41",
     "@modelcontextprotocol/sdk": "^1.27.1",
     "@openrouter/ai-sdk-provider": "^2.2.5",
+    "@types/picomatch": "^4.0.2",
     "ai": "^6.0.116",
     "commander": "^14.0.3",
     "nanospinner": "^1.2.2",
     "picocolors": "^1.1.1",
+    "picomatch": "^4.0.3",
     "zod": "^4.3.6"
   },
   "devDependencies": {

package/src/agent/agent-runner.ts CHANGED Viewed

@@ -3,6 +3,8 @@ import { z } from "zod";
 import { StepRecorder } from "../cache/step-recorder.ts";
 import type { AgentExecutionResult } from "./types.ts";
 import { buildSystemPrompt } from "./prompt.ts";
+import { describeToolCall } from "../output/tool-name-map.ts";
+import type { OnStepProgress } from "../output/types.ts";
 /**
  * Schema for structured agent output.
@@ -33,6 +35,7 @@ export async function executeAgent(config: {
   recursionLimit: number;
   globalContext?: string;
   testContext?: string;
+  onStepProgress?: OnStepProgress;
 }): Promise<AgentExecutionResult> {
   const recorder = new StepRecorder();
   const wrappedTools = recorder.wrapTools(config.tools);
@@ -44,6 +47,8 @@ export async function executeAgent(config: {
     config.testContext,
   );
+  let stepCounter = 0;
   const { output } = await generateText({
     model: config.model,
     tools: wrappedTools,
@@ -51,6 +56,23 @@ export async function executeAgent(config: {
     prompt: `Execute the test case: "${config.testCase}"`,
     stopWhen: stepCountIs(config.recursionLimit),
     output: Output.object({ schema: TestResultSchema }),
+    experimental_onToolCallFinish: config.onStepProgress
+      ? (event: any) => {
+          if (event.success) {
+            stepCounter++;
+            const input = (event.toolCall.input ?? {}) as Record<
+              string,
+              unknown
+            >;
+            config.onStepProgress!({
+              stepNumber: stepCounter,
+              toolName: event.toolCall.toolName,
+              input,
+              description: describeToolCall(event.toolCall.toolName, input),
+            });
+          }
+        }
+      : undefined,
   });
   if (output === null) {

package/src/agent/types.ts CHANGED Viewed

@@ -1,5 +1,4 @@
 import type { CachedStep } from "../cache/types.ts";
-import type { ProviderName } from "./model-factory.ts";
 /** Result of a single AI agent execution */
 export interface AgentExecutionResult {
@@ -10,19 +9,3 @@ export interface AgentExecutionResult {
   /** Recorded tool call steps for caching */
   steps: CachedStep[];
 }
-/** Configuration for a single agent run */
-export interface AgentConfig {
-  /** Model identifier (e.g., "claude-sonnet-4-6", "gpt-4o") */
-  model: string;
-  /** LLM provider */
-  provider: ProviderName;
-  /** Maximum number of agent steps */
-  recursionLimit: number;
-  /** Plain English test case description */
-  testCase: string;
-  /** Base URL for the application under test */
-  baseUrl: string;
-  /** Optional per-test context appended to system prompt */
-  context?: string;
-}

package/src/cache/cache-manager.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 import { join } from "node:path";
-import { mkdir, rename } from "node:fs/promises";
+import { mkdir, rename, readdir } from "node:fs/promises";
 import type { CacheEntry, CachedStep } from "./types.ts";
 /**
@@ -17,9 +17,30 @@ export class CacheManager {
   /**
    * Generate a deterministic 16-char hex hash key.
    * Uses Bun-native CryptoHasher for SHA-256 hashing.
+   *
+   * Normalization pipeline (v2):
+   * 1. Unicode NFC normalization + whitespace collapse (case-preserving)
+   * 2. URL normalization (lowercase hostname, strip trailing slash)
+   * 3. Version-prefixed input string ("v2|...")
    */
   static hashKey(testCase: string, baseUrl: string): string {
-    const input = `${testCase}|${baseUrl}`;
+    // Step 1: Unicode NFC + whitespace collapse (case-preserving per user decision)
+    const normalizedCase = testCase.normalize("NFC").replace(/\s+/g, " ").trim();
+    // Step 2: URL normalization (lowercase hostname, strip trailing slash)
+    let normalizedUrl: string;
+    try {
+      const url = new URL(baseUrl);
+      // new URL() lowercases hostname and strips default ports
+      // Manually strip trailing slash(es)
+      normalizedUrl = url.href.replace(/\/+$/, "");
+    } catch {
+      // Fallback for non-URL values (defensive)
+      normalizedUrl = baseUrl.replace(/\/+$/, "").toLowerCase();
+    }
+    // Step 3: Version-prefixed input
+    const input = `v2|${normalizedCase}|${normalizedUrl}`;
     const hasher = new Bun.CryptoHasher("sha256");
     hasher.update(input);
     return hasher.digest("hex").slice(0, 16);
@@ -52,7 +73,7 @@ export class CacheManager {
     const existing = await this.load(testCase, baseUrl);
     const entry: CacheEntry = {
-      version: 1,
+      version: 2,
       testCase,
       baseUrl,
       steps,
@@ -102,4 +123,29 @@ export class CacheManager {
       // No-op if file doesn't exist
     }
   }
+  /**
+   * Purge cache entries that were written in the v1 format.
+   *
+   * Each `*.json` file in the cache directory is parsed; a file whose
+   * top-level `version` field equals 1 is deleted, anything else is left
+   * untouched. Files are handled one at a time, in directory-listing order.
+   *
+   * Best-effort by design: if the directory cannot be listed the migration
+   * ends quietly, and a file that cannot be parsed or deleted is skipped.
+   */
+  async migrateV1Cache(): Promise<void> {
+    let names: string[];
+    try {
+      names = await readdir(this.cacheDir);
+    } catch {
+      // Directory could not be listed (e.g. it does not exist yet) -- nothing to migrate
+      return;
+    }
+    const jsonNames = names.filter((name) => name.endsWith(".json"));
+    for (const name of jsonNames) {
+      try {
+        const target = join(this.cacheDir, name);
+        const parsed = await Bun.file(target).json();
+        if (parsed?.version !== 1) continue;
+        await Bun.file(target).delete();
+      } catch {
+        // Unreadable, corrupted, or undeletable file: leave it and move on
+      }
+    }
+  }
 }

package/src/cache/step-replayer.ts CHANGED Viewed

@@ -1,4 +1,6 @@
 import type { CachedStep } from "./types.ts";
+import type { OnStepProgress } from "../output/types.ts";
+import { describeToolCall } from "../output/tool-name-map.ts";
 /** Function signature for executing a tool by name with given input */
 export type ToolExecutor = (
@@ -30,13 +32,19 @@ export class StepReplayer {
    * @param steps - The cached steps to replay
    * @returns Result indicating success or failure with details
    */
-  async replay(steps: CachedStep[]): Promise<ReplayResult> {
+  async replay(steps: CachedStep[], onStepProgress?: OnStepProgress): Promise<ReplayResult> {
     for (let i = 0; i < steps.length; i++) {
       const step = steps[i];
       if (!step) continue;
       try {
         await this.executor(step.toolName, step.toolInput);
+        onStepProgress?.({
+          stepNumber: i + 1,
+          toolName: step.toolName,
+          input: step.toolInput,
+          description: describeToolCall(step.toolName, step.toolInput),
+        });
       } catch (error) {
         return {
           success: false,

package/src/cache/types.ts CHANGED Viewed

@@ -6,7 +6,7 @@ export interface CachedStep {
 /** A complete cache entry with diagnostic metadata */
 export interface CacheEntry {
-  version: 1;
+  version: 1 | 2;
   testCase: string;
   baseUrl: string;
   steps: CachedStep[];

package/src/cli.ts CHANGED Viewed

@@ -5,7 +5,7 @@ import pc from "picocolors";
 import { loadConfig, ConfigLoadError } from "./config/loader.ts";
 import { TestRunner } from "./runner/test-runner.ts";
 import type { ExecuteFn } from "./runner/test-runner.ts";
-import { ConsoleReporter } from "./output/reporter.ts";
+import { ConsoleReporter, writeStderr } from "./output/reporter.ts";
 import { ProcessManager } from "./infra/process-manager.ts";
 import { setupSignalHandlers } from "./infra/signals.ts";
 import { McpManager } from "./agent/mcp-manager.ts";
@@ -20,10 +20,32 @@ import {
 } from "./agent/model-factory.ts";
 import type { ProviderName } from "./agent/model-factory.ts";
 import { executeAgent } from "./agent/agent-runner.ts";
+import type { OnStepProgress } from "./output/types.ts";
+import picomatch from "picomatch";
+import { checkBaseUrlReachable } from "./infra/preflight.ts";
 import { isStandaloneBinary } from "./dist/paths.ts";
 import { ensureMcpDependencies } from "./dist/setup.ts";
+import { animateBanner } from "./output/banner.ts";
 import pkg from "../package.json";
+/**
+ * Write the run banner to stderr, followed by any annotations.
+ *
+ * The banner reads "superghost v<version> / Running <testCount> test(s)...",
+ * or "... Running <testCount> of <totalTestCount> test(s)..." when a total is
+ * supplied. Each annotation is printed dimmed on its own indented line, and
+ * a blank line closes the annotation list when it is non-empty.
+ */
+function printRunHeader(testCount: number, totalTestCount: number | undefined, annotations: string[]): void {
+  const scope = totalTestCount === undefined ? `${testCount}` : `${testCount} of ${totalTestCount}`;
+  writeStderr(`\n${pc.bold("superghost")} v${pkg.version} / Running ${scope} test(s)...`);
+  writeStderr("");
+  if (annotations.length === 0) return;
+  for (const note of annotations) {
+    writeStderr(pc.dim(`  ${note}`));
+  }
+  writeStderr("");
+}
 const program = new Command();
 program
@@ -32,7 +54,18 @@ program
   .version(pkg.version)
   .requiredOption("-c, --config <path>", "Path to YAML config file")
   .option("--headed", "Run browser in headed mode (visible browser window)")
-  .action(async (options: { config: string; headed?: boolean }) => {
+  .option("--only <pattern>", "Run only tests matching glob pattern")
+  .option("--no-cache", "Bypass cache reads (still writes on success)")
+  .option("--dry-run", "List tests and validate config without executing")
+  .option("--verbose", "Show per-step tool call output during execution")
+  .exitOverride((err) => {
+    // Commander writes its own error message to stderr.
+    // Re-exit with code 2 for config-class errors (missing required option, unknown option).
+    if (err.exitCode !== 0) {
+      process.exit(2);
+    }
+  })
+  .action(async (options: { config: string; headed?: boolean; only?: string; cache: boolean; dryRun?: boolean; verbose?: boolean }) => {
     const pm = new ProcessManager();
     setupSignalHandlers(pm);
@@ -48,7 +81,7 @@ program
       if (options.headed) {
         config.headless = false;
       }
-      const reporter = new ConsoleReporter();
+      const reporter = new ConsoleReporter(options.verbose ?? false);
       // Infer provider: use explicit modelProvider unless it matches default and model suggests otherwise
       const provider =
@@ -59,6 +92,64 @@ program
       // Validate API key at startup before any tests run
       validateApiKey(provider);
+      // Apply --only filter before any expensive operations
+      const totalTestCount = config.tests.length;
+      if (options.only) {
+        const allTestNames = config.tests.map((t) => t.name);
+        const isMatch = picomatch(options.only, { nocase: true });
+        config.tests = config.tests.filter((t) => isMatch(t.name));
+        if (config.tests.length === 0) {
+          const names = allTestNames.map((n) => `  - ${n}`).join("\n");
+          writeStderr(`${pc.red("Error:")} No tests match pattern "${options.only}"\n\nAvailable tests:\n${names}`);
+          setTimeout(() => process.exit(2), 100);
+          return;
+        }
+      }
+      // Dry-run: list tests with cache/AI source labels, then exit
+      if (options.dryRun) {
+        const cacheManager = new CacheManager(config.cacheDir);
+        // Print header with annotations
+        const dryRunAnnotations = ["(dry-run)"];
+        if (options.only) dryRunAnnotations.push(`(filtered by --only "${options.only}")`);
+        printRunHeader(config.tests.length, options.only ? totalTestCount : undefined, dryRunAnnotations);
+        // Determine max test name length for padding
+        const maxNameLen = Math.max(...config.tests.map(t => t.name.length));
+        let cachedCount = 0;
+        for (let i = 0; i < config.tests.length; i++) {
+          const test = config.tests[i];
+          const baseUrl = test.baseUrl ?? config.baseUrl ?? "";
+          const entry = await cacheManager.load(test.case, baseUrl);
+          const source = entry ? "cache" : "ai";
+          if (entry) cachedCount++;
+          const paddedName = test.name.padEnd(maxNameLen);
+          writeStderr(`  ${i + 1}. ${paddedName}  (${source})`);
+        }
+        writeStderr("");
+        writeStderr(`${config.tests.length} tests, ${cachedCount} cached`);
+        setTimeout(() => process.exit(0), 100);
+        return;
+      }
+      // Preflight: check baseUrl reachability (only if global baseUrl configured)
+      if (config.baseUrl) {
+        try {
+          await checkBaseUrlReachable(config.baseUrl);
+        } catch {
+          writeStderr(`${pc.red("Error:")} baseUrl unreachable: ${config.baseUrl}`);
+          writeStderr(`  Check that the server is running and the URL is correct.`);
+          setTimeout(() => process.exit(2), 100);
+          return;
+        }
+      }
       // Create AI model
       const model = createModel(config.model, provider);
@@ -72,6 +163,7 @@ program
       // Create cache subsystem
       const cacheManager = new CacheManager(config.cacheDir);
+      await cacheManager.migrateV1Cache();
       const toolExecutor: ToolExecutor = async (toolName, toolInput) => {
         const tool = tools[toolName];
         if (!tool) throw new Error(`Tool not found: ${toolName}`);
@@ -79,6 +171,9 @@ program
       };
       const replayer = new StepReplayer(toolExecutor);
+      // Create onStepProgress callback bound to reporter
+      const onStepProgress: OnStepProgress = (step) => reporter.onStepProgress(step);
       // Create TestExecutor with cache-first strategy
       const executor = new TestExecutor({
         cacheManager,
@@ -88,18 +183,23 @@ program
         tools,
         config,
         globalContext: config.context,
+        noCache: !options.cache,
+        onStepProgress,
       });
       // Wire execute function for TestRunner
       const executeFn: ExecuteFn = async (testCase, baseUrl, testContext?) =>
         executor.execute(testCase, baseUrl, testContext);
-      console.log(
-        `\n${pc.bold("superghost")} v${pkg.version} / Running ${config.tests.length} test(s)...\n`,
-      );
+      const runAnnotations: string[] = [];
+      if (options.only) runAnnotations.push(`(filtered by --only "${options.only}")`);
+      if (!options.cache) runAnnotations.push("(cache disabled)");
+      if (options.verbose) runAnnotations.push("(verbose)");
+      printRunHeader(config.tests.length, options.only ? totalTestCount : undefined, runAnnotations);
       const runner = new TestRunner(config, reporter, executeFn);
       const result = await runner.run();
+      result.skipped = options.only ? totalTestCount - config.tests.length : 0;
       await mcpManager.close();
       await pm.killAll();
@@ -112,17 +212,25 @@ program
       await pm.killAll();
       if (error instanceof ConfigLoadError) {
-        Bun.write(Bun.stderr, `${pc.red("Error:")} ${error.message}\n`);
-        setTimeout(() => process.exit(1), 100);
+        writeStderr(`${pc.red("Error:")} ${error.message}`);
+        setTimeout(() => process.exit(2), 100);
         return;
       }
       if (error instanceof Error && error.message.startsWith("Missing API key")) {
-        Bun.write(Bun.stderr, `${pc.red("Error:")} ${error.message}\n`);
-        setTimeout(() => process.exit(1), 100);
+        writeStderr(`${pc.red("Error:")} ${error.message}`);
+        setTimeout(() => process.exit(2), 100);
         return;
       }
-      throw error;
+      const msg = error instanceof Error ? error.message : String(error);
+      writeStderr(`${pc.red("Unexpected error:")} ${msg}`);
+      setTimeout(() => process.exit(2), 100);
     }
   });
-await program.parseAsync();
+// Entry point: when help was requested on the command line, play the banner
+// first, then hand argument parsing over to commander.
+(async () => {
+  const helpFlags = ["--help", "-h"];
+  const wantsHelp = process.argv.some((arg) => helpFlags.includes(arg));
+  if (wantsHelp) await animateBanner();
+  await program.parseAsync();
+})();

package/src/infra/preflight.ts ADDED Viewed

@@ -0,0 +1,13 @@
+/**
+ * Preflight probe: confirm that something answers HTTP at `url`.
+ *
+ * Issues a single HEAD request (following redirects) and discards the
+ * response, so any HTTP status -- 4xx/5xx included -- counts as reachable.
+ *
+ * @param url - Address to probe.
+ * @param timeoutMs - Abort the request after this many milliseconds (default 5000).
+ * @throws Whatever `fetch` rejects with: a network-level failure (connection
+ *   refused, DNS failure) or the abort raised once the timeout elapses.
+ */
+export async function checkBaseUrlReachable(url: string, timeoutMs = 5000): Promise<void> {
+  const probe: RequestInit = {
+    method: "HEAD",
+    redirect: "follow",
+    signal: AbortSignal.timeout(timeoutMs),
+  };
+  await fetch(url, probe);
+}

package/src/output/banner.ts ADDED Viewed

@@ -0,0 +1,70 @@
+/**
+ * Convert an HSL colour to 8-bit RGB.
+ *
+ * @param h - Hue in degrees.
+ * @param s - Saturation as a percentage (0-100).
+ * @param l - Lightness as a percentage (0-100).
+ * @returns `[red, green, blue]`, each rounded to an integer in 0-255.
+ */
+function hslToRgb(h: number, s: number, l: number): [number, number, number] {
+  const saturation = s / 100;
+  const lightness = l / 100;
+  const amplitude = saturation * Math.min(lightness, 1 - lightness);
+  // Each channel samples the same piecewise-linear curve at a different phase (0, 8, 4)
+  const channel = (phase: number): number => {
+    const k = (phase + h / 30) % 12;
+    const weight = Math.max(-1, Math.min(k - 3, Math.min(9 - k, 1)));
+    return Math.round((lightness - amplitude * weight) * 255);
+  };
+  return [channel(0), channel(8), channel(4)];
+}
+/**
+ * Wrap `char` in an ANSI 24-bit foreground-colour sequence followed by a reset.
+ * The colour is the given hue (taken modulo 360) at 100% saturation, 60% lightness.
+ */
+function colorChar(char: string, hue: number): string {
+  const rgb = hslToRgb(hue % 360, 100, 60).join(";");
+  return `\x1b[38;2;${rgb}m${char}\x1b[0m`;
+}
+/**
+ * Colour every UTF-16 code unit of `text` individually so the hues span one
+ * full turn of the colour wheel across the string, starting at `hueOffset`.
+ */
+function rainbowLine(text: string, hueOffset: number): string {
+  const hueStep = 360 / text.length;
+  let painted = "";
+  for (let i = 0; i < text.length; i++) {
+    painted += colorChar(text.charAt(i), (hueOffset + i * hueStep) % 360);
+  }
+  return painted;
+}
+/** Title text shown between the two ghost emoji; padded with two spaces on each side */
+const TITLE = "  Super Ghost  ";
+/**
+ * Plain banner without any ANSI escape sequences.
+ * animateBanner writes these lines as-is when stdout is not a TTY.
+ */
+const BANNER_LINES = [
+  `   👻${TITLE}👻`,
+  `  ─────────────────────`,
+  `  AI-powered E2E testing`,
+];
+/**
+ * Build one animation frame of the banner: a rainbow-coloured title line
+ * (hues shifted by `hueOffset`) above a dimmed rule and a dimmed tagline.
+ */
+function renderBanner(hueOffset: number): string[] {
+  const dim = (text: string): string => `\x1b[2m${text}\x1b[0m`;
+  const titleLine = `   👻${rainbowLine(TITLE, hueOffset)}👻`;
+  const ruleLine = `  ${dim("─────────────────────")}`;
+  const taglineLine = `  ${dim("AI-powered E2E testing")}`;
+  return [titleLine, ruleLine, taglineLine];
+}
+/** Number of frames drawn by the banner animation */
+const FRAMES = 15;
+/** Pause between consecutive frames, in milliseconds (no pause follows the last frame) */
+const FRAME_MS = 60;
+/** Hue shift per frame, in degrees; 15 frames x 24 degrees covers offsets 0 through 336 */
+const HUE_STEP = 24;
+/**
+ * Print the SuperGhost banner on stdout.
+ *
+ * When stdout is not a TTY the plain BANNER_LINES are written once, followed
+ * by a blank line, and the function returns immediately. On a TTY the banner
+ * is redrawn FRAMES times in place with a shifting rainbow title, pausing
+ * FRAME_MS between frames; the cursor is hidden for the duration and is
+ * restored even if drawing throws.
+ */
+export async function animateBanner(): Promise<void> {
+  const isTTY = process.stdout.isTTY === true;
+  if (!isTTY) {
+    // No animation or colour escapes when output is piped or redirected
+    const lines = BANNER_LINES;
+    process.stdout.write(lines.join("\n") + "\n\n");
+    return;
+  }
+  process.stdout.write("\x1b[?25l"); // hide cursor
+  try {
+    for (let frame = 0; frame < FRAMES; frame++) {
+      const lines = renderBanner(frame * HUE_STEP);
+      if (frame > 0) {
+        // Move cursor up N lines to overwrite previous frame
+        process.stdout.write(`\x1b[${lines.length}A`);
+      }
+      process.stdout.write(lines.join("\n") + "\n");
+      // Sleep between frames only; the final frame is left on screen without delay
+      if (frame < FRAMES - 1) {
+        await new Promise<void>((resolve) => setTimeout(resolve, FRAME_MS));
+      }
+    }
+    // Blank line separating the banner from whatever is printed next
+    process.stdout.write("\n");
+  } finally {
+    process.stdout.write("\x1b[?25h"); // restore cursor
+  }
+}

package/src/output/reporter.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 import pc from "picocolors";
 import { createSpinner } from "nanospinner";
-import type { Reporter } from "./types.ts";
+import type { Reporter, StepInfo } from "./types.ts";
 import type { TestResult, RunResult } from "../runner/types.ts";
 /**
@@ -14,16 +14,29 @@ export function formatDuration(ms: number): string {
   return `${(ms / 1000).toFixed(1)}s`;
 }
+/**
+ * Emit `text` on stderr, terminated by a newline.
+ * The value returned by `Bun.write` is discarded, so callers get no signal
+ * about when (or whether) the write completed.
+ */
+export function writeStderr(text: string): void {
+  Bun.write(Bun.stderr, `${text}\n`);
+}
 /**
  * Console reporter with colored output, spinners, and box summary.
+ * All output routes to stderr so stdout is reserved for structured output.
  * Colors auto-disable when stdout is not a TTY (via picocolors).
  * Spinner animation auto-disables in non-TTY (via nanospinner).
  */
 export class ConsoleReporter implements Reporter {
   private spinner: ReturnType<typeof createSpinner> | null = null;
+  private readonly verbose: boolean;
+  private currentTestName: string | null = null;
+  constructor(verbose = false) {
+    this.verbose = verbose;
+  }
   /** Creates a spinner with the test name and starts it */
   onTestStart(testName: string): void {
+    this.currentTestName = testName;
     this.spinner = createSpinner(testName).start();
   }
@@ -38,35 +51,52 @@ export class ConsoleReporter implements Reporter {
       this.spinner?.error({ text: `${testName} ${duration}` });
     }
     if (selfHealed) {
-      console.log(pc.dim("  Cache was stale — re-executed and updated"));
+      writeStderr(pc.dim("  Cache was stale \u2014 re-executed and updated"));
     }
     this.spinner = null;
+    this.currentTestName = null;
+  }
+  /**
+   * Report progress for one step of the running test.
+   *
+   * In verbose mode a dimmed "Step N: <description>" line is written to
+   * stderr. Otherwise, if a spinner is active, its text becomes
+   * "<test name> \u2014 <description>", cut to 60 characters (57 plus "...").
+   */
+  onStepProgress(step: StepInfo): void {
+    if (this.verbose) {
+      writeStderr(pc.dim(`    Step ${step.stepNumber}: ${step.description.full}`));
+      return;
+    }
+    if (!this.spinner) return;
+    const label = `${this.currentTestName} \u2014 ${step.description.full}`;
+    this.spinner.update(label.length > 60 ? `${label.slice(0, 57)}...` : label);
   }
   /** Prints bordered box summary and lists failed tests with error messages */
   onRunComplete(data: RunResult): void {
     const bar = "\u2501".repeat(40);
-    console.log("");
-    console.log(`  ${bar}`);
-    console.log("    SuperGhost Results");
-    console.log(`  ${bar}`);
-    console.log(`    Total:   ${data.results.length}`);
-    console.log(`    Passed:  ${pc.green(String(data.passed))}`);
-    console.log(
+    writeStderr("");
+    writeStderr(`  ${bar}`);
+    writeStderr("    SuperGhost Results");
+    writeStderr(`  ${bar}`);
+    writeStderr(`    Total:   ${data.results.length}`);
+    writeStderr(`    Passed:  ${pc.green(String(data.passed))}`);
+    writeStderr(
       `    Failed:  ${data.failed > 0 ? pc.red(String(data.failed)) : String(data.failed)}`,
     );
-    console.log(`    Cached:  ${data.cached}`);
-    console.log(`    Time:    ${pc.dim(formatDuration(data.totalDurationMs))}`);
-    console.log(`  ${bar}`);
+    if (data.skipped > 0) {
+      writeStderr(`    Skipped: ${data.skipped}`);
+    }
+    writeStderr(`    Cached:  ${data.cached}`);
+    writeStderr(`    Time:    ${pc.dim(formatDuration(data.totalDurationMs))}`);
+    writeStderr(`  ${bar}`);
     if (data.failed > 0) {
-      console.log("");
-      console.log(pc.red("  Failed tests:"));
+      writeStderr("");
+      writeStderr(pc.red("  Failed tests:"));
       for (const result of data.results) {
         if (result.status === "failed") {
-          console.log(`    ${pc.red("-")} ${result.testName}`);
+          writeStderr(`    ${pc.red("-")} ${result.testName}`);
           if (result.error) {
-            console.log(`      ${pc.dim(result.error)}`);
+            writeStderr(`      ${pc.dim(result.error)}`);
           }
         }
       }

package/src/output/tool-name-map.ts ADDED Viewed

@@ -0,0 +1,71 @@
+import type { StepDescription } from "./types.ts";
+/**
+ * Friendly action names for known browser tools.
+ * Entries are matched on the exact tool name (no prefix matching is performed).
+ */
+const PREFIX_MAP: Record<string, string> = {
+  browser_navigate: "Navigate",
+  browser_click: "Click",
+  browser_type: "Type",
+  browser_screenshot: "Screenshot",
+  browser_wait_for_text: "Wait for text",
+  browser_hover: "Hover",
+  browser_select_option: "Select",
+  browser_go_back: "Go back",
+  browser_go_forward: "Go forward",
+  browser_press_key: "Press key",
+  browser_drag: "Drag",
+  browser_resize: "Resize",
+  browser_handle_dialog: "Handle dialog",
+  browser_file_upload: "Upload file",
+  browser_pdf_save: "Save PDF",
+  browser_close: "Close",
+  browser_console_messages: "Console messages",
+  browser_install: "Install browser",
+  browser_tab_list: "List tabs",
+  browser_tab_new: "New tab",
+  browser_tab_select: "Select tab",
+  browser_tab_close: "Close tab",
+  browser_network_requests: "Network requests",
+  browser_snapshot: "Snapshot",
+};
+/** For each tool that has one, the input field whose value is shown as the key argument */
+const KEY_ARG_MAP: Record<string, string> = {
+  browser_navigate: "url",
+  browser_click: "element",
+  browser_type: "element",
+  browser_hover: "element",
+  browser_select_option: "element",
+  browser_press_key: "key",
+  browser_wait_for_text: "text",
+};
+/**
+ * Own-property lookup. Plain indexing would resolve tool names such as
+ * "constructor" or "toString" to Object.prototype members instead of undefined.
+ */
+function lookupOwn(map: Record<string, string>, key: string): string | undefined {
+  return Object.prototype.hasOwnProperty.call(map, key) ? map[key] : undefined;
+}
+/**
+ * Render a key-argument value as display text.
+ * Returns undefined for null, undefined, and values that render as an empty string.
+ */
+function formatKeyArg(value: unknown): string | undefined {
+  if (value === undefined || value === null) return undefined;
+  let text: string | undefined;
+  if (typeof value === "object") {
+    // String() on a plain object yields "[object Object]"; JSON keeps it readable
+    try {
+      text = JSON.stringify(value);
+    } catch {
+      // Circular structures and BigInt members cannot be serialised
+      text = String(value);
+    }
+  } else {
+    text = String(value);
+  }
+  return text !== undefined && text !== "" ? text : undefined;
+}
+/**
+ * Convert a raw tool call into a human-readable description.
+ *
+ * Known tools (browser_navigate, browser_click, etc.) map to friendly names.
+ * Unknown tools fall back to the raw name with underscores replaced by
+ * spaces and the first character upper-cased.
+ * The key argument is read from the input field registered for the tool
+ * (e.g. "url" for navigate); tools without a registered field have none.
+ *
+ * @param toolName - Raw tool name as reported by the tool provider.
+ * @param input - Arguments the tool was called with.
+ * @returns The action name, the optional key argument, and `full`, which is
+ *   "action \u2192 keyArg" when a key argument exists and the action alone otherwise.
+ */
+export function describeToolCall(
+  toolName: string,
+  input: Record<string, unknown>,
+): StepDescription {
+  // Look up human name, or derive it from the raw name as a fallback
+  const action =
+    lookupOwn(PREFIX_MAP, toolName) ??
+    toolName
+      .replace(/_/g, " ")
+      .replace(/^\w/, (c) => c.toUpperCase());
+  // Look up which input field is the key argument for this tool
+  const keyArgField = lookupOwn(KEY_ARG_MAP, toolName);
+  const keyArg =
+    keyArgField !== undefined ? formatKeyArg(input[keyArgField]) : undefined;
+  const full = keyArg ? `${action} \u2192 ${keyArg}` : action;
+  return { action, keyArg, full };
+}

package/src/output/types.ts CHANGED Viewed

@@ -1,8 +1,34 @@
 import type { RunResult, TestResult } from "../runner/types.ts";
+/** Describes a tool call in human-readable form (as produced by describeToolCall) */
+export interface StepDescription {
+  /** Human-readable action name, e.g. "Navigate", "Click" */
+  action: string;
+  /** Key argument value, e.g. "/login", "button.submit"; absent when the tool has none or it is empty */
+  keyArg?: string;
+  /** Full description string: "action \u2192 keyArg" (e.g. "Navigate \u2192 /login"), or just the action when there is no key argument */
+  full: string;
+}
+/**
+ * Information about a single step (tool call) of a test.
+ * Reported both for live agent tool calls and for steps replayed from the cache.
+ */
+export interface StepInfo {
+  /** 1-based step counter for the current test */
+  stepNumber: number;
+  /** Raw tool name, e.g. "browser_navigate" */
+  toolName: string;
+  /** Tool call input arguments */
+  input: Record<string, unknown>;
+  /** Human-readable description of the tool call */
+  description: StepDescription;
+}
+/**
+ * Callback invoked once per reported step, whether the step came from a live
+ * agent tool call or from cache replay. The return value is ignored.
+ */
+export type OnStepProgress = (step: StepInfo) => void;
 /** Interface for output reporting */
 export interface Reporter {
   onTestStart(testName: string): void;
   onTestComplete(result: TestResult): void;
   onRunComplete(data: RunResult): void;
+  /** Optional per-step hook; a reporter that does not display step progress may omit it */
+  onStepProgress?(step: StepInfo): void;
 }

package/src/runner/test-executor.ts CHANGED Viewed

@@ -3,6 +3,7 @@ import type { StepReplayer } from "../cache/step-replayer.ts";
 import type { AgentExecutionResult } from "../agent/types.ts";
 import type { Config } from "../config/types.ts";
 import type { TestResult } from "./types.ts";
+import type { OnStepProgress } from "../output/types.ts";
 /** Function signature for executing a test via the AI agent */
 type ExecuteAgentFn = (config: {
@@ -13,6 +14,7 @@ type ExecuteAgentFn = (config: {
   recursionLimit: number;
   globalContext?: string;
   testContext?: string;
+  onStepProgress?: OnStepProgress;
 }) => Promise<AgentExecutionResult>;
 /**
@@ -32,6 +34,8 @@ export class TestExecutor {
     "maxAttempts" | "recursionLimit" | "model" | "modelProvider"
   > & { context?: string };
   private readonly globalContext?: string;
+  private readonly noCache: boolean;
+  private readonly onStepProgress?: OnStepProgress;
   constructor(opts: {
     cacheManager: CacheManager;
@@ -44,6 +48,8 @@ export class TestExecutor {
       "maxAttempts" | "recursionLimit" | "model" | "modelProvider"
     > & { context?: string };
     globalContext?: string;
+    noCache?: boolean;
+    onStepProgress?: OnStepProgress;
   }) {
     this.cacheManager = opts.cacheManager;
     this.replayer = opts.replayer;
@@ -52,6 +58,8 @@ export class TestExecutor {
     this.tools = opts.tools ?? {};
     this.config = opts.config;
     this.globalContext = opts.globalContext;
+    this.noCache = opts.noCache ?? false;
+    this.onStepProgress = opts.onStepProgress;
   }
   /** Execute a single test case with cache-first strategy */
@@ -62,24 +70,26 @@ export class TestExecutor {
   ): Promise<TestResult> {
     const start = Date.now();
-    // Phase 1: Try cache replay
-    const cached = await this.cacheManager.load(testCase, baseUrl);
-    if (cached) {
-      const replay = await this.replayer.replay(cached.steps);
-      if (replay.success) {
-        return {
-          testName: testCase,
-          testCase,
-          status: "passed",
-          source: "cache",
-          durationMs: Date.now() - start,
-        };
+    // Phase 1: Try cache replay (unless noCache)
+    if (!this.noCache) {
+      const cached = await this.cacheManager.load(testCase, baseUrl);
+      if (cached) {
+        const replay = await this.replayer.replay(cached.steps, this.onStepProgress);
+        if (replay.success) {
+          return {
+            testName: testCase,
+            testCase,
+            status: "passed",
+            source: "cache",
+            durationMs: Date.now() - start,
+          };
+        }
+        // Cache stale — fall through to AI with self-heal flag
+        return this.executeWithAgent(testCase, baseUrl, start, true, testContext);
       }
-      // Cache stale — fall through to AI with self-heal flag
-      return this.executeWithAgent(testCase, baseUrl, start, true, testContext);
     }
-    // Phase 2: No cache — go directly to AI
+    // Phase 2: No cache or noCache — go directly to AI
     return this.executeWithAgent(testCase, baseUrl, start, false, testContext);
   }
@@ -102,6 +112,7 @@ export class TestExecutor {
         recursionLimit: this.config.recursionLimit,
         globalContext: this.globalContext,
         testContext,
+        onStepProgress: this.onStepProgress,
       });
       if (result.passed) {

package/src/runner/test-runner.ts CHANGED Viewed

@@ -62,5 +62,6 @@ function aggregateResults(
     cached: results.filter(
       (r) => r.source === "cache" && r.status === "passed",
     ).length,
+    skipped: 0,
   };
 }

package/src/runner/types.ts CHANGED Viewed

@@ -23,4 +23,5 @@ export interface RunResult {
   passed: number;
   failed: number;
   cached: number;
+  skipped: number;
 }