superghost 0.1.0 → 0.2.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "superghost",
3
- "version": "0.1.0",
3
+ "version": "0.2.0",
4
4
  "description": "Plain English test cases with AI execution and instant cached replay for CI/CD",
5
5
  "type": "module",
6
6
  "bin": {
@@ -60,10 +60,12 @@
60
60
  "@ai-sdk/openai": "^3.0.41",
61
61
  "@modelcontextprotocol/sdk": "^1.27.1",
62
62
  "@openrouter/ai-sdk-provider": "^2.2.5",
63
+ "@types/picomatch": "^4.0.2",
63
64
  "ai": "^6.0.116",
64
65
  "commander": "^14.0.3",
65
66
  "nanospinner": "^1.2.2",
66
67
  "picocolors": "^1.1.1",
68
+ "picomatch": "^4.0.3",
67
69
  "zod": "^4.3.6"
68
70
  },
69
71
  "devDependencies": {
@@ -3,6 +3,8 @@ import { z } from "zod";
3
3
  import { StepRecorder } from "../cache/step-recorder.ts";
4
4
  import type { AgentExecutionResult } from "./types.ts";
5
5
  import { buildSystemPrompt } from "./prompt.ts";
6
+ import { describeToolCall } from "../output/tool-name-map.ts";
7
+ import type { OnStepProgress } from "../output/types.ts";
6
8
 
7
9
  /**
8
10
  * Schema for structured agent output.
@@ -33,6 +35,7 @@ export async function executeAgent(config: {
33
35
  recursionLimit: number;
34
36
  globalContext?: string;
35
37
  testContext?: string;
38
+ onStepProgress?: OnStepProgress;
36
39
  }): Promise<AgentExecutionResult> {
37
40
  const recorder = new StepRecorder();
38
41
  const wrappedTools = recorder.wrapTools(config.tools);
@@ -44,6 +47,8 @@ export async function executeAgent(config: {
44
47
  config.testContext,
45
48
  );
46
49
 
50
+ let stepCounter = 0;
51
+
47
52
  const { output } = await generateText({
48
53
  model: config.model,
49
54
  tools: wrappedTools,
@@ -51,6 +56,23 @@ export async function executeAgent(config: {
51
56
  prompt: `Execute the test case: "${config.testCase}"`,
52
57
  stopWhen: stepCountIs(config.recursionLimit),
53
58
  output: Output.object({ schema: TestResultSchema }),
59
+ experimental_onToolCallFinish: config.onStepProgress
60
+ ? (event: any) => {
61
+ if (event.success) {
62
+ stepCounter++;
63
+ const input = (event.toolCall.input ?? {}) as Record<
64
+ string,
65
+ unknown
66
+ >;
67
+ config.onStepProgress!({
68
+ stepNumber: stepCounter,
69
+ toolName: event.toolCall.toolName,
70
+ input,
71
+ description: describeToolCall(event.toolCall.toolName, input),
72
+ });
73
+ }
74
+ }
75
+ : undefined,
54
76
  });
55
77
 
56
78
  if (output === null) {
@@ -1,5 +1,4 @@
1
1
  import type { CachedStep } from "../cache/types.ts";
2
- import type { ProviderName } from "./model-factory.ts";
3
2
 
4
3
  /** Result of a single AI agent execution */
5
4
  export interface AgentExecutionResult {
@@ -10,19 +9,3 @@ export interface AgentExecutionResult {
10
9
  /** Recorded tool call steps for caching */
11
10
  steps: CachedStep[];
12
11
  }
13
-
14
- /** Configuration for a single agent run */
15
- export interface AgentConfig {
16
- /** Model identifier (e.g., "claude-sonnet-4-6", "gpt-4o") */
17
- model: string;
18
- /** LLM provider */
19
- provider: ProviderName;
20
- /** Maximum number of agent steps */
21
- recursionLimit: number;
22
- /** Plain English test case description */
23
- testCase: string;
24
- /** Base URL for the application under test */
25
- baseUrl: string;
26
- /** Optional per-test context appended to system prompt */
27
- context?: string;
28
- }
@@ -1,5 +1,5 @@
1
1
  import { join } from "node:path";
2
- import { mkdir, rename } from "node:fs/promises";
2
+ import { mkdir, rename, readdir } from "node:fs/promises";
3
3
  import type { CacheEntry, CachedStep } from "./types.ts";
4
4
 
5
5
  /**
@@ -17,9 +17,30 @@ export class CacheManager {
17
17
  /**
18
18
  * Generate a deterministic 16-char hex hash key.
19
19
  * Uses Bun-native CryptoHasher for SHA-256 hashing.
20
+ *
21
+ * Normalization pipeline (v2):
22
+ * 1. Unicode NFC normalization + whitespace collapse (case-preserving)
23
+ * 2. URL normalization (lowercase hostname, strip trailing slash)
24
+ * 3. Version-prefixed input string ("v2|...")
20
25
  */
21
26
  static hashKey(testCase: string, baseUrl: string): string {
22
- const input = `${testCase}|${baseUrl}`;
27
+ // Step 1: Unicode NFC + whitespace collapse (case-preserving per user decision)
28
+ const normalizedCase = testCase.normalize("NFC").replace(/\s+/g, " ").trim();
29
+
30
+ // Step 2: URL normalization (lowercase hostname, strip trailing slash)
31
+ let normalizedUrl: string;
32
+ try {
33
+ const url = new URL(baseUrl);
34
+ // new URL() lowercases hostname and strips default ports
35
+ // Manually strip trailing slash(es)
36
+ normalizedUrl = url.href.replace(/\/+$/, "");
37
+ } catch {
38
+ // Fallback for non-URL values (defensive)
39
+ normalizedUrl = baseUrl.replace(/\/+$/, "").toLowerCase();
40
+ }
41
+
42
+ // Step 3: Version-prefixed input
43
+ const input = `v2|${normalizedCase}|${normalizedUrl}`;
23
44
  const hasher = new Bun.CryptoHasher("sha256");
24
45
  hasher.update(input);
25
46
  return hasher.digest("hex").slice(0, 16);
@@ -52,7 +73,7 @@ export class CacheManager {
52
73
  const existing = await this.load(testCase, baseUrl);
53
74
 
54
75
  const entry: CacheEntry = {
55
- version: 1,
76
+ version: 2,
56
77
  testCase,
57
78
  baseUrl,
58
79
  steps,
@@ -102,4 +123,29 @@ export class CacheManager {
102
123
  // No-op if file doesn't exist
103
124
  }
104
125
  }
126
+
127
+ /**
128
+ * Migrate v1 cache entries by deleting them.
129
+ * Scans the cache directory for JSON files with version 1 and removes them.
130
+ * v2 entries are preserved. Handles missing/empty cache directories gracefully.
131
+ */
132
+ async migrateV1Cache(): Promise<void> {
133
+ try {
134
+ const files = await readdir(this.cacheDir);
135
+ for (const file of files) {
136
+ if (!file.endsWith(".json")) continue;
137
+ try {
138
+ const filePath = join(this.cacheDir, file);
139
+ const entry = await Bun.file(filePath).json();
140
+ if (entry?.version === 1) {
141
+ await Bun.file(filePath).delete();
142
+ }
143
+ } catch {
144
+ // Skip corrupted files silently
145
+ }
146
+ }
147
+ } catch {
148
+ // Cache dir doesn't exist yet -- nothing to migrate
149
+ }
150
+ }
105
151
  }
@@ -1,4 +1,6 @@
1
1
  import type { CachedStep } from "./types.ts";
2
+ import type { OnStepProgress } from "../output/types.ts";
3
+ import { describeToolCall } from "../output/tool-name-map.ts";
2
4
 
3
5
  /** Function signature for executing a tool by name with given input */
4
6
  export type ToolExecutor = (
@@ -30,13 +32,19 @@ export class StepReplayer {
30
32
  * @param steps - The cached steps to replay
31
33
  * @returns Result indicating success or failure with details
32
34
  */
33
- async replay(steps: CachedStep[]): Promise<ReplayResult> {
35
+ async replay(steps: CachedStep[], onStepProgress?: OnStepProgress): Promise<ReplayResult> {
34
36
  for (let i = 0; i < steps.length; i++) {
35
37
  const step = steps[i];
36
38
  if (!step) continue;
37
39
 
38
40
  try {
39
41
  await this.executor(step.toolName, step.toolInput);
42
+ onStepProgress?.({
43
+ stepNumber: i + 1,
44
+ toolName: step.toolName,
45
+ input: step.toolInput,
46
+ description: describeToolCall(step.toolName, step.toolInput),
47
+ });
40
48
  } catch (error) {
41
49
  return {
42
50
  success: false,
@@ -6,7 +6,7 @@ export interface CachedStep {
6
6
 
7
7
  /** A complete cache entry with diagnostic metadata */
8
8
  export interface CacheEntry {
9
- version: 1;
9
+ version: 1 | 2;
10
10
  testCase: string;
11
11
  baseUrl: string;
12
12
  steps: CachedStep[];
package/src/cli.ts CHANGED
@@ -5,7 +5,7 @@ import pc from "picocolors";
5
5
  import { loadConfig, ConfigLoadError } from "./config/loader.ts";
6
6
  import { TestRunner } from "./runner/test-runner.ts";
7
7
  import type { ExecuteFn } from "./runner/test-runner.ts";
8
- import { ConsoleReporter } from "./output/reporter.ts";
8
+ import { ConsoleReporter, writeStderr } from "./output/reporter.ts";
9
9
  import { ProcessManager } from "./infra/process-manager.ts";
10
10
  import { setupSignalHandlers } from "./infra/signals.ts";
11
11
  import { McpManager } from "./agent/mcp-manager.ts";
@@ -20,10 +20,32 @@ import {
20
20
  } from "./agent/model-factory.ts";
21
21
  import type { ProviderName } from "./agent/model-factory.ts";
22
22
  import { executeAgent } from "./agent/agent-runner.ts";
23
+ import type { OnStepProgress } from "./output/types.ts";
24
+ import picomatch from "picomatch";
25
+ import { checkBaseUrlReachable } from "./infra/preflight.ts";
23
26
  import { isStandaloneBinary } from "./dist/paths.ts";
24
27
  import { ensureMcpDependencies } from "./dist/setup.ts";
28
+ import { animateBanner } from "./output/banner.ts";
25
29
  import pkg from "../package.json";
26
30
 
31
+ /** Print the run header and any stacked annotations to stderr */
32
+ function printRunHeader(testCount: number, totalTestCount: number | undefined, annotations: string[]): void {
33
+ let header = `\n${pc.bold("superghost")} v${pkg.version} / Running ${testCount}`;
34
+ if (totalTestCount !== undefined) {
35
+ header += ` of ${totalTestCount}`;
36
+ }
37
+ header += ` test(s)...`;
38
+ writeStderr(header);
39
+ writeStderr("");
40
+
41
+ for (const annotation of annotations) {
42
+ writeStderr(pc.dim(` ${annotation}`));
43
+ }
44
+ if (annotations.length > 0) {
45
+ writeStderr("");
46
+ }
47
+ }
48
+
27
49
  const program = new Command();
28
50
 
29
51
  program
@@ -32,7 +54,18 @@ program
32
54
  .version(pkg.version)
33
55
  .requiredOption("-c, --config <path>", "Path to YAML config file")
34
56
  .option("--headed", "Run browser in headed mode (visible browser window)")
35
- .action(async (options: { config: string; headed?: boolean }) => {
57
+ .option("--only <pattern>", "Run only tests matching glob pattern")
58
+ .option("--no-cache", "Bypass cache reads (still writes on success)")
59
+ .option("--dry-run", "List tests and validate config without executing")
60
+ .option("--verbose", "Show per-step tool call output during execution")
61
+ .exitOverride((err) => {
62
+ // Commander writes its own error message to stderr.
63
+ // Re-exit with code 2 for config-class errors (missing required option, unknown option).
64
+ if (err.exitCode !== 0) {
65
+ process.exit(2);
66
+ }
67
+ })
68
+ .action(async (options: { config: string; headed?: boolean; only?: string; cache: boolean; dryRun?: boolean; verbose?: boolean }) => {
36
69
  const pm = new ProcessManager();
37
70
  setupSignalHandlers(pm);
38
71
 
@@ -48,7 +81,7 @@ program
48
81
  if (options.headed) {
49
82
  config.headless = false;
50
83
  }
51
- const reporter = new ConsoleReporter();
84
+ const reporter = new ConsoleReporter(options.verbose ?? false);
52
85
 
53
86
  // Infer provider: use explicit modelProvider unless it matches default and model suggests otherwise
54
87
  const provider =
@@ -59,6 +92,64 @@ program
59
92
  // Validate API key at startup before any tests run
60
93
  validateApiKey(provider);
61
94
 
95
+ // Apply --only filter before any expensive operations
96
+ const totalTestCount = config.tests.length;
97
+ if (options.only) {
98
+ const allTestNames = config.tests.map((t) => t.name);
99
+ const isMatch = picomatch(options.only, { nocase: true });
100
+ config.tests = config.tests.filter((t) => isMatch(t.name));
101
+
102
+ if (config.tests.length === 0) {
103
+ const names = allTestNames.map((n) => ` - ${n}`).join("\n");
104
+ writeStderr(`${pc.red("Error:")} No tests match pattern "${options.only}"\n\nAvailable tests:\n${names}`);
105
+ setTimeout(() => process.exit(2), 100);
106
+ return;
107
+ }
108
+ }
109
+
110
+ // Dry-run: list tests with cache/AI source labels, then exit
111
+ if (options.dryRun) {
112
+ const cacheManager = new CacheManager(config.cacheDir);
113
+
114
+ // Print header with annotations
115
+ const dryRunAnnotations = ["(dry-run)"];
116
+ if (options.only) dryRunAnnotations.push(`(filtered by --only "${options.only}")`);
117
+ printRunHeader(config.tests.length, options.only ? totalTestCount : undefined, dryRunAnnotations);
118
+
119
+ // Determine max test name length for padding
120
+ const maxNameLen = Math.max(...config.tests.map(t => t.name.length));
121
+ let cachedCount = 0;
122
+
123
+ for (let i = 0; i < config.tests.length; i++) {
124
+ const test = config.tests[i];
125
+ const baseUrl = test.baseUrl ?? config.baseUrl ?? "";
126
+ const entry = await cacheManager.load(test.case, baseUrl);
127
+ const source = entry ? "cache" : "ai";
128
+ if (entry) cachedCount++;
129
+
130
+ const paddedName = test.name.padEnd(maxNameLen);
131
+ writeStderr(` ${i + 1}. ${paddedName} (${source})`);
132
+ }
133
+
134
+ writeStderr("");
135
+ writeStderr(`${config.tests.length} tests, ${cachedCount} cached`);
136
+
137
+ setTimeout(() => process.exit(0), 100);
138
+ return;
139
+ }
140
+
141
+ // Preflight: check baseUrl reachability (only if global baseUrl configured)
142
+ if (config.baseUrl) {
143
+ try {
144
+ await checkBaseUrlReachable(config.baseUrl);
145
+ } catch {
146
+ writeStderr(`${pc.red("Error:")} baseUrl unreachable: ${config.baseUrl}`);
147
+ writeStderr(` Check that the server is running and the URL is correct.`);
148
+ setTimeout(() => process.exit(2), 100);
149
+ return;
150
+ }
151
+ }
152
+
62
153
  // Create AI model
63
154
  const model = createModel(config.model, provider);
64
155
 
@@ -72,6 +163,7 @@ program
72
163
 
73
164
  // Create cache subsystem
74
165
  const cacheManager = new CacheManager(config.cacheDir);
166
+ await cacheManager.migrateV1Cache();
75
167
  const toolExecutor: ToolExecutor = async (toolName, toolInput) => {
76
168
  const tool = tools[toolName];
77
169
  if (!tool) throw new Error(`Tool not found: ${toolName}`);
@@ -79,6 +171,9 @@ program
79
171
  };
80
172
  const replayer = new StepReplayer(toolExecutor);
81
173
 
174
+ // Create onStepProgress callback bound to reporter
175
+ const onStepProgress: OnStepProgress = (step) => reporter.onStepProgress(step);
176
+
82
177
  // Create TestExecutor with cache-first strategy
83
178
  const executor = new TestExecutor({
84
179
  cacheManager,
@@ -88,18 +183,23 @@ program
88
183
  tools,
89
184
  config,
90
185
  globalContext: config.context,
186
+ noCache: !options.cache,
187
+ onStepProgress,
91
188
  });
92
189
 
93
190
  // Wire execute function for TestRunner
94
191
  const executeFn: ExecuteFn = async (testCase, baseUrl, testContext?) =>
95
192
  executor.execute(testCase, baseUrl, testContext);
96
193
 
97
- console.log(
98
- `\n${pc.bold("superghost")} v${pkg.version} / Running ${config.tests.length} test(s)...\n`,
99
- );
194
+ const runAnnotations: string[] = [];
195
+ if (options.only) runAnnotations.push(`(filtered by --only "${options.only}")`);
196
+ if (!options.cache) runAnnotations.push("(cache disabled)");
197
+ if (options.verbose) runAnnotations.push("(verbose)");
198
+ printRunHeader(config.tests.length, options.only ? totalTestCount : undefined, runAnnotations);
100
199
 
101
200
  const runner = new TestRunner(config, reporter, executeFn);
102
201
  const result = await runner.run();
202
+ result.skipped = options.only ? totalTestCount - config.tests.length : 0;
103
203
 
104
204
  await mcpManager.close();
105
205
  await pm.killAll();
@@ -112,17 +212,25 @@ program
112
212
  await pm.killAll();
113
213
 
114
214
  if (error instanceof ConfigLoadError) {
115
- Bun.write(Bun.stderr, `${pc.red("Error:")} ${error.message}\n`);
116
- setTimeout(() => process.exit(1), 100);
215
+ writeStderr(`${pc.red("Error:")} ${error.message}`);
216
+ setTimeout(() => process.exit(2), 100);
117
217
  return;
118
218
  }
119
219
  if (error instanceof Error && error.message.startsWith("Missing API key")) {
120
- Bun.write(Bun.stderr, `${pc.red("Error:")} ${error.message}\n`);
121
- setTimeout(() => process.exit(1), 100);
220
+ writeStderr(`${pc.red("Error:")} ${error.message}`);
221
+ setTimeout(() => process.exit(2), 100);
122
222
  return;
123
223
  }
124
- throw error;
224
+ const msg = error instanceof Error ? error.message : String(error);
225
+ writeStderr(`${pc.red("Unexpected error:")} ${msg}`);
226
+ setTimeout(() => process.exit(2), 100);
125
227
  }
126
228
  });
127
229
 
128
- await program.parseAsync();
230
+ (async () => {
231
+ const isHelpRequest = process.argv.includes("--help") || process.argv.includes("-h");
232
+ if (isHelpRequest) {
233
+ await animateBanner();
234
+ }
235
+ await program.parseAsync();
236
+ })();
@@ -0,0 +1,13 @@
1
+ /**
2
+ * Preflight reachability check for baseUrl.
3
+ *
4
+ * Resolves on ANY HTTP response (even 4xx/5xx -- those prove the server is reachable).
5
+ * Throws on network-level failures: connection refused, DNS failure, timeout.
6
+ */
7
+ export async function checkBaseUrlReachable(url: string, timeoutMs = 5000): Promise<void> {
8
+ await fetch(url, {
9
+ method: "HEAD",
10
+ signal: AbortSignal.timeout(timeoutMs),
11
+ redirect: "follow",
12
+ });
13
+ }
@@ -0,0 +1,70 @@
1
+ function hslToRgb(h: number, s: number, l: number): [number, number, number] {
2
+ s /= 100;
3
+ l /= 100;
4
+ const k = (n: number) => (n + h / 30) % 12;
5
+ const a = s * Math.min(l, 1 - l);
6
+ const f = (n: number) => l - a * Math.max(-1, Math.min(k(n) - 3, Math.min(9 - k(n), 1)));
7
+ return [Math.round(f(0) * 255), Math.round(f(8) * 255), Math.round(f(4) * 255)];
8
+ }
9
+
10
+ function colorChar(char: string, hue: number): string {
11
+ const [r, g, b] = hslToRgb(hue % 360, 100, 60);
12
+ return `\x1b[38;2;${r};${g};${b}m${char}\x1b[0m`;
13
+ }
14
+
15
+ function rainbowLine(text: string, hueOffset: number): string {
16
+ const hueStep = 360 / text.length;
17
+ return text
18
+ .split("")
19
+ .map((char, i) => colorChar(char, (hueOffset + i * hueStep) % 360))
20
+ .join("");
21
+ }
22
+
23
+ const TITLE = " Super Ghost ";
24
+ const BANNER_LINES = [
25
+ ` 👻${TITLE}👻`,
26
+ ` ─────────────────────`,
27
+ ` AI-powered E2E testing`,
28
+ ];
29
+
30
+ function renderBanner(hueOffset: number): string[] {
31
+ return [
32
+ ` 👻${rainbowLine(TITLE, hueOffset)}👻`,
33
+ ` \x1b[2m─────────────────────\x1b[0m`,
34
+ ` \x1b[2mAI-powered E2E testing\x1b[0m`,
35
+ ];
36
+ }
37
+
38
+ const FRAMES = 15;
39
+ const FRAME_MS = 60;
40
+ const HUE_STEP = 24;
41
+
42
+ export async function animateBanner(): Promise<void> {
43
+ const isTTY = process.stdout.isTTY === true;
44
+
45
+ if (!isTTY) {
46
+ const lines = BANNER_LINES;
47
+ process.stdout.write(lines.join("\n") + "\n\n");
48
+ return;
49
+ }
50
+
51
+ process.stdout.write("\x1b[?25l"); // hide cursor
52
+
53
+ try {
54
+ for (let frame = 0; frame < FRAMES; frame++) {
55
+ const lines = renderBanner(frame * HUE_STEP);
56
+ if (frame > 0) {
57
+ // Move cursor up N lines to overwrite previous frame
58
+ process.stdout.write(`\x1b[${lines.length}A`);
59
+ }
60
+ process.stdout.write(lines.join("\n") + "\n");
61
+
62
+ if (frame < FRAMES - 1) {
63
+ await new Promise<void>((resolve) => setTimeout(resolve, FRAME_MS));
64
+ }
65
+ }
66
+ process.stdout.write("\n");
67
+ } finally {
68
+ process.stdout.write("\x1b[?25h"); // restore cursor
69
+ }
70
+ }
@@ -1,6 +1,6 @@
1
1
  import pc from "picocolors";
2
2
  import { createSpinner } from "nanospinner";
3
- import type { Reporter } from "./types.ts";
3
+ import type { Reporter, StepInfo } from "./types.ts";
4
4
  import type { TestResult, RunResult } from "../runner/types.ts";
5
5
 
6
6
  /**
@@ -14,16 +14,29 @@ export function formatDuration(ms: number): string {
14
14
  return `${(ms / 1000).toFixed(1)}s`;
15
15
  }
16
16
 
17
+ /** Write a line of text to stderr */
18
+ export function writeStderr(text: string): void {
19
+ Bun.write(Bun.stderr, text + "\n");
20
+ }
21
+
17
22
  /**
18
23
  * Console reporter with colored output, spinners, and box summary.
24
+ * All output routes to stderr so stdout is reserved for structured output.
19
25
  * Colors auto-disable when stdout is not a TTY (via picocolors).
20
26
  * Spinner animation auto-disables in non-TTY (via nanospinner).
21
27
  */
22
28
  export class ConsoleReporter implements Reporter {
23
29
  private spinner: ReturnType<typeof createSpinner> | null = null;
30
+ private readonly verbose: boolean;
31
+ private currentTestName: string | null = null;
32
+
33
+ constructor(verbose = false) {
34
+ this.verbose = verbose;
35
+ }
24
36
 
25
37
  /** Creates a spinner with the test name and starts it */
26
38
  onTestStart(testName: string): void {
39
+ this.currentTestName = testName;
27
40
  this.spinner = createSpinner(testName).start();
28
41
  }
29
42
 
@@ -38,35 +51,52 @@ export class ConsoleReporter implements Reporter {
38
51
  this.spinner?.error({ text: `${testName} ${duration}` });
39
52
  }
40
53
  if (selfHealed) {
41
- console.log(pc.dim(" Cache was stale re-executed and updated"));
54
+ writeStderr(pc.dim(" Cache was stale \u2014 re-executed and updated"));
42
55
  }
43
56
  this.spinner = null;
57
+ this.currentTestName = null;
58
+ }
59
+
60
+ /** Handles per-step progress during AI execution */
61
+ onStepProgress(step: StepInfo): void {
62
+ if (this.verbose) {
63
+ writeStderr(pc.dim(` Step ${step.stepNumber}: ${step.description.full}`));
64
+ } else if (this.spinner) {
65
+ let spinnerText = `${this.currentTestName} \u2014 ${step.description.full}`;
66
+ if (spinnerText.length > 60) {
67
+ spinnerText = spinnerText.slice(0, 57) + "...";
68
+ }
69
+ this.spinner.update(spinnerText);
70
+ }
44
71
  }
45
72
 
46
73
  /** Prints bordered box summary and lists failed tests with error messages */
47
74
  onRunComplete(data: RunResult): void {
48
75
  const bar = "\u2501".repeat(40);
49
- console.log("");
50
- console.log(` ${bar}`);
51
- console.log(" SuperGhost Results");
52
- console.log(` ${bar}`);
53
- console.log(` Total: ${data.results.length}`);
54
- console.log(` Passed: ${pc.green(String(data.passed))}`);
55
- console.log(
76
+ writeStderr("");
77
+ writeStderr(` ${bar}`);
78
+ writeStderr(" SuperGhost Results");
79
+ writeStderr(` ${bar}`);
80
+ writeStderr(` Total: ${data.results.length}`);
81
+ writeStderr(` Passed: ${pc.green(String(data.passed))}`);
82
+ writeStderr(
56
83
  ` Failed: ${data.failed > 0 ? pc.red(String(data.failed)) : String(data.failed)}`,
57
84
  );
58
- console.log(` Cached: ${data.cached}`);
59
- console.log(` Time: ${pc.dim(formatDuration(data.totalDurationMs))}`);
60
- console.log(` ${bar}`);
85
+ if (data.skipped > 0) {
86
+ writeStderr(` Skipped: ${data.skipped}`);
87
+ }
88
+ writeStderr(` Cached: ${data.cached}`);
89
+ writeStderr(` Time: ${pc.dim(formatDuration(data.totalDurationMs))}`);
90
+ writeStderr(` ${bar}`);
61
91
 
62
92
  if (data.failed > 0) {
63
- console.log("");
64
- console.log(pc.red(" Failed tests:"));
93
+ writeStderr("");
94
+ writeStderr(pc.red(" Failed tests:"));
65
95
  for (const result of data.results) {
66
96
  if (result.status === "failed") {
67
- console.log(` ${pc.red("-")} ${result.testName}`);
97
+ writeStderr(` ${pc.red("-")} ${result.testName}`);
68
98
  if (result.error) {
69
- console.log(` ${pc.dim(result.error)}`);
99
+ writeStderr(` ${pc.dim(result.error)}`);
70
100
  }
71
101
  }
72
102
  }
@@ -0,0 +1,71 @@
1
+ import type { StepDescription } from "./types.ts";
2
+
3
+ /** Maps raw MCP tool names to human-readable action names */
4
+ const PREFIX_MAP: Record<string, string> = {
5
+ browser_navigate: "Navigate",
6
+ browser_click: "Click",
7
+ browser_type: "Type",
8
+ browser_screenshot: "Screenshot",
9
+ browser_wait_for_text: "Wait for text",
10
+ browser_hover: "Hover",
11
+ browser_select_option: "Select",
12
+ browser_go_back: "Go back",
13
+ browser_go_forward: "Go forward",
14
+ browser_press_key: "Press key",
15
+ browser_drag: "Drag",
16
+ browser_resize: "Resize",
17
+ browser_handle_dialog: "Handle dialog",
18
+ browser_file_upload: "Upload file",
19
+ browser_pdf_save: "Save PDF",
20
+ browser_close: "Close",
21
+ browser_console_messages: "Console messages",
22
+ browser_install: "Install browser",
23
+ browser_tab_list: "List tabs",
24
+ browser_tab_new: "New tab",
25
+ browser_tab_select: "Select tab",
26
+ browser_tab_close: "Close tab",
27
+ browser_network_requests: "Network requests",
28
+ browser_snapshot: "Snapshot",
29
+ };
30
+
31
+ /** Maps tool names to the input field used as the key argument */
32
+ const KEY_ARG_MAP: Record<string, string> = {
33
+ browser_navigate: "url",
34
+ browser_click: "element",
35
+ browser_type: "element",
36
+ browser_hover: "element",
37
+ browser_select_option: "element",
38
+ browser_press_key: "key",
39
+ browser_wait_for_text: "text",
40
+ };
41
+
42
+ /**
43
+ * Convert a raw tool call into a human-readable description.
44
+ *
45
+ * Known tools (browser_navigate, browser_click, etc.) map to friendly names.
46
+ * Unknown tools fall back to: strip underscores, capitalize first letter.
47
+ * Key arguments are extracted based on tool type (e.g., "url" for navigate).
48
+ */
49
+ export function describeToolCall(
50
+ toolName: string,
51
+ input: Record<string, unknown>,
52
+ ): StepDescription {
53
+ // Look up human name, or derive from raw name as fallback
54
+ const action =
55
+ PREFIX_MAP[toolName] ??
56
+ toolName
57
+ .replace(/_/g, " ")
58
+ .replace(/^\w/, (c) => c.toUpperCase());
59
+
60
+ // Look up which input field is the key argument for this tool
61
+ const keyArgField = KEY_ARG_MAP[toolName];
62
+ const rawKeyArg = keyArgField ? input[keyArgField] : undefined;
63
+ const keyArg =
64
+ rawKeyArg !== undefined && rawKeyArg !== null && String(rawKeyArg) !== ""
65
+ ? String(rawKeyArg)
66
+ : undefined;
67
+
68
+ const full = keyArg ? `${action} \u2192 ${keyArg}` : action;
69
+
70
+ return { action, keyArg, full };
71
+ }
@@ -1,8 +1,34 @@
1
1
  import type { RunResult, TestResult } from "../runner/types.ts";
2
2
 
3
+ /** Describes a tool call in human-readable form */
4
+ export interface StepDescription {
5
+ /** Human-readable action name, e.g. "Navigate", "Click" */
6
+ action: string;
7
+ /** Key argument value, e.g. "/login", "button.submit" */
8
+ keyArg?: string;
9
+ /** Full description string, e.g. "Navigate \u2192 /login" */
10
+ full: string;
11
+ }
12
+
13
+ /** Information about a single step (tool call) during AI execution */
14
+ export interface StepInfo {
15
+ /** 1-based step counter for the current test */
16
+ stepNumber: number;
17
+ /** Raw tool name, e.g. "browser_navigate" */
18
+ toolName: string;
19
+ /** Tool call input arguments */
20
+ input: Record<string, unknown>;
21
+ /** Human-readable description of the tool call */
22
+ description: StepDescription;
23
+ }
24
+
25
+ /** Callback invoked for each tool call during AI execution */
26
+ export type OnStepProgress = (step: StepInfo) => void;
27
+
3
28
  /** Interface for output reporting */
4
29
  export interface Reporter {
5
30
  onTestStart(testName: string): void;
6
31
  onTestComplete(result: TestResult): void;
7
32
  onRunComplete(data: RunResult): void;
33
+ onStepProgress?(step: StepInfo): void;
8
34
  }
@@ -3,6 +3,7 @@ import type { StepReplayer } from "../cache/step-replayer.ts";
3
3
  import type { AgentExecutionResult } from "../agent/types.ts";
4
4
  import type { Config } from "../config/types.ts";
5
5
  import type { TestResult } from "./types.ts";
6
+ import type { OnStepProgress } from "../output/types.ts";
6
7
 
7
8
  /** Function signature for executing a test via the AI agent */
8
9
  type ExecuteAgentFn = (config: {
@@ -13,6 +14,7 @@ type ExecuteAgentFn = (config: {
13
14
  recursionLimit: number;
14
15
  globalContext?: string;
15
16
  testContext?: string;
17
+ onStepProgress?: OnStepProgress;
16
18
  }) => Promise<AgentExecutionResult>;
17
19
 
18
20
  /**
@@ -32,6 +34,8 @@ export class TestExecutor {
32
34
  "maxAttempts" | "recursionLimit" | "model" | "modelProvider"
33
35
  > & { context?: string };
34
36
  private readonly globalContext?: string;
37
+ private readonly noCache: boolean;
38
+ private readonly onStepProgress?: OnStepProgress;
35
39
 
36
40
  constructor(opts: {
37
41
  cacheManager: CacheManager;
@@ -44,6 +48,8 @@ export class TestExecutor {
44
48
  "maxAttempts" | "recursionLimit" | "model" | "modelProvider"
45
49
  > & { context?: string };
46
50
  globalContext?: string;
51
+ noCache?: boolean;
52
+ onStepProgress?: OnStepProgress;
47
53
  }) {
48
54
  this.cacheManager = opts.cacheManager;
49
55
  this.replayer = opts.replayer;
@@ -52,6 +58,8 @@ export class TestExecutor {
52
58
  this.tools = opts.tools ?? {};
53
59
  this.config = opts.config;
54
60
  this.globalContext = opts.globalContext;
61
+ this.noCache = opts.noCache ?? false;
62
+ this.onStepProgress = opts.onStepProgress;
55
63
  }
56
64
 
57
65
  /** Execute a single test case with cache-first strategy */
@@ -62,24 +70,26 @@ export class TestExecutor {
62
70
  ): Promise<TestResult> {
63
71
  const start = Date.now();
64
72
 
65
- // Phase 1: Try cache replay
66
- const cached = await this.cacheManager.load(testCase, baseUrl);
67
- if (cached) {
68
- const replay = await this.replayer.replay(cached.steps);
69
- if (replay.success) {
70
- return {
71
- testName: testCase,
72
- testCase,
73
- status: "passed",
74
- source: "cache",
75
- durationMs: Date.now() - start,
76
- };
73
+ // Phase 1: Try cache replay (unless noCache)
74
+ if (!this.noCache) {
75
+ const cached = await this.cacheManager.load(testCase, baseUrl);
76
+ if (cached) {
77
+ const replay = await this.replayer.replay(cached.steps, this.onStepProgress);
78
+ if (replay.success) {
79
+ return {
80
+ testName: testCase,
81
+ testCase,
82
+ status: "passed",
83
+ source: "cache",
84
+ durationMs: Date.now() - start,
85
+ };
86
+ }
87
+ // Cache stale — fall through to AI with self-heal flag
88
+ return this.executeWithAgent(testCase, baseUrl, start, true, testContext);
77
89
  }
78
- // Cache stale — fall through to AI with self-heal flag
79
- return this.executeWithAgent(testCase, baseUrl, start, true, testContext);
80
90
  }
81
91
 
82
- // Phase 2: No cache — go directly to AI
92
+ // Phase 2: No cache or noCache — go directly to AI
83
93
  return this.executeWithAgent(testCase, baseUrl, start, false, testContext);
84
94
  }
85
95
 
@@ -102,6 +112,7 @@ export class TestExecutor {
102
112
  recursionLimit: this.config.recursionLimit,
103
113
  globalContext: this.globalContext,
104
114
  testContext,
115
+ onStepProgress: this.onStepProgress,
105
116
  });
106
117
 
107
118
  if (result.passed) {
@@ -62,5 +62,6 @@ function aggregateResults(
62
62
  cached: results.filter(
63
63
  (r) => r.source === "cache" && r.status === "passed",
64
64
  ).length,
65
+ skipped: 0,
65
66
  };
66
67
  }
@@ -23,4 +23,5 @@ export interface RunResult {
23
23
  passed: number;
24
24
  failed: number;
25
25
  cached: number;
26
+ skipped: number;
26
27
  }