superghost 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "superghost",
3
- "version": "0.2.0",
3
+ "version": "0.3.0",
4
4
  "description": "Plain English test cases with AI execution and instant cached replay for CI/CD",
5
5
  "type": "module",
6
6
  "bin": {
@@ -14,8 +14,10 @@
14
14
  "scripts": {
15
15
  "test": "bun test",
16
16
  "typecheck": "bunx tsc --noEmit",
17
+ "lint": "bunx biome check .",
18
+ "lint:fix": "bunx biome check --write .",
17
19
  "build:binary": "bun run scripts/build-binaries.ts",
18
- "prepublishOnly": "bun test && bunx tsc --noEmit",
20
+ "prepublishOnly": "bun run lint && bun test && bunx tsc --noEmit",
19
21
  "e2e": "bun run e2e/run-e2e.ts",
20
22
  "e2e:smoke": "bun run e2e/run-e2e.ts smoke",
21
23
  "e2e:browser": "bun run e2e/run-e2e.ts browser",
@@ -69,6 +71,7 @@
69
71
  "zod": "^4.3.6"
70
72
  },
71
73
  "devDependencies": {
74
+ "@biomejs/biome": "2.4.6",
72
75
  "@types/bun": "^1.3.10",
73
76
  "@types/react": "^19.0.0",
74
77
  "@types/react-dom": "^19.0.0",
@@ -1,10 +1,11 @@
1
1
  import { generateText, Output, stepCountIs } from "ai";
2
2
  import { z } from "zod";
3
+
3
4
  import { StepRecorder } from "../cache/step-recorder.ts";
4
- import type { AgentExecutionResult } from "./types.ts";
5
- import { buildSystemPrompt } from "./prompt.ts";
6
5
  import { describeToolCall } from "../output/tool-name-map.ts";
7
- import type { OnStepProgress } from "../output/types.ts";
6
+ import { type OnStepProgress } from "../output/types.ts";
7
+ import { buildSystemPrompt } from "./prompt.ts";
8
+ import { type AgentExecutionResult } from "./types.ts";
8
9
 
9
10
  /**
10
11
  * Schema for structured agent output.
@@ -12,9 +13,7 @@ import type { OnStepProgress } from "../output/types.ts";
12
13
  */
13
14
  const TestResultSchema = z.object({
14
15
  passed: z.boolean().describe("Whether the test case passed"),
15
- message: z
16
- .string()
17
- .describe("Brief diagnostic: what happened and what the page showed"),
16
+ message: z.string().describe("Brief diagnostic: what happened and what the page showed"),
18
17
  });
19
18
 
20
19
  /**
@@ -40,12 +39,7 @@ export async function executeAgent(config: {
40
39
  const recorder = new StepRecorder();
41
40
  const wrappedTools = recorder.wrapTools(config.tools);
42
41
 
43
- const systemPrompt = buildSystemPrompt(
44
- config.testCase,
45
- config.baseUrl,
46
- config.globalContext,
47
- config.testContext,
48
- );
42
+ const systemPrompt = buildSystemPrompt(config.testCase, config.baseUrl, config.globalContext, config.testContext);
49
43
 
50
44
  let stepCounter = 0;
51
45
 
@@ -60,11 +54,8 @@ export async function executeAgent(config: {
60
54
  ? (event: any) => {
61
55
  if (event.success) {
62
56
  stepCounter++;
63
- const input = (event.toolCall.input ?? {}) as Record<
64
- string,
65
- unknown
66
- >;
67
- config.onStepProgress!({
57
+ const input = (event.toolCall.input ?? {}) as Record<string, unknown>;
58
+ config.onStepProgress?.({
68
59
  stepNumber: stepCounter,
69
60
  toolName: event.toolCall.toolName,
70
61
  input,
@@ -1,6 +1,7 @@
1
1
  import { createMCPClient } from "@ai-sdk/mcp";
2
2
  import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js";
3
- import type { Config } from "../config/types.ts";
3
+
4
+ import { type Config } from "../config/types.ts";
4
5
  import { getMcpCommand } from "../dist/paths.ts";
5
6
 
6
7
  /**
@@ -12,8 +13,7 @@ import { getMcpCommand } from "../dist/paths.ts";
12
13
  * regardless of test type.
13
14
  */
14
15
  export class McpManager {
15
- private playwrightClient: Awaited<ReturnType<typeof createMCPClient>> | null =
16
- null;
16
+ private playwrightClient: Awaited<ReturnType<typeof createMCPClient>> | null = null;
17
17
  private curlClient: Awaited<ReturnType<typeof createMCPClient>> | null = null;
18
18
 
19
19
  constructor(private readonly config: Pick<Config, "browser" | "headless">) {}
@@ -27,11 +27,7 @@ export class McpManager {
27
27
  const playwrightCmd = getMcpCommand("@playwright/mcp");
28
28
  const curlCmd = getMcpCommand("@calibress/curl-mcp");
29
29
 
30
- const playwrightArgs = [
31
- ...playwrightCmd.args,
32
- "--isolated",
33
- `--browser=${this.config.browser}`,
34
- ];
30
+ const playwrightArgs = [...playwrightCmd.args, "--isolated", `--browser=${this.config.browser}`];
35
31
 
36
32
  if (this.config.headless) {
37
33
  playwrightArgs.splice(playwrightCmd.args.length, 0, "--headless");
@@ -57,8 +53,8 @@ export class McpManager {
57
53
  * Provides ALL tools to the agent regardless of test type.
58
54
  */
59
55
  async getTools(): Promise<Record<string, any>> {
60
- const playwrightTools = await this.playwrightClient!.tools();
61
- const curlTools = await this.curlClient!.tools();
56
+ const playwrightTools = await this.playwrightClient?.tools();
57
+ const curlTools = await this.curlClient?.tools();
62
58
  return { ...playwrightTools, ...curlTools };
63
59
  }
64
60
 
@@ -68,10 +64,7 @@ export class McpManager {
68
64
  * even if one fails to close.
69
65
  */
70
66
  async close(): Promise<void> {
71
- await Promise.allSettled([
72
- this.playwrightClient?.close(),
73
- this.curlClient?.close(),
74
- ]);
67
+ await Promise.allSettled([this.playwrightClient?.close(), this.curlClient?.close()]);
75
68
  this.playwrightClient = null;
76
69
  this.curlClient = null;
77
70
  }
@@ -1,6 +1,6 @@
1
1
  import { anthropic } from "@ai-sdk/anthropic";
2
- import { openai } from "@ai-sdk/openai";
3
2
  import { google } from "@ai-sdk/google";
3
+ import { openai } from "@ai-sdk/openai";
4
4
  import { createOpenRouter } from "@openrouter/ai-sdk-provider";
5
5
 
6
6
  /** Supported LLM provider names */
@@ -1,4 +1,4 @@
1
- import type { CachedStep } from "../cache/types.ts";
1
+ import { type CachedStep } from "../cache/types.ts";
2
2
 
3
3
  /** Result of a single AI agent execution */
4
4
  export interface AgentExecutionResult {
@@ -1,6 +1,7 @@
1
+ import { mkdir, readdir, rename } from "node:fs/promises";
1
2
  import { join } from "node:path";
2
- import { mkdir, rename, readdir } from "node:fs/promises";
3
- import type { CacheEntry, CachedStep } from "./types.ts";
3
+
4
+ import { type CachedStep, type CacheEntry } from "./types.ts";
4
5
 
5
6
  /**
6
7
  * Manages file-based cache entries for test step recordings.
@@ -103,7 +104,7 @@ export class CacheManager {
103
104
  const filePath = join(this.cacheDir, `${hash}.json`);
104
105
 
105
106
  try {
106
- return await Bun.file(filePath).json() as CacheEntry;
107
+ return (await Bun.file(filePath).json()) as CacheEntry;
107
108
  } catch {
108
109
  return null;
109
110
  }
@@ -1,4 +1,4 @@
1
- import type { CachedStep } from "./types.ts";
1
+ import { type CachedStep } from "./types.ts";
2
2
 
3
3
  /**
4
4
  * Records MCP tool calls as CachedStep entries.
@@ -1,12 +1,9 @@
1
- import type { CachedStep } from "./types.ts";
2
- import type { OnStepProgress } from "../output/types.ts";
3
1
  import { describeToolCall } from "../output/tool-name-map.ts";
2
+ import { type OnStepProgress } from "../output/types.ts";
3
+ import { type CachedStep } from "./types.ts";
4
4
 
5
5
  /** Function signature for executing a tool by name with given input */
6
- export type ToolExecutor = (
7
- toolName: string,
8
- toolInput: Record<string, unknown>,
9
- ) => Promise<string>;
6
+ export type ToolExecutor = (toolName: string, toolInput: Record<string, unknown>) => Promise<string>;
10
7
 
11
8
  /** Result of replaying cached steps */
12
9
  export interface ReplayResult {
package/src/cli.ts CHANGED
@@ -2,31 +2,31 @@
2
2
 
3
3
  import { Command } from "commander";
4
4
  import pc from "picocolors";
5
- import { loadConfig, ConfigLoadError } from "./config/loader.ts";
6
- import { TestRunner } from "./runner/test-runner.ts";
7
- import type { ExecuteFn } from "./runner/test-runner.ts";
8
- import { ConsoleReporter, writeStderr } from "./output/reporter.ts";
9
- import { ProcessManager } from "./infra/process-manager.ts";
10
- import { setupSignalHandlers } from "./infra/signals.ts";
5
+ import picomatch from "picomatch";
6
+
7
+ import pkg from "../package.json";
8
+ import { executeAgent } from "./agent/agent-runner.ts";
11
9
  import { McpManager } from "./agent/mcp-manager.ts";
10
+ import { createModel, inferProvider, type ProviderName, validateApiKey } from "./agent/model-factory.ts";
12
11
  import { CacheManager } from "./cache/cache-manager.ts";
13
- import { StepReplayer } from "./cache/step-replayer.ts";
14
- import type { ToolExecutor } from "./cache/step-replayer.ts";
15
- import { TestExecutor } from "./runner/test-executor.ts";
16
- import {
17
- inferProvider,
18
- validateApiKey,
19
- createModel,
20
- } from "./agent/model-factory.ts";
21
- import type { ProviderName } from "./agent/model-factory.ts";
22
- import { executeAgent } from "./agent/agent-runner.ts";
23
- import type { OnStepProgress } from "./output/types.ts";
24
- import picomatch from "picomatch";
25
- import { checkBaseUrlReachable } from "./infra/preflight.ts";
12
+ import { StepReplayer, type ToolExecutor } from "./cache/step-replayer.ts";
13
+ import { ConfigLoadError, loadConfig } from "./config/loader.ts";
26
14
  import { isStandaloneBinary } from "./dist/paths.ts";
27
15
  import { ensureMcpDependencies } from "./dist/setup.ts";
16
+ import { checkBaseUrlReachable } from "./infra/preflight.ts";
17
+ import { ProcessManager } from "./infra/process-manager.ts";
18
+ import { setupSignalHandlers } from "./infra/signals.ts";
28
19
  import { animateBanner } from "./output/banner.ts";
29
- import pkg from "../package.json";
20
+ import {
21
+ formatJsonDryRun,
22
+ formatJsonError,
23
+ formatJsonOutput,
24
+ type JsonOutputMetadata,
25
+ } from "./output/json-formatter.ts";
26
+ import { ConsoleReporter, writeStderr } from "./output/reporter.ts";
27
+ import { type OnStepProgress } from "./output/types.ts";
28
+ import { TestExecutor } from "./runner/test-executor.ts";
29
+ import { type ExecuteFn, TestRunner } from "./runner/test-runner.ts";
30
30
 
31
31
  /** Print the run header and any stacked annotations to stderr */
32
32
  function printRunHeader(testCount: number, totalTestCount: number | undefined, annotations: string[]): void {
@@ -48,6 +48,11 @@ function printRunHeader(testCount: number, totalTestCount: number | undefined, a
48
48
 
49
49
  const program = new Command();
50
50
 
51
+ program.configureOutput({
52
+ writeOut: (str) => writeStderr(str.trimEnd()),
53
+ writeErr: (str) => writeStderr(str.trimEnd()),
54
+ });
55
+
51
56
  program
52
57
  .name("superghost")
53
58
  .description("AI-powered end-to-end browser and API testing")
@@ -58,6 +63,7 @@ program
58
63
  .option("--no-cache", "Bypass cache reads (still writes on success)")
59
64
  .option("--dry-run", "List tests and validate config without executing")
60
65
  .option("--verbose", "Show per-step tool call output during execution")
66
+ .option("--output <format>", "Output format (json)")
61
67
  .exitOverride((err) => {
62
68
  // Commander writes its own error message to stderr.
63
69
  // Re-exit with code 2 for config-class errors (missing required option, unknown option).
@@ -65,167 +71,234 @@ program
65
71
  process.exit(2);
66
72
  }
67
73
  })
68
- .action(async (options: { config: string; headed?: boolean; only?: string; cache: boolean; dryRun?: boolean; verbose?: boolean }) => {
69
- const pm = new ProcessManager();
70
- setupSignalHandlers(pm);
71
-
72
- // Auto-install MCP dependencies for standalone binary on first run
73
- if (isStandaloneBinary()) {
74
- await ensureMcpDependencies();
75
- }
74
+ .action(
75
+ async (options: {
76
+ config: string;
77
+ headed?: boolean;
78
+ only?: string;
79
+ cache: boolean;
80
+ dryRun?: boolean;
81
+ verbose?: boolean;
82
+ output?: string;
83
+ }) => {
84
+ const pm = new ProcessManager();
85
+ setupSignalHandlers(pm);
76
86
 
77
- let mcpManager: McpManager | null = null;
87
+ // Validate --output format early
88
+ if (options.output && options.output !== "json") {
89
+ writeStderr(`${pc.red("Error:")} Unknown output format '${options.output}'. Supported: json`);
90
+ setTimeout(() => process.exit(2), 100);
91
+ return;
92
+ }
78
93
 
79
- try {
80
- const config = await loadConfig(options.config);
81
- if (options.headed) {
82
- config.headless = false;
94
+ // Auto-install MCP dependencies for standalone binary on first run
95
+ if (isStandaloneBinary()) {
96
+ await ensureMcpDependencies();
83
97
  }
84
- const reporter = new ConsoleReporter(options.verbose ?? false);
85
-
86
- // Infer provider: use explicit modelProvider unless it matches default and model suggests otherwise
87
- const provider =
88
- config.modelProvider === "anthropic"
89
- ? inferProvider(config.model)
90
- : (config.modelProvider as ProviderName);
91
-
92
- // Validate API key at startup before any tests run
93
- validateApiKey(provider);
94
-
95
- // Apply --only filter before any expensive operations
96
- const totalTestCount = config.tests.length;
97
- if (options.only) {
98
- const allTestNames = config.tests.map((t) => t.name);
99
- const isMatch = picomatch(options.only, { nocase: true });
100
- config.tests = config.tests.filter((t) => isMatch(t.name));
101
-
102
- if (config.tests.length === 0) {
103
- const names = allTestNames.map((n) => ` - ${n}`).join("\n");
104
- writeStderr(`${pc.red("Error:")} No tests match pattern "${options.only}"\n\nAvailable tests:\n${names}`);
105
- setTimeout(() => process.exit(2), 100);
98
+
99
+ let mcpManager: McpManager | null = null;
100
+
101
+ try {
102
+ const config = await loadConfig(options.config);
103
+ if (options.headed) {
104
+ config.headless = false;
105
+ }
106
+ const reporter = new ConsoleReporter(options.verbose ?? false);
107
+
108
+ // Infer provider: use explicit modelProvider unless it matches default and model suggests otherwise
109
+ const provider =
110
+ config.modelProvider === "anthropic" ? inferProvider(config.model) : (config.modelProvider as ProviderName);
111
+
112
+ // Validate API key at startup before any tests run
113
+ validateApiKey(provider);
114
+
115
+ // Apply --only filter before any expensive operations
116
+ const totalTestCount = config.tests.length;
117
+ if (options.only) {
118
+ const allTestNames = config.tests.map((t) => t.name);
119
+ const isMatch = picomatch(options.only, { nocase: true });
120
+ config.tests = config.tests.filter((t) => isMatch(t.name));
121
+
122
+ if (config.tests.length === 0) {
123
+ const names = allTestNames.map((n) => ` - ${n}`).join("\n");
124
+ writeStderr(`${pc.red("Error:")} No tests match pattern "${options.only}"\n\nAvailable tests:\n${names}`);
125
+ setTimeout(() => process.exit(2), 100);
126
+ return;
127
+ }
128
+ }
129
+
130
+ // Dry-run: list tests with cache/AI source labels, then exit
131
+ if (options.dryRun) {
132
+ const cacheManager = new CacheManager(config.cacheDir);
133
+
134
+ // Print header with annotations
135
+ const dryRunAnnotations = ["(dry-run)"];
136
+ if (options.only) dryRunAnnotations.push(`(filtered by --only "${options.only}")`);
137
+ printRunHeader(config.tests.length, options.only ? totalTestCount : undefined, dryRunAnnotations);
138
+
139
+ // Determine max test name length for padding
140
+ const maxNameLen = Math.max(...config.tests.map((t) => t.name.length));
141
+ let cachedCount = 0;
142
+ const dryRunTests: Array<{ name: string; case: string; source: "cache" | "ai" }> = [];
143
+
144
+ for (let i = 0; i < config.tests.length; i++) {
145
+ const test = config.tests[i];
146
+ const baseUrl = test.baseUrl ?? config.baseUrl ?? "";
147
+ const entry = await cacheManager.load(test.case, baseUrl);
148
+ const source: "cache" | "ai" = entry ? "cache" : "ai";
149
+ if (entry) cachedCount++;
150
+ dryRunTests.push({ name: test.name, case: test.case, source });
151
+
152
+ const paddedName = test.name.padEnd(maxNameLen);
153
+ writeStderr(` ${i + 1}. ${paddedName} (${source})`);
154
+ }
155
+
156
+ writeStderr("");
157
+ writeStderr(`${config.tests.length} tests, ${cachedCount} cached`);
158
+
159
+ // Write JSON to stdout when --output json is active
160
+ if (options.output === "json") {
161
+ const metadata: JsonOutputMetadata = {
162
+ model: config.model,
163
+ provider,
164
+ configFile: options.config,
165
+ baseUrl: config.baseUrl,
166
+ timestamp: new Date().toISOString(),
167
+ ...(options.only
168
+ ? { filter: { pattern: options.only, matched: config.tests.length, total: totalTestCount } }
169
+ : {}),
170
+ };
171
+ const testList = dryRunTests.map((t) => ({
172
+ name: t.name,
173
+ case: t.case,
174
+ source: t.source,
175
+ }));
176
+ const json = formatJsonDryRun(testList, metadata, pkg.version);
177
+ process.stdout.write(`${json}\n`);
178
+ }
179
+
180
+ setTimeout(() => process.exit(0), 100);
106
181
  return;
107
182
  }
108
- }
109
183
 
110
- // Dry-run: list tests with cache/AI source labels, then exit
111
- if (options.dryRun) {
184
+ // Preflight: check baseUrl reachability (only if global baseUrl configured)
185
+ if (config.baseUrl) {
186
+ try {
187
+ await checkBaseUrlReachable(config.baseUrl);
188
+ } catch {
189
+ writeStderr(`${pc.red("Error:")} baseUrl unreachable: ${config.baseUrl}`);
190
+ writeStderr(` Check that the server is running and the URL is correct.`);
191
+ setTimeout(() => process.exit(2), 100);
192
+ return;
193
+ }
194
+ }
195
+
196
+ // Create AI model
197
+ const model = createModel(config.model, provider);
198
+
199
+ // Initialize MCP servers (shared across test suite, not per-test)
200
+ mcpManager = new McpManager({
201
+ browser: config.browser,
202
+ headless: config.headless,
203
+ });
204
+ await mcpManager.initialize();
205
+ const tools = await mcpManager.getTools();
206
+
207
+ // Create cache subsystem
112
208
  const cacheManager = new CacheManager(config.cacheDir);
209
+ await cacheManager.migrateV1Cache();
210
+ const toolExecutor: ToolExecutor = async (toolName, toolInput) => {
211
+ const tool = tools[toolName];
212
+ if (!tool) throw new Error(`Tool not found: ${toolName}`);
213
+ return await tool.execute(toolInput);
214
+ };
215
+ const replayer = new StepReplayer(toolExecutor);
113
216
 
114
- // Print header with annotations
115
- const dryRunAnnotations = ["(dry-run)"];
116
- if (options.only) dryRunAnnotations.push(`(filtered by --only "${options.only}")`);
117
- printRunHeader(config.tests.length, options.only ? totalTestCount : undefined, dryRunAnnotations);
217
+ // Create onStepProgress callback bound to reporter
218
+ const onStepProgress: OnStepProgress = (step) => reporter.onStepProgress(step);
118
219
 
119
- // Determine max test name length for padding
120
- const maxNameLen = Math.max(...config.tests.map(t => t.name.length));
121
- let cachedCount = 0;
220
+ // Create TestExecutor with cache-first strategy
221
+ const executor = new TestExecutor({
222
+ cacheManager,
223
+ replayer,
224
+ executeAgentFn: executeAgent,
225
+ model,
226
+ tools,
227
+ config,
228
+ globalContext: config.context,
229
+ noCache: !options.cache,
230
+ onStepProgress,
231
+ });
122
232
 
123
- for (let i = 0; i < config.tests.length; i++) {
124
- const test = config.tests[i];
125
- const baseUrl = test.baseUrl ?? config.baseUrl ?? "";
126
- const entry = await cacheManager.load(test.case, baseUrl);
127
- const source = entry ? "cache" : "ai";
128
- if (entry) cachedCount++;
233
+ // Wire execute function for TestRunner
234
+ const executeFn: ExecuteFn = async (testCase, baseUrl, testContext?) =>
235
+ executor.execute(testCase, baseUrl, testContext);
129
236
 
130
- const paddedName = test.name.padEnd(maxNameLen);
131
- writeStderr(` ${i + 1}. ${paddedName} (${source})`);
132
- }
237
+ const runAnnotations: string[] = [];
238
+ if (options.only) runAnnotations.push(`(filtered by --only "${options.only}")`);
239
+ if (!options.cache) runAnnotations.push("(cache disabled)");
240
+ if (options.verbose) runAnnotations.push("(verbose)");
241
+ printRunHeader(config.tests.length, options.only ? totalTestCount : undefined, runAnnotations);
133
242
 
134
- writeStderr("");
135
- writeStderr(`${config.tests.length} tests, ${cachedCount} cached`);
243
+ const runner = new TestRunner(config, reporter, executeFn);
244
+ const result = await runner.run();
245
+ result.skipped = options.only ? totalTestCount - config.tests.length : 0;
136
246
 
137
- setTimeout(() => process.exit(0), 100);
138
- return;
139
- }
247
+ await mcpManager.close();
248
+ await pm.killAll();
249
+ const code = result.failed > 0 ? 1 : 0;
140
250
 
141
- // Preflight: check baseUrl reachability (only if global baseUrl configured)
142
- if (config.baseUrl) {
143
- try {
144
- await checkBaseUrlReachable(config.baseUrl);
145
- } catch {
146
- writeStderr(`${pc.red("Error:")} baseUrl unreachable: ${config.baseUrl}`);
147
- writeStderr(` Check that the server is running and the URL is correct.`);
148
- setTimeout(() => process.exit(2), 100);
149
- return;
251
+ // Write JSON to stdout when --output json is active
252
+ if (options.output === "json") {
253
+ const metadata: JsonOutputMetadata = {
254
+ model: config.model,
255
+ provider,
256
+ configFile: options.config,
257
+ baseUrl: config.baseUrl,
258
+ timestamp: new Date().toISOString(),
259
+ ...(options.only
260
+ ? { filter: { pattern: options.only, matched: config.tests.length, total: totalTestCount } }
261
+ : {}),
262
+ };
263
+ const json = formatJsonOutput(result, metadata, pkg.version, code);
264
+ process.stdout.write(`${json}\n`);
150
265
  }
151
- }
152
266
 
153
- // Create AI model
154
- const model = createModel(config.model, provider);
155
-
156
- // Initialize MCP servers (shared across test suite, not per-test)
157
- mcpManager = new McpManager({
158
- browser: config.browser,
159
- headless: config.headless,
160
- });
161
- await mcpManager.initialize();
162
- const tools = await mcpManager.getTools();
163
-
164
- // Create cache subsystem
165
- const cacheManager = new CacheManager(config.cacheDir);
166
- await cacheManager.migrateV1Cache();
167
- const toolExecutor: ToolExecutor = async (toolName, toolInput) => {
168
- const tool = tools[toolName];
169
- if (!tool) throw new Error(`Tool not found: ${toolName}`);
170
- return await tool.execute(toolInput);
171
- };
172
- const replayer = new StepReplayer(toolExecutor);
173
-
174
- // Create onStepProgress callback bound to reporter
175
- const onStepProgress: OnStepProgress = (step) => reporter.onStepProgress(step);
176
-
177
- // Create TestExecutor with cache-first strategy
178
- const executor = new TestExecutor({
179
- cacheManager,
180
- replayer,
181
- executeAgentFn: executeAgent,
182
- model,
183
- tools,
184
- config,
185
- globalContext: config.context,
186
- noCache: !options.cache,
187
- onStepProgress,
188
- });
189
-
190
- // Wire execute function for TestRunner
191
- const executeFn: ExecuteFn = async (testCase, baseUrl, testContext?) =>
192
- executor.execute(testCase, baseUrl, testContext);
193
-
194
- const runAnnotations: string[] = [];
195
- if (options.only) runAnnotations.push(`(filtered by --only "${options.only}")`);
196
- if (!options.cache) runAnnotations.push("(cache disabled)");
197
- if (options.verbose) runAnnotations.push("(verbose)");
198
- printRunHeader(config.tests.length, options.only ? totalTestCount : undefined, runAnnotations);
199
-
200
- const runner = new TestRunner(config, reporter, executeFn);
201
- const result = await runner.run();
202
- result.skipped = options.only ? totalTestCount - config.tests.length : 0;
203
-
204
- await mcpManager.close();
205
- await pm.killAll();
206
- const code = result.failed > 0 ? 1 : 0;
207
- setTimeout(() => process.exit(code), 100);
208
- } catch (error) {
209
- if (mcpManager) {
210
- await mcpManager.close().catch(() => {});
211
- }
212
- await pm.killAll();
267
+ setTimeout(() => process.exit(code), 100);
268
+ } catch (error) {
269
+ if (mcpManager) {
270
+ await mcpManager.close().catch(() => {});
271
+ }
272
+ await pm.killAll();
213
273
 
214
- if (error instanceof ConfigLoadError) {
215
- writeStderr(`${pc.red("Error:")} ${error.message}`);
216
- setTimeout(() => process.exit(2), 100);
217
- return;
218
- }
219
- if (error instanceof Error && error.message.startsWith("Missing API key")) {
220
- writeStderr(`${pc.red("Error:")} ${error.message}`);
274
+ if (error instanceof ConfigLoadError) {
275
+ writeStderr(`${pc.red("Error:")} ${error.message}`);
276
+ if (options.output === "json") {
277
+ const json = formatJsonError(error.message, pkg.version, { configFile: options.config });
278
+ process.stdout.write(`${json}\n`);
279
+ }
280
+ setTimeout(() => process.exit(2), 100);
281
+ return;
282
+ }
283
+ if (error instanceof Error && error.message.startsWith("Missing API key")) {
284
+ writeStderr(`${pc.red("Error:")} ${error.message}`);
285
+ if (options.output === "json") {
286
+ const json = formatJsonError(error.message, pkg.version, { configFile: options.config });
287
+ process.stdout.write(`${json}\n`);
288
+ }
289
+ setTimeout(() => process.exit(2), 100);
290
+ return;
291
+ }
292
+ const msg = error instanceof Error ? error.message : String(error);
293
+ writeStderr(`${pc.red("Unexpected error:")} ${msg}`);
294
+ if (options.output === "json") {
295
+ const json = formatJsonError(msg, pkg.version, { configFile: options.config });
296
+ process.stdout.write(`${json}\n`);
297
+ }
221
298
  setTimeout(() => process.exit(2), 100);
222
- return;
223
299
  }
224
- const msg = error instanceof Error ? error.message : String(error);
225
- writeStderr(`${pc.red("Unexpected error:")} ${msg}`);
226
- setTimeout(() => process.exit(2), 100);
227
- }
228
- });
300
+ },
301
+ );
229
302
 
230
303
  (async () => {
231
304
  const isHelpRequest = process.argv.includes("--help") || process.argv.includes("-h");
@@ -1,6 +1,7 @@
1
1
  import { YAML } from "bun";
2
+
2
3
  import { ConfigSchema } from "./schema.ts";
3
- import type { Config } from "./types.ts";
4
+ import { type Config } from "./types.ts";
4
5
 
5
6
  /** Error thrown when config loading or validation fails */
6
7
  export class ConfigLoadError extends Error {
@@ -39,8 +40,7 @@ export async function loadConfig(filePath: string): Promise<Config> {
39
40
  );
40
41
  }
41
42
  throw new ConfigLoadError(
42
- `Cannot read config file: ${filePath}\n` +
43
- ` ${error instanceof Error ? error.message : String(error)}`,
43
+ `Cannot read config file: ${filePath}\n` + ` ${error instanceof Error ? error.message : String(error)}`,
44
44
  error,
45
45
  );
46
46
  }
@@ -50,10 +50,7 @@ export async function loadConfig(filePath: string): Promise<Config> {
50
50
  try {
51
51
  raw = YAML.parse(content);
52
52
  } catch (error) {
53
- throw new ConfigLoadError(
54
- `Invalid YAML syntax: ${error instanceof Error ? error.message : String(error)}`,
55
- error,
56
- );
53
+ throw new ConfigLoadError(`Invalid YAML syntax: ${error instanceof Error ? error.message : String(error)}`, error);
57
54
  }
58
55
 
59
56
  // Layer 3: Zod validation
@@ -61,15 +58,10 @@ export async function loadConfig(filePath: string): Promise<Config> {
61
58
  const result = ConfigSchema.safeParse(raw);
62
59
  if (!result.success) {
63
60
  const issues = result.error.issues
64
- .map(
65
- (issue, i) =>
66
- ` ${i + 1}. ${issue.path.join(".")}: ${issue.message}`,
67
- )
61
+ .map((issue, i) => ` ${i + 1}. ${issue.path.join(".")}: ${issue.message}`)
68
62
  .join("\n");
69
63
  const count = result.error.issues.length;
70
- throw new ConfigLoadError(
71
- `Invalid config (${count} issue${count > 1 ? "s" : ""})\n${issues}`,
72
- );
64
+ throw new ConfigLoadError(`Invalid config (${count} issue${count > 1 ? "s" : ""})\n${issues}`);
73
65
  }
74
66
 
75
67
  return result.data;
@@ -1,5 +1,6 @@
1
- import type { z } from "zod";
2
- import type { ConfigSchema, TestCaseSchema } from "./schema.ts";
1
+ import { type z } from "zod";
2
+
3
+ import { type ConfigSchema, type TestCaseSchema } from "./schema.ts";
3
4
 
4
5
  /** A single test case parsed from the config YAML */
5
6
  export type TestCase = z.infer<typeof TestCaseSchema>;
@@ -1,4 +1,4 @@
1
- import type { Subprocess } from "bun";
1
+ import { type Subprocess } from "bun";
2
2
 
3
3
  /**
4
4
  * Tracks spawned subprocesses and ensures cleanup on shutdown.
@@ -25,7 +25,11 @@ export class ProcessManager {
25
25
  proc.kill("SIGKILL");
26
26
  }
27
27
  }, 5000);
28
- try { await proc.exited; } finally { clearTimeout(timeout); }
28
+ try {
29
+ await proc.exited;
30
+ } finally {
31
+ clearTimeout(timeout);
32
+ }
29
33
  }
30
34
  });
31
35
  await Promise.allSettled(kills);
@@ -1,4 +1,4 @@
1
- import type { ProcessManager } from "./process-manager.ts";
1
+ import { type ProcessManager } from "./process-manager.ts";
2
2
 
3
3
  /**
4
4
  * Register SIGINT and SIGTERM handlers that clean up all tracked subprocesses.
@@ -21,11 +21,7 @@ function rainbowLine(text: string, hueOffset: number): string {
21
21
  }
22
22
 
23
23
  const TITLE = " Super Ghost ";
24
- const BANNER_LINES = [
25
- ` 👻${TITLE}👻`,
26
- ` ─────────────────────`,
27
- ` AI-powered E2E testing`,
28
- ];
24
+ const BANNER_LINES = [` 👻${TITLE}👻`, ` ─────────────────────`, ` AI-powered E2E testing`];
29
25
 
30
26
  function renderBanner(hueOffset: number): string[] {
31
27
  return [
@@ -40,31 +36,31 @@ const FRAME_MS = 60;
40
36
  const HUE_STEP = 24;
41
37
 
42
38
  export async function animateBanner(): Promise<void> {
43
- const isTTY = process.stdout.isTTY === true;
39
+ const isTTY = process.stderr.isTTY === true;
44
40
 
45
41
  if (!isTTY) {
46
42
  const lines = BANNER_LINES;
47
- process.stdout.write(lines.join("\n") + "\n\n");
43
+ process.stderr.write(`${lines.join("\n")}\n\n`);
48
44
  return;
49
45
  }
50
46
 
51
- process.stdout.write("\x1b[?25l"); // hide cursor
47
+ process.stderr.write("\x1b[?25l"); // hide cursor
52
48
 
53
49
  try {
54
50
  for (let frame = 0; frame < FRAMES; frame++) {
55
51
  const lines = renderBanner(frame * HUE_STEP);
56
52
  if (frame > 0) {
57
53
  // Move cursor up N lines to overwrite previous frame
58
- process.stdout.write(`\x1b[${lines.length}A`);
54
+ process.stderr.write(`\x1b[${lines.length}A`);
59
55
  }
60
- process.stdout.write(lines.join("\n") + "\n");
56
+ process.stderr.write(`${lines.join("\n")}\n`);
61
57
 
62
58
  if (frame < FRAMES - 1) {
63
59
  await new Promise<void>((resolve) => setTimeout(resolve, FRAME_MS));
64
60
  }
65
61
  }
66
- process.stdout.write("\n");
62
+ process.stderr.write("\n");
67
63
  } finally {
68
- process.stdout.write("\x1b[?25h"); // restore cursor
64
+ process.stderr.write("\x1b[?25h"); // restore cursor
69
65
  }
70
66
  }
@@ -0,0 +1,150 @@
1
+ import { type RunResult } from "../runner/types.ts";
2
+
3
/**
 * Run-environment and configuration details embedded in every JSON report.
 *
 * `formatJsonError` fills `model`, `provider` and `configFile` with an empty
 * string, and `timestamp` with the current ISO time, when the caller omits them.
 */
export interface JsonOutputMetadata {
  /** Model name as supplied by the caller. */
  model: string;
  /** Provider name as supplied by the caller. */
  provider: string;
  /** Config file reference as supplied by the caller. */
  configFile: string;
  /** Base URL for the run; the key is required but the value may be undefined. */
  baseUrl: string | undefined;
  /** Timestamp string for the report. */
  timestamp: string;
  /** Present only when a filter applies: the pattern plus matched and total counts. */
  filter?: { pattern: string; matched: number; total: number };
}
16
+
17
/**
 * Root object of the JSON report. The same shape is used for completed runs,
 * dry-run listings and error reports; optional members are filled in only by
 * the formatter that has a value for them.
 */
export interface JsonOutput {
  /** Version string passed to the formatter. */
  version: string;
  /** True when the report represents a successful outcome. */
  success: boolean;
  /** Exit code recorded in the report. */
  exitCode: number;
  /** Set to true by the dry-run formatter. */
  dryRun?: boolean;
  /** Error message, set by the error formatter. */
  error?: string;
  /** Environment and configuration details for the run. */
  metadata: JsonOutputMetadata;
  /** Aggregate counters. */
  summary: {
    passed: number;
    failed: number;
    cached: number;
    skipped: number;
    /** Number of listed tests; written by the dry-run formatter. */
    total?: number;
    /** Duration of the whole run; written by the run formatter. */
    totalDurationMs?: number;
  };
  /** One entry per test; empty in error reports. */
  tests: {
    testName: string;
    testCase: string;
    status?: string;
    source: string;
    durationMs?: number;
    selfHealed?: boolean;
    error?: string;
  }[];
}
43
+
44
/**
 * Format a completed run result as pretty-printed (2-space) JSON.
 *
 * Each test entry always carries testName, testCase, status, source and
 * durationMs. `selfHealed` is emitted only when it is exactly `true`, and
 * `error` only when the result defines one.
 *
 * @param runResult - Aggregated counters and per-test results of the run.
 * @param metadata - Environment/configuration details, embedded unchanged.
 * @param version - Version string to record in the report.
 * @param exitCode - Exit code to record; `success` is true only when it is 0.
 * @returns The serialized report.
 */
export function formatJsonOutput(
  runResult: RunResult,
  metadata: JsonOutputMetadata,
  version: string,
  exitCode: number,
): string {
  // Typed literal instead of `Record<string, unknown>` + `as`, so the compiler
  // checks every entry against the declared report shape. Conditional spreads
  // keep the optional keys absent (not undefined) and preserve key order.
  const tests: JsonOutput["tests"] = runResult.results.map((r) => ({
    testName: r.testName,
    testCase: r.testCase,
    status: r.status,
    source: r.source,
    durationMs: r.durationMs,
    ...(r.selfHealed === true ? { selfHealed: true } : {}),
    ...(r.error !== undefined ? { error: r.error } : {}),
  }));

  const output: JsonOutput = {
    version,
    success: exitCode === 0,
    exitCode,
    metadata,
    summary: {
      passed: runResult.passed,
      failed: runResult.failed,
      cached: runResult.cached,
      skipped: runResult.skipped,
      totalDurationMs: runResult.totalDurationMs,
    },
    tests,
  };

  return JSON.stringify(output, null, 2);
}
86
+
87
/**
 * Serialize a dry-run listing (tests that would run, none executed) as
 * pretty-printed JSON.
 *
 * The report always has `dryRun: true`, `success: true` and `exitCode: 0`.
 * The summary counts cache-sourced tests under `cached`, reports the list
 * length as `total`, and leaves passed/failed/skipped at 0.
 *
 * @param tests - Tests to list, each with its name, case text and source.
 * @param metadata - Environment/configuration details, embedded unchanged.
 * @param version - Version string to record in the report.
 * @returns The serialized report.
 */
export function formatJsonDryRun(
  tests: Array<{ name: string; case: string; source: "cache" | "ai" }>,
  metadata: JsonOutputMetadata,
  version: string,
): string {
  const fromCache = tests.reduce((count, t) => (t.source === "cache" ? count + 1 : count), 0);

  const summary: JsonOutput["summary"] = {
    passed: 0,
    failed: 0,
    cached: fromCache,
    skipped: 0,
    total: tests.length,
  };

  const listing: JsonOutput["tests"] = tests.map(({ name, case: testCase, source }) => ({
    testName: name,
    testCase,
    source,
  }));

  const report: JsonOutput = {
    version,
    success: true,
    exitCode: 0,
    dryRun: true,
    metadata,
    summary,
    tests: listing,
  };

  return JSON.stringify(report, null, 2);
}
120
+
121
/**
 * Format an error condition as pretty-printed JSON.
 *
 * The report has `success: false`, `exitCode: 2`, the given error message,
 * an all-zero summary and an empty test list.
 *
 * @param errorMessage - Message to record under `error`.
 * @param version - Version string to record in the report.
 * @param metadata - Whatever metadata is known at the time of the error.
 *   Missing `model`, `provider` and `configFile` default to `""`; a missing
 *   `timestamp` defaults to the current time in ISO format. A supplied
 *   `filter` is carried through unchanged.
 * @returns The serialized report.
 */
export function formatJsonError(errorMessage: string, version: string, metadata: Partial<JsonOutputMetadata>): string {
  const fullMetadata: JsonOutputMetadata = {
    model: metadata.model ?? "",
    provider: metadata.provider ?? "",
    configFile: metadata.configFile ?? "",
    baseUrl: metadata.baseUrl,
    timestamp: metadata.timestamp ?? new Date().toISOString(),
    // Keep filter details when the caller already knows them; previously they
    // were silently dropped even though the parameter type allows them.
    ...(metadata.filter !== undefined ? { filter: metadata.filter } : {}),
  };

  const output: JsonOutput = {
    version,
    success: false,
    exitCode: 2,
    error: errorMessage,
    metadata: fullMetadata,
    summary: {
      passed: 0,
      failed: 0,
      cached: 0,
      skipped: 0,
    },
    tests: [],
  };

  return JSON.stringify(output, null, 2);
}
@@ -1,7 +1,8 @@
1
- import pc from "picocolors";
2
1
  import { createSpinner } from "nanospinner";
3
- import type { Reporter, StepInfo } from "./types.ts";
4
- import type { TestResult, RunResult } from "../runner/types.ts";
2
+ import pc from "picocolors";
3
+
4
+ import { type RunResult, type TestResult } from "../runner/types.ts";
5
+ import { type Reporter, type StepInfo } from "./types.ts";
5
6
 
6
7
  /**
7
8
  * Format milliseconds as a human-readable duration string.
@@ -16,7 +17,7 @@ export function formatDuration(ms: number): string {
16
17
 
17
18
  /** Write a line of text to stderr */
18
19
  export function writeStderr(text: string): void {
19
- Bun.write(Bun.stderr, text + "\n");
20
+ Bun.write(Bun.stderr, `${text}\n`);
20
21
  }
21
22
 
22
23
  /**
@@ -64,7 +65,7 @@ export class ConsoleReporter implements Reporter {
64
65
  } else if (this.spinner) {
65
66
  let spinnerText = `${this.currentTestName} \u2014 ${step.description.full}`;
66
67
  if (spinnerText.length > 60) {
67
- spinnerText = spinnerText.slice(0, 57) + "...";
68
+ spinnerText = `${spinnerText.slice(0, 57)}...`;
68
69
  }
69
70
  this.spinner.update(spinnerText);
70
71
  }
@@ -79,9 +80,7 @@ export class ConsoleReporter implements Reporter {
79
80
  writeStderr(` ${bar}`);
80
81
  writeStderr(` Total: ${data.results.length}`);
81
82
  writeStderr(` Passed: ${pc.green(String(data.passed))}`);
82
- writeStderr(
83
- ` Failed: ${data.failed > 0 ? pc.red(String(data.failed)) : String(data.failed)}`,
84
- );
83
+ writeStderr(` Failed: ${data.failed > 0 ? pc.red(String(data.failed)) : String(data.failed)}`);
85
84
  if (data.skipped > 0) {
86
85
  writeStderr(` Skipped: ${data.skipped}`);
87
86
  }
@@ -1,4 +1,4 @@
1
- import type { StepDescription } from "./types.ts";
1
+ import { type StepDescription } from "./types.ts";
2
2
 
3
3
  /** Maps raw MCP tool names to human-readable action names */
4
4
  const PREFIX_MAP: Record<string, string> = {
@@ -46,24 +46,15 @@ const KEY_ARG_MAP: Record<string, string> = {
46
46
  * Unknown tools fall back to: strip underscores, capitalize first letter.
47
47
  * Key arguments are extracted based on tool type (e.g., "url" for navigate).
48
48
  */
49
- export function describeToolCall(
50
- toolName: string,
51
- input: Record<string, unknown>,
52
- ): StepDescription {
49
+ export function describeToolCall(toolName: string, input: Record<string, unknown>): StepDescription {
53
50
  // Look up human name, or derive from raw name as fallback
54
- const action =
55
- PREFIX_MAP[toolName] ??
56
- toolName
57
- .replace(/_/g, " ")
58
- .replace(/^\w/, (c) => c.toUpperCase());
51
+ const action = PREFIX_MAP[toolName] ?? toolName.replace(/_/g, " ").replace(/^\w/, (c) => c.toUpperCase());
59
52
 
60
53
  // Look up which input field is the key argument for this tool
61
54
  const keyArgField = KEY_ARG_MAP[toolName];
62
55
  const rawKeyArg = keyArgField ? input[keyArgField] : undefined;
63
56
  const keyArg =
64
- rawKeyArg !== undefined && rawKeyArg !== null && String(rawKeyArg) !== ""
65
- ? String(rawKeyArg)
66
- : undefined;
57
+ rawKeyArg !== undefined && rawKeyArg !== null && String(rawKeyArg) !== "" ? String(rawKeyArg) : undefined;
67
58
 
68
59
  const full = keyArg ? `${action} \u2192 ${keyArg}` : action;
69
60
 
@@ -1,4 +1,4 @@
1
- import type { RunResult, TestResult } from "../runner/types.ts";
1
+ import { type RunResult, type TestResult } from "../runner/types.ts";
2
2
 
3
3
  /** Describes a tool call in human-readable form */
4
4
  export interface StepDescription {
@@ -1,9 +1,9 @@
1
- import type { CacheManager } from "../cache/cache-manager.ts";
2
- import type { StepReplayer } from "../cache/step-replayer.ts";
3
- import type { AgentExecutionResult } from "../agent/types.ts";
4
- import type { Config } from "../config/types.ts";
5
- import type { TestResult } from "./types.ts";
6
- import type { OnStepProgress } from "../output/types.ts";
1
+ import { type AgentExecutionResult } from "../agent/types.ts";
2
+ import { type CacheManager } from "../cache/cache-manager.ts";
3
+ import { type StepReplayer } from "../cache/step-replayer.ts";
4
+ import { type Config } from "../config/types.ts";
5
+ import { type OnStepProgress } from "../output/types.ts";
6
+ import { type TestResult } from "./types.ts";
7
7
 
8
8
  /** Function signature for executing a test via the AI agent */
9
9
  type ExecuteAgentFn = (config: {
@@ -29,10 +29,9 @@ export class TestExecutor {
29
29
  private readonly executeAgentFn: ExecuteAgentFn;
30
30
  private readonly model: any;
31
31
  private readonly tools: Record<string, any>;
32
- private readonly config: Pick<
33
- Config,
34
- "maxAttempts" | "recursionLimit" | "model" | "modelProvider"
35
- > & { context?: string };
32
+ private readonly config: Pick<Config, "maxAttempts" | "recursionLimit" | "model" | "modelProvider"> & {
33
+ context?: string;
34
+ };
36
35
  private readonly globalContext?: string;
37
36
  private readonly noCache: boolean;
38
37
  private readonly onStepProgress?: OnStepProgress;
@@ -43,10 +42,7 @@ export class TestExecutor {
43
42
  executeAgentFn: ExecuteAgentFn;
44
43
  model?: any;
45
44
  tools?: Record<string, any>;
46
- config: Pick<
47
- Config,
48
- "maxAttempts" | "recursionLimit" | "model" | "modelProvider"
49
- > & { context?: string };
45
+ config: Pick<Config, "maxAttempts" | "recursionLimit" | "model" | "modelProvider"> & { context?: string };
50
46
  globalContext?: string;
51
47
  noCache?: boolean;
52
48
  onStepProgress?: OnStepProgress;
@@ -63,11 +59,7 @@ export class TestExecutor {
63
59
  }
64
60
 
65
61
  /** Execute a single test case with cache-first strategy */
66
- async execute(
67
- testCase: string,
68
- baseUrl: string,
69
- testContext?: string,
70
- ): Promise<TestResult> {
62
+ async execute(testCase: string, baseUrl: string, testContext?: string): Promise<TestResult> {
71
63
  const start = Date.now();
72
64
 
73
65
  // Phase 1: Try cache replay (unless noCache)
@@ -1,13 +1,9 @@
1
- import type { Config } from "../config/types.ts";
2
- import type { Reporter } from "../output/types.ts";
3
- import type { TestResult, RunResult } from "./types.ts";
1
+ import { type Config } from "../config/types.ts";
2
+ import { type Reporter } from "../output/types.ts";
3
+ import { type RunResult, type TestResult } from "./types.ts";
4
4
 
5
5
  /** Function signature for executing a single test case */
6
- export type ExecuteFn = (
7
- testCase: string,
8
- baseUrl: string,
9
- testContext?: string,
10
- ) => Promise<TestResult>;
6
+ export type ExecuteFn = (testCase: string, baseUrl: string, testContext?: string) => Promise<TestResult>;
11
7
 
12
8
  /**
13
9
  * Orchestrates sequential execution of all test cases.
@@ -50,18 +46,13 @@ export class TestRunner {
50
46
  }
51
47
 
52
48
  /** Aggregate individual test results into a run summary */
53
- function aggregateResults(
54
- results: TestResult[],
55
- totalDurationMs: number,
56
- ): RunResult {
49
+ function aggregateResults(results: TestResult[], totalDurationMs: number): RunResult {
57
50
  return {
58
51
  results,
59
52
  totalDurationMs,
60
53
  passed: results.filter((r) => r.status === "passed").length,
61
54
  failed: results.filter((r) => r.status === "failed").length,
62
- cached: results.filter(
63
- (r) => r.source === "cache" && r.status === "passed",
64
- ).length,
55
+ cached: results.filter((r) => r.source === "cache" && r.status === "passed").length,
65
56
  skipped: 0,
66
57
  };
67
58
  }