superghost 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,50 @@
1
+ import type { CachedStep } from "./types.ts";
2
+
3
/**
 * Records MCP tool calls as CachedStep entries.
 *
 * Used while the AI agent runs so that the sequence of tool calls behind a
 * passing test can be stored and replayed from cache later.
 *
 * Only successful executions are recorded; a call that throws is never cached.
 */
export class StepRecorder {
  private steps: CachedStep[] = [];

  /**
   * Append a tool invocation to the recording.
   * @param toolName - Name of the tool that was called
   * @param toolInput - Input object passed to the tool (stored by reference)
   */
  record(toolName: string, toolInput: Record<string, unknown>): void {
    this.steps.push({ toolName, toolInput });
  }

  /** @returns A shallow copy of the steps recorded so far, in call order */
  getSteps(): CachedStep[] {
    return [...this.steps];
  }

  /** Discard every recorded step. */
  clear(): void {
    this.steps = [];
  }

  /**
   * Wrap a tools object so successful calls are recorded automatically.
   *
   * For each tool that exposes an `execute` function, a shallow copy is
   * returned whose `execute`:
   *   1. calls the original `execute`, forwarding ALL arguments (callers may
   *      pass extra arguments after the input, e.g. an options object);
   *   2. records the step once the call resolves;
   *   3. lets a rejection propagate without recording anything.
   *
   * Tools without an `execute` function are returned untouched, so no
   * callable `execute` is invented for them.
   *
   * @param tools - Map of tool name to tool definition
   * @returns A new map with the same keys and recording wrappers
   */
  wrapTools(tools: Record<string, any>): Record<string, any> {
    return Object.fromEntries(
      Object.entries(tools).map(([name, tool]): [string, any] => {
        if (typeof tool?.execute !== "function") {
          return [name, tool];
        }
        const recordingTool = {
          ...tool,
          execute: async (
            input: Record<string, unknown>,
            ...rest: unknown[]
          ) => {
            // Call through `tool` so the original `this` binding is preserved.
            const result = await tool.execute(input, ...rest);
            this.record(name, input);
            return result;
          },
        };
        return [name, recordingTool];
      }),
    );
  }
}
@@ -0,0 +1,51 @@
1
+ import type { CachedStep } from "./types.ts";
2
+
3
/**
 * Callback used to run one tool by name.
 * Receives the tool name and its input object, and resolves to a string.
 */
export type ToolExecutor = (
  toolName: string,
  toolInput: Record<string, unknown>,
) => Promise<string>;
8
+
9
/** Outcome reported by a replay of cached steps. */
export interface ReplayResult {
  /** True when no step threw during the replay */
  success: boolean;
  /** Zero-based index of the step that failed (set on failure) */
  failedStep?: number;
  /** Message describing the failure (set on failure) */
  error?: string;
}
15
+
16
/**
 * Runs a recorded list of tool steps through an executor, one at a time.
 * The first step whose executor call throws ends the replay; its index and
 * error message are reported in the result.
 */
export class StepReplayer {
  private readonly executor: ToolExecutor;

  /** @param executor - Function invoked once per step to run the tool */
  constructor(executor: ToolExecutor) {
    this.executor = executor;
  }

  /**
   * Replay the given steps in order.
   * @param steps - Cached steps to execute
   * @returns `{ success: true }` when every step ran, otherwise the failing
   *          step index together with the error message
   */
  async replay(steps: CachedStep[]): Promise<ReplayResult> {
    for (const [index, step] of steps.entries()) {
      // Missing entries (e.g. holes in the array) are skipped, not failed.
      if (!step) continue;

      try {
        await this.executor(step.toolName, step.toolInput);
      } catch (caught) {
        const message =
          caught instanceof Error ? caught.message : String(caught);
        return { success: false, failedStep: index, error: message };
      }
    }

    return { success: true };
  }
}
@@ -0,0 +1,27 @@
1
/** One recorded tool call, as stored in the cache. */
export interface CachedStep {
  /** Name of the tool that was invoked */
  toolName: string;
  /** Input object the tool was invoked with */
  toolInput: Record<string, unknown>;
}
6
+
7
/** A full cache record: the replayable steps plus diagnostic metadata. */
export interface CacheEntry {
  /** Cache format version (always the literal 1) */
  version: 1;
  /** Test case text this entry belongs to */
  testCase: string;
  /** Base URL associated with the test */
  baseUrl: string;
  /** Recorded tool calls, in execution order */
  steps: CachedStep[];
  /** Model used for AI execution */
  model: string;
  /** Provider name (anthropic, openai, google, openrouter) */
  provider: string;
  /** Number of steps recorded */
  stepCount: number;
  /** AI verdict message */
  aiMessage: string;
  /** Duration of AI execution in milliseconds */
  durationMs: number;
  /** ISO timestamp when cache was first created */
  createdAt: string;
  /** ISO timestamp when cache was last updated */
  updatedAt: string;
}
package/src/cli.ts ADDED
@@ -0,0 +1,128 @@
1
+ #!/usr/bin/env bun
2
+
3
+ import { Command } from "commander";
4
+ import pc from "picocolors";
5
+ import { loadConfig, ConfigLoadError } from "./config/loader.ts";
6
+ import { TestRunner } from "./runner/test-runner.ts";
7
+ import type { ExecuteFn } from "./runner/test-runner.ts";
8
+ import { ConsoleReporter } from "./output/reporter.ts";
9
+ import { ProcessManager } from "./infra/process-manager.ts";
10
+ import { setupSignalHandlers } from "./infra/signals.ts";
11
+ import { McpManager } from "./agent/mcp-manager.ts";
12
+ import { CacheManager } from "./cache/cache-manager.ts";
13
+ import { StepReplayer } from "./cache/step-replayer.ts";
14
+ import type { ToolExecutor } from "./cache/step-replayer.ts";
15
+ import { TestExecutor } from "./runner/test-executor.ts";
16
+ import {
17
+ inferProvider,
18
+ validateApiKey,
19
+ createModel,
20
+ } from "./agent/model-factory.ts";
21
+ import type { ProviderName } from "./agent/model-factory.ts";
22
+ import { executeAgent } from "./agent/agent-runner.ts";
23
+ import { isStandaloneBinary } from "./dist/paths.ts";
24
+ import { ensureMcpDependencies } from "./dist/setup.ts";
25
+ import pkg from "../package.json";
26
+
27
const program = new Command();

program
  .name("superghost")
  .description("AI-powered end-to-end browser and API testing")
  .version(pkg.version)
  .requiredOption("-c, --config <path>", "Path to YAML config file")
  .option("--headed", "Run browser in headed mode (visible browser window)")
  .action(async (cliOptions: { config: string; headed?: boolean }) => {
    const processManager = new ProcessManager();
    setupSignalHandlers(processManager);

    // Standalone binaries fetch their MCP server packages on first run.
    if (isStandaloneBinary()) {
      await ensureMcpDependencies();
    }

    // Declared outside the try block so the catch handler can close it.
    let mcpManager: McpManager | null = null;

    try {
      const config = await loadConfig(cliOptions.config);
      if (cliOptions.headed) {
        config.headless = false;
      }
      const reporter = new ConsoleReporter();

      // An explicit non-default provider is used as given; with the default
      // ("anthropic") the provider is inferred from the model name instead.
      const provider =
        config.modelProvider !== "anthropic"
          ? (config.modelProvider as ProviderName)
          : inferProvider(config.model);

      // Fail fast on a missing API key before any test starts.
      validateApiKey(provider);

      const model = createModel(config.model, provider);

      // A single MCP manager is shared by the whole run.
      mcpManager = new McpManager({
        browser: config.browser,
        headless: config.headless,
      });
      await mcpManager.initialize();
      const tools = await mcpManager.getTools();

      // Cache subsystem: replayed steps go straight to the MCP tools.
      const cacheManager = new CacheManager(config.cacheDir);
      const toolExecutor: ToolExecutor = async (toolName, toolInput) => {
        const tool = tools[toolName];
        if (!tool) throw new Error(`Tool not found: ${toolName}`);
        const output = await tool.execute(toolInput);
        return output;
      };
      const replayer = new StepReplayer(toolExecutor);

      const executor = new TestExecutor({
        cacheManager,
        replayer,
        executeAgentFn: executeAgent,
        model,
        tools,
        config,
        globalContext: config.context,
      });

      // Adapter handed to TestRunner; it simply delegates to the executor.
      const executeFn: ExecuteFn = async (testCase, baseUrl, testContext?) => {
        return executor.execute(testCase, baseUrl, testContext);
      };

      console.log(
        `\n${pc.bold("superghost")} v${pkg.version} / Running ${config.tests.length} test(s)...\n`,
      );

      const runner = new TestRunner(config, reporter, executeFn);
      const summary = await runner.run();

      await mcpManager.close();
      await processManager.killAll();

      // Exit is deferred by 100ms rather than called immediately.
      const exitCode = summary.failed > 0 ? 1 : 0;
      setTimeout(() => process.exit(exitCode), 100);
    } catch (error) {
      // Best-effort cleanup: a failure while closing must not mask `error`.
      if (mcpManager) {
        await mcpManager.close().catch(() => {});
      }
      await processManager.killAll();

      // Config problems and missing API keys are expected user errors:
      // print a short message instead of a stack trace.
      if (
        error instanceof ConfigLoadError ||
        (error instanceof Error && error.message.startsWith("Missing API key"))
      ) {
        Bun.write(Bun.stderr, `${pc.red("Error:")} ${error.message}\n`);
        setTimeout(() => process.exit(1), 100);
        return;
      }
      throw error;
    }
  });

await program.parseAsync();
@@ -0,0 +1,76 @@
1
+ import { YAML } from "bun";
2
+ import { ConfigSchema } from "./schema.ts";
3
+ import type { Config } from "./types.ts";
4
+
5
/**
 * Error thrown when config loading or validation fails.
 *
 * The optional `cause` is attached whenever one is supplied, including falsy
 * values such as `0`, `""`, `false` or `null` (JavaScript can throw any
 * value); only `undefined` means "no cause".
 */
export class ConfigLoadError extends Error {
  /**
   * @param message - Human-readable description shown to the user
   * @param cause - Underlying error or thrown value, if any
   */
  constructor(message: string, cause?: unknown) {
    super(message);
    this.name = "ConfigLoadError";
    if (cause !== undefined) this.cause = cause;
  }
}
13
+
14
/**
 * Load and validate a YAML config file.
 *
 * Three-layer error handling:
 * 1. File read (a missing file gets an actionable hint)
 * 2. YAML parsing (syntax errors from Bun's built-in parser)
 * 3. Zod validation (every issue numbered, with its field path)
 *
 * @param filePath - Absolute or relative path to the YAML config
 * @returns Validated Config object with defaults applied
 * @throws ConfigLoadError if file is missing, malformed, or fails validation
 */
export async function loadConfig(filePath: string): Promise<Config> {
  // Layer 1: Read file (produces actionable error if missing)
  const file = Bun.file(filePath);
  let content: string;
  try {
    content = await file.text();
  } catch (error) {
    const code = (error as NodeJS.ErrnoException).code;
    if (code === "ENOENT") {
      throw new ConfigLoadError(
        `Config file not found: ${filePath}\n` +
          ` Create a config file or specify a different path:\n` +
          ` superghost --config <path>`,
      );
    }
    throw new ConfigLoadError(
      `Cannot read config file: ${filePath}\n` +
        ` ${error instanceof Error ? error.message : String(error)}`,
      error,
    );
  }

  // Layer 2: YAML parsing
  let raw: unknown;
  try {
    raw = YAML.parse(content);
  } catch (error) {
    throw new ConfigLoadError(
      `Invalid YAML syntax: ${error instanceof Error ? error.message : String(error)}`,
      error,
    );
  }

  // Layer 3: Zod validation
  // IMPORTANT: Check result.success boolean, NOT instanceof Error (Zod v4 pitfall)
  const result = ConfigSchema.safeParse(raw);
  if (!result.success) {
    const { issues } = result.error;
    const lines = issues
      .map((issue, i) => {
        // An issue on the document itself has an empty path (presumably what
        // an empty file produces); label it so the line is not just ": message".
        // String() also keeps a symbol segment from making join() throw.
        const where =
          issue.path.length > 0 ? issue.path.map(String).join(".") : "(root)";
        return ` ${i + 1}. ${where}: ${issue.message}`;
      })
      .join("\n");
    const count = issues.length;
    throw new ConfigLoadError(
      `Invalid config (${count} issue${count > 1 ? "s" : ""})\n${lines}`,
      // Keep the structured Zod error reachable for programmatic callers.
      result.error,
    );
  }

  return result.data;
}
@@ -0,0 +1,26 @@
1
+ import { z } from "zod";
2
+
3
/** Zod schema describing one entry of the `tests` list in the config file. */
export const TestCaseSchema = z.object({
  // Non-empty test name
  name: z.string().min(1, "Test name cannot be empty"),
  // Non-empty description of what the test should do
  case: z.string().min(1, "Test case description cannot be empty"),
  // Optional URL for this test
  baseUrl: z.string().url().optional(),
  // Optional positive timeout for this test
  timeout: z.number().positive().optional(),
  // Kind of test; "browser" when omitted
  type: z.enum(["browser", "api"]).default("browser"),
  // Optional free-form context string
  context: z.string().optional(),
});
12
+
13
/** Zod schema for the whole SuperGhost config file, including defaults. */
export const ConfigSchema = z.object({
  // Optional top-level URL
  baseUrl: z.string().url().optional(),
  // Browser engine; "chromium" when omitted
  browser: z.enum(["chromium", "firefox", "webkit"]).default("chromium"),
  // Headless by default
  headless: z.boolean().default(true),
  // Positive timeout, default 60_000 (presumably milliseconds — confirm)
  timeout: z.number().positive().default(60_000),
  // Integer from 1 to 10, default 3
  maxAttempts: z.number().int().positive().max(10).default(3),
  // Model identifier
  model: z.string().default("claude-sonnet-4-6"),
  // Provider name; any string is accepted here
  modelProvider: z.string().default("anthropic"),
  // Cache directory path
  cacheDir: z.string().default(".superghost-cache"),
  // Positive integer, default 500
  recursionLimit: z.number().int().positive().default(500),
  // Optional free-form context string
  context: z.string().optional(),
  // One or more test cases
  tests: z.array(TestCaseSchema).min(1, "At least one test case is required"),
});
@@ -0,0 +1,8 @@
1
+ import type { z } from "zod";
2
+ import type { ConfigSchema, TestCaseSchema } from "./schema.ts";
3
+
4
/** One validated test case: the parsed output of TestCaseSchema, defaults applied. */
export type TestCase = z.output<typeof TestCaseSchema>;
6
+
7
/** The validated configuration: the parsed output of ConfigSchema, defaults applied. */
export type Config = z.output<typeof ConfigSchema>;
@@ -0,0 +1,52 @@
1
+ import { join } from "node:path";
2
+ import { homedir } from "node:os";
3
+
4
/** `.superghost` directory inside the user's home directory. */
export const SUPERGHOST_HOME = join(homedir(), ".superghost");

/** `node_modules` directory inside SUPERGHOST_HOME. */
export const MCP_NODE_MODULES = join(SUPERGHOST_HOME, "node_modules");
9
+
10
/**
 * Standalone-binary check with an injectable argv (kept separate for tests).
 * Reports true when argv[1] is missing/empty or equal to argv[0].
 */
export function _isStandaloneBinaryWith(argv: string[]): boolean {
  const [executable, script] = argv;
  if (!script) {
    return true;
  }
  return script === executable;
}
17
+
18
/** Apply the standalone-binary check to the current process's argv. */
export function isStandaloneBinary(): boolean {
  const standalone = _isStandaloneBinaryWith(process.argv);
  return standalone;
}
22
+
23
/**
 * Build the command used to spawn an MCP server package.
 *
 * - Standalone mode: the executable under MCP_NODE_MODULES/.bin named after
 *   the last "/"-separated segment of the package name, with no arguments.
 * - Otherwise: `bunx <packageName>@latest`.
 *
 * NOTE(review): standalone mode assumes the package's bin entry is named
 * after its short name (e.g. "@playwright/mcp" -> "mcp") — confirm against
 * each package's `bin` field.
 *
 * @param packageName - Full package name (e.g., "@playwright/mcp")
 * @param standalone - Override standalone detection (for testing)
 * @returns The command and its argument list
 */
export function getMcpCommand(
  packageName: string,
  standalone?: boolean,
): { command: string; args: string[] } {
  const useInstalledBin = standalone ?? isStandaloneBinary();

  if (!useInstalledBin) {
    // npm package: run through bunx with the @latest tag
    return { command: "bunx", args: [`${packageName}@latest`] };
  }

  // Everything after the last "/" (the whole name when there is no "/").
  const shortName = packageName.slice(packageName.lastIndexOf("/") + 1);
  return {
    command: join(MCP_NODE_MODULES, ".bin", shortName),
    args: [],
  };
}
@@ -0,0 +1,66 @@
1
+ import { join } from "node:path";
2
+ import { createSpinner } from "nanospinner";
3
+ import pc from "picocolors";
4
+ import { SUPERGHOST_HOME, MCP_NODE_MODULES } from "./paths.ts";
5
+
6
/**
 * MCP server packages installed for standalone binaries,
 * keyed by package name with the version spec to install.
 */
const MCP_DEPS: Record<string, string> = {
  "@playwright/mcp": "latest",
  "@calibress/curl-mcp": "latest",
};
11
+
12
/**
 * Auto-install MCP server dependencies for standalone binary mode.
 *
 * When the marker file for @playwright/mcp is absent, writes a package.json
 * listing MCP_DEPS into SUPERGHOST_HOME and runs `install` there by
 * re-invoking the current executable with BUN_BE_BUN=1. Returns immediately
 * when the marker already exists.
 *
 * Shows a spinner while installing. On a non-zero exit code it prints the
 * captured stderr and terminates the process with exit code 1.
 */
export async function ensureMcpDependencies(): Promise<void> {
  // Marker check: @playwright/mcp being installed is treated as "all present".
  const markerPath = join(
    MCP_NODE_MODULES,
    "@playwright",
    "mcp",
    "package.json",
  );
  if (await Bun.file(markerPath).exists()) return;

  // Write the package.json that drives the install.
  const packageJsonPath = join(SUPERGHOST_HOME, "package.json");
  await Bun.write(
    packageJsonPath,
    JSON.stringify({ private: true, dependencies: MCP_DEPS }),
  );

  // Show spinner for user feedback
  const spinner = createSpinner(
    pc.cyan("Installing MCP dependencies..."),
  ).start();

  // process.execPath is the absolute path of the running executable, whereas
  // argv[0] may be a bare command name. BUN_BE_BUN=1 makes it act as the Bun CLI.
  const proc = Bun.spawn([process.execPath, "install"], {
    cwd: SUPERGHOST_HOME,
    env: { ...Bun.env, BUN_BE_BUN: "1" },
    // stdout is never shown, so do not pipe it: an unread pipe can fill up
    // and block the child forever.
    stdout: "ignore",
    stderr: "pipe",
  });

  // Drain stderr while waiting for exit, for the same reason.
  const [exitCode, stderr] = await Promise.all([
    proc.exited,
    new Response(proc.stderr).text(),
  ]);

  if (exitCode !== 0) {
    spinner.error({ text: pc.red("Failed to install MCP dependencies") });
    if (stderr) {
      console.error(pc.dim(stderr));
    }
    process.exit(1);
  }

  spinner.success({
    text: pc.green("MCP dependencies installed to ~/.superghost/"),
  });
}
@@ -0,0 +1,34 @@
1
+ import type { Subprocess } from "bun";
2
+
3
/**
 * Keeps a set of spawned subprocesses and shuts them down on request.
 * Shutdown sends SIGTERM, then SIGKILL if the process is still not reported
 * as killed after 5 seconds.
 */
export class ProcessManager {
  private processes = new Set<Subprocess>();

  /** Start tracking a subprocess; it is dropped from the set once it exits. */
  track(proc: Subprocess): void {
    this.processes.add(proc);
    proc.exited.then(() => {
      this.processes.delete(proc);
    });
  }

  /**
   * Terminate every tracked process and wait for all of them to settle.
   * Failures for individual processes are swallowed; the set is emptied
   * afterwards either way.
   */
  async killAll(): Promise<void> {
    const pending = Array.from(this.processes, (proc) => this.terminate(proc));
    await Promise.allSettled(pending);
    this.processes.clear();
  }

  /** SIGTERM one process, escalating to SIGKILL after a 5s grace period. */
  private async terminate(proc: Subprocess): Promise<void> {
    if (proc.killed) return;

    proc.kill("SIGTERM");
    const forceKillTimer = setTimeout(() => {
      if (!proc.killed) {
        proc.kill("SIGKILL");
      }
    }, 5000);

    try {
      await proc.exited;
    } finally {
      // Cancel the pending escalation once the wait is over.
      clearTimeout(forceKillTimer);
    }
  }
}
@@ -0,0 +1,20 @@
1
+ import type { ProcessManager } from "./process-manager.ts";
2
+
3
/**
 * Install SIGINT and SIGTERM listeners that kill all tracked subprocesses
 * and then exit with 130 (SIGINT) or 143 (SIGTERM).
 * Only the first signal triggers cleanup; later ones are ignored.
 */
export function setupSignalHandlers(pm: ProcessManager): void {
  const exitCodes = { SIGINT: 130, SIGTERM: 143 } as const;
  let shuttingDown = false;

  for (const signal of ["SIGINT", "SIGTERM"] as const) {
    process.on(signal, async () => {
      if (shuttingDown) return;
      shuttingDown = true;

      await pm.killAll();
      process.exit(exitCodes[signal]);
    });
  }
}
@@ -0,0 +1,75 @@
1
+ import pc from "picocolors";
2
+ import { createSpinner } from "nanospinner";
3
+ import type { Reporter } from "./types.ts";
4
+ import type { TestResult, RunResult } from "../runner/types.ts";
5
+
6
/**
 * Format milliseconds as a human-readable duration string.
 *
 * Values that round to less than 1000ms are shown as whole milliseconds
 * ("Xms"); anything else is shown in seconds with one decimal ("X.Xs").
 * Rounding keeps fractional inputs (e.g. from high-resolution timers) from
 * printing as "12.3456ms". Integer inputs format exactly as before.
 *
 * @param ms - Duration in milliseconds
 * @returns The formatted duration
 */
export function formatDuration(ms: number): string {
  const wholeMs = Math.round(ms);
  if (wholeMs < 1000) {
    return `${wholeMs}ms`;
  }
  // Values that round up to 1000ms (e.g. 999.6) are shown as "1.0s".
  return `${(ms / 1000).toFixed(1)}s`;
}
16
+
17
/**
 * Reporter that writes to the console: one spinner per running test and a
 * bordered summary at the end of the run.
 * Colors come from picocolors and spinners from nanospinner.
 */
export class ConsoleReporter implements Reporter {
  private spinner: ReturnType<typeof createSpinner> | null = null;

  /** Start a spinner labelled with the test name. */
  onTestStart(testName: string): void {
    this.spinner = createSpinner(testName).start();
  }

  /**
   * Finish the current spinner as success (passed) or error (anything else),
   * appending the result source and duration, then drop the spinner.
   */
  onTestComplete(result: TestResult): void {
    const label = result.selfHealed ? "ai, self-healed" : result.source;
    const suffix = pc.dim(`(${label}, ${formatDuration(result.durationMs)})`);
    const text = `${result.testName} ${suffix}`;

    if (result.status === "passed") {
      this.spinner?.success({ text });
    } else {
      this.spinner?.error({ text });
    }

    if (result.selfHealed) {
      console.log(pc.dim(" Cache was stale — re-executed and updated"));
    }
    this.spinner = null;
  }

  /** Print the summary box, followed by each failed test and its error. */
  onRunComplete(data: RunResult): void {
    const bar = "\u2501".repeat(40);
    const failedCount =
      data.failed > 0 ? pc.red(String(data.failed)) : String(data.failed);

    const summaryLines = [
      "",
      ` ${bar}`,
      " SuperGhost Results",
      ` ${bar}`,
      ` Total: ${data.results.length}`,
      ` Passed: ${pc.green(String(data.passed))}`,
      ` Failed: ${failedCount}`,
      ` Cached: ${data.cached}`,
      ` Time: ${pc.dim(formatDuration(data.totalDurationMs))}`,
      ` ${bar}`,
    ];
    for (const line of summaryLines) {
      console.log(line);
    }

    if (data.failed <= 0) return;

    console.log("");
    console.log(pc.red(" Failed tests:"));
    const failures = data.results.filter((entry) => entry.status === "failed");
    for (const failure of failures) {
      console.log(` ${pc.red("-")} ${failure.testName}`);
      if (failure.error) {
        console.log(` ${pc.dim(failure.error)}`);
      }
    }
  }
}
@@ -0,0 +1,8 @@
1
+ import type { RunResult, TestResult } from "../runner/types.ts";
2
+
3
/** Contract implemented by output reporters. */
export interface Reporter {
  /** Receives the name of a test */
  onTestStart(testName: string): void;
  /** Receives the result of a single test */
  onTestComplete(result: TestResult): void;
  /** Receives the aggregated result of a run */
  onRunComplete(data: RunResult): void;
}