npm - superghost - Versions diffs - 0.1.0 - Mend

superghost 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

package/LICENSE +21 -0
package/README.md +172 -0
package/package.json +77 -0
package/src/agent/agent-runner.ts +69 -0
package/src/agent/mcp-manager.ts +78 -0
package/src/agent/model-factory.ts +71 -0
package/src/agent/prompt.ts +47 -0
package/src/agent/types.ts +28 -0
package/src/cache/cache-manager.ts +105 -0
package/src/cache/step-recorder.ts +50 -0
package/src/cache/step-replayer.ts +51 -0
package/src/cache/types.ts +27 -0
package/src/cli.ts +128 -0
package/src/config/loader.ts +76 -0
package/src/config/schema.ts +26 -0
package/src/config/types.ts +8 -0
package/src/dist/paths.ts +52 -0
package/src/dist/setup.ts +66 -0
package/src/infra/process-manager.ts +34 -0
package/src/infra/signals.ts +20 -0
package/src/output/reporter.ts +75 -0
package/src/output/types.ts +8 -0
package/src/runner/test-executor.ts +145 -0
package/src/runner/test-runner.ts +66 -0
package/src/runner/types.ts +26 -0

package/src/cache/step-recorder.ts ADDED Viewed

@@ -0,0 +1,50 @@
+import type { CachedStep } from "./types.ts";
+/**
+ * Records MCP tool calls as CachedStep entries.
+ * Used during AI agent execution to capture the sequence of actions
+ * that led to a successful test, enabling later cache replay.
+ *
+ * Only records successful tool executions -- failed calls are not cached.
+ */
+export class StepRecorder {
+  private steps: CachedStep[] = [];
+  /**
+   * Record a tool invocation manually.
+   * @param toolName - Name the tool is registered under
+   * @param toolInput - Input object passed to the tool (stored by reference)
+   */
+  record(toolName: string, toolInput: Record<string, unknown>): void {
+    this.steps.push({ toolName, toolInput });
+  }
+  /**
+   * Get a shallow copy of all recorded steps, in recording order.
+   * Mutating the returned array does not affect the recorder.
+   */
+  getSteps(): CachedStep[] {
+    return [...this.steps];
+  }
+  /** Clear all recorded steps */
+  clear(): void {
+    this.steps = [];
+  }
+  /**
+   * Wrap a tools object to automatically record successful calls.
+   * Each tool's execute function is replaced with a version that:
+   * 1. Calls the original execute, forwarding every argument it received
+   * 2. Records the step on success
+   * 3. Re-throws on failure (does NOT record failed steps)
+   *
+   * Tools that have no callable `execute` are returned unchanged, so they
+   * are never given a wrapper that would fail with a TypeError when invoked.
+   *
+   * @param tools - Map of tool name to tool definition
+   * @returns A new map with the same keys; the input map is not mutated
+   */
+  wrapTools(tools: Record<string, any>): Record<string, any> {
+    return Object.fromEntries(
+      Object.entries(tools).map(([name, tool]): [string, any] => {
+        // Nothing to wrap: keep the definition exactly as it was supplied
+        if (typeof tool?.execute !== "function") {
+          return [name, tool];
+        }
+        return [
+          name,
+          {
+            ...tool,
+            execute: async (
+              input: Record<string, unknown>,
+              ...rest: unknown[]
+            ) => {
+              // Forward trailing arguments (e.g. a call-options object) untouched
+              const result = await tool.execute(input, ...rest);
+              // Only reached when the original call did not throw
+              this.record(name, input);
+              return result;
+            },
+          },
+        ];
+      }),
+    );
+  }
+}

package/src/cache/step-replayer.ts ADDED Viewed

@@ -0,0 +1,51 @@
+import type { CachedStep } from "./types.ts";
+/**
+ * Function signature for executing a tool by name with given input.
+ * The returned promise resolves with a string; StepReplayer ignores that
+ * value and treats a rejection as a failed step.
+ */
+export type ToolExecutor = (
+  toolName: string,
+  toolInput: Record<string, unknown>,
+) => Promise<string>;
+/**
+ * Result of replaying cached steps.
+ * StepReplayer only sets `failedStep` and `error` when `success` is false.
+ */
+export interface ReplayResult {
+  success: boolean; // true when no step threw
+  failedStep?: number; // zero-based index of the step that threw
+  error?: string; // message of the thrown Error, or String(value) for non-Error throws
+}
+/**
+ * Feeds previously cached MCP tool steps back through a tool executor.
+ * Steps run strictly one after another; the first step whose executor
+ * call rejects ends the replay and is reported by its index.
+ */
+export class StepReplayer {
+  constructor(private readonly executor: ToolExecutor) {}
+  /**
+   * Run every cached step in order.
+   * @param steps - The cached steps to replay
+   * @returns `{ success: true }` when nothing threw, otherwise the index
+   *          and message of the first failing step
+   */
+  async replay(steps: CachedStep[]): Promise<ReplayResult> {
+    for (const [index, step] of steps.entries()) {
+      // Missing entries are skipped rather than treated as failures
+      if (!step) continue;
+      try {
+        await this.executor(step.toolName, step.toolInput);
+      } catch (cause) {
+        const message = cause instanceof Error ? cause.message : String(cause);
+        return { success: false, failedStep: index, error: message };
+      }
+    }
+    return { success: true };
+  }
+}

package/src/cache/types.ts ADDED Viewed

@@ -0,0 +1,27 @@
+/**
+ * A single recorded tool call step for caching.
+ * Holds exactly what is needed to issue the call again: the tool's name
+ * and the input object it was called with.
+ */
+export interface CachedStep {
+  toolName: string; // name the tool is looked up by at replay time
+  toolInput: Record<string, unknown>; // input object passed to the tool
+}
+/**
+ * A complete cache entry with diagnostic metadata.
+ * Combines the replayable steps with information about the run that
+ * produced them.
+ */
+export interface CacheEntry {
+  version: 1; // literal type: only the value 1 is accepted
+  testCase: string; // NOTE(review): presumably the test's `case` text — confirm against cache-manager
+  baseUrl: string;
+  steps: CachedStep[]; // recorded tool calls, in execution order
+  /** Model used for AI execution */
+  model: string;
+  /** Provider name (anthropic, openai, google, openrouter) */
+  provider: string;
+  /** Number of steps recorded (presumably equal to `steps.length` — confirm) */
+  stepCount: number;
+  /** AI verdict message */
+  aiMessage: string;
+  /** Duration of AI execution in milliseconds */
+  durationMs: number;
+  /** ISO timestamp when cache was first created */
+  createdAt: string;
+  /** ISO timestamp when cache was last updated */
+  updatedAt: string;
+}

package/src/cli.ts ADDED Viewed

@@ -0,0 +1,128 @@
+#!/usr/bin/env bun
+import { Command } from "commander";
+import pc from "picocolors";
+import { loadConfig, ConfigLoadError } from "./config/loader.ts";
+import { TestRunner } from "./runner/test-runner.ts";
+import type { ExecuteFn } from "./runner/test-runner.ts";
+import { ConsoleReporter } from "./output/reporter.ts";
+import { ProcessManager } from "./infra/process-manager.ts";
+import { setupSignalHandlers } from "./infra/signals.ts";
+import { McpManager } from "./agent/mcp-manager.ts";
+import { CacheManager } from "./cache/cache-manager.ts";
+import { StepReplayer } from "./cache/step-replayer.ts";
+import type { ToolExecutor } from "./cache/step-replayer.ts";
+import { TestExecutor } from "./runner/test-executor.ts";
+import {
+  inferProvider,
+  validateApiKey,
+  createModel,
+} from "./agent/model-factory.ts";
+import type { ProviderName } from "./agent/model-factory.ts";
+import { executeAgent } from "./agent/agent-runner.ts";
+import { isStandaloneBinary } from "./dist/paths.ts";
+import { ensureMcpDependencies } from "./dist/setup.ts";
+import pkg from "../package.json";
+const program = new Command(); // CLI definition; the whole test run happens inside the action handler below
+program
+  .name("superghost")
+  .description("AI-powered end-to-end browser and API testing")
+  .version(pkg.version)
+  .requiredOption("-c, --config <path>", "Path to YAML config file")
+  .option("--headed", "Run browser in headed mode (visible browser window)")
+  .action(async (options: { config: string; headed?: boolean }) => {
+    const pm = new ProcessManager();
+    setupSignalHandlers(pm); // SIGINT/SIGTERM -> pm.killAll() then exit
+    // Auto-install MCP dependencies for standalone binary on first run.
+    // Runs before the try block, so a throw here skips the cleanup in catch.
+    if (isStandaloneBinary()) {
+      await ensureMcpDependencies();
+    }
+    let mcpManager: McpManager | null = null; // stays null until construction succeeds, so catch knows whether to close it
+    try {
+      const config = await loadConfig(options.config);
+      if (options.headed) {
+        config.headless = false; // --headed overrides the config file's headless setting
+      }
+      const reporter = new ConsoleReporter();
+      // Infer provider: when modelProvider is "anthropic" (the schema default) the provider is
+      // derived from the model name; any other value is used as-is via an unchecked cast.
+      const provider =
+        config.modelProvider === "anthropic"
+          ? inferProvider(config.model)
+          : (config.modelProvider as ProviderName);
+      // Validate API key at startup before any tests run
+      validateApiKey(provider);
+      // Create AI model
+      const model = createModel(config.model, provider);
+      // Initialize MCP servers (shared across test suite, not per-test)
+      mcpManager = new McpManager({
+        browser: config.browser,
+        headless: config.headless,
+      });
+      await mcpManager.initialize();
+      const tools = await mcpManager.getTools();
+      // Create cache subsystem: replayed steps call the same MCP tools by name
+      const cacheManager = new CacheManager(config.cacheDir);
+      const toolExecutor: ToolExecutor = async (toolName, toolInput) => {
+        const tool = tools[toolName];
+        if (!tool) throw new Error(`Tool not found: ${toolName}`); // surfaces to StepReplayer as a failed step
+        return await tool.execute(toolInput);
+      };
+      const replayer = new StepReplayer(toolExecutor);
+      // Create TestExecutor with cache-first strategy
+      const executor = new TestExecutor({
+        cacheManager,
+        replayer,
+        executeAgentFn: executeAgent,
+        model,
+        tools,
+        config,
+        globalContext: config.context,
+      });
+      // Wire execute function for TestRunner
+      const executeFn: ExecuteFn = async (testCase, baseUrl, testContext?) =>
+        executor.execute(testCase, baseUrl, testContext);
+      console.log(
+        `\n${pc.bold("superghost")} v${pkg.version} / Running ${config.tests.length} test(s)...\n`,
+      );
+      const runner = new TestRunner(config, reporter, executeFn);
+      const result = await runner.run();
+      await mcpManager.close();
+      await pm.killAll();
+      const code = result.failed > 0 ? 1 : 0; // exit 1 when any test failed
+      setTimeout(() => process.exit(code), 100); // NOTE(review): the 100 ms delay presumably lets pending output flush — confirm
+    } catch (error) {
+      if (mcpManager) {
+        await mcpManager.close().catch(() => {}); // best-effort close; a close failure must not mask the original error
+      }
+      await pm.killAll();
+      if (error instanceof ConfigLoadError) {
+        Bun.write(Bun.stderr, `${pc.red("Error:")} ${error.message}\n`); // not awaited; the delayed exit below gives it time to complete
+        setTimeout(() => process.exit(1), 100);
+        return;
+      }
+      if (error instanceof Error && error.message.startsWith("Missing API key")) { // matched by message prefix, not by error class
+        Bun.write(Bun.stderr, `${pc.red("Error:")} ${error.message}\n`);
+        setTimeout(() => process.exit(1), 100);
+        return;
+      }
+      throw error; // anything else rejects the action handler and therefore parseAsync()
+    }
+  });
+await program.parseAsync(); // top-level await: parses process.argv and runs the action

package/src/config/loader.ts ADDED Viewed

@@ -0,0 +1,76 @@
+import { YAML } from "bun";
+import { ConfigSchema } from "./schema.ts";
+import type { Config } from "./types.ts";
+/**
+ * Error thrown when config loading or validation fails.
+ *
+ * @param message - Human-readable description shown to the user
+ * @param cause - Optional underlying value that triggered the failure;
+ *                attached as `cause` whenever it is provided
+ */
+export class ConfigLoadError extends Error {
+  constructor(message: string, cause?: unknown) {
+    super(message);
+    this.name = "ConfigLoadError";
+    // Compare against undefined rather than truthiness: anything can be
+    // thrown in JavaScript, and falsy causes (0, "", false, null) must not be lost.
+    if (cause !== undefined) this.cause = cause;
+  }
+}
+/**
+ * Load and validate a YAML config file.
+ *
+ * Three-layer error handling:
+ * 1. File existence check (actionable hint)
+ * 2. YAML parsing (syntax error with Bun's built-in parser)
+ * 3. Zod validation (all issues numbered with field paths)
+ *
+ * Issues that apply to the document as a whole (empty path, e.g. an empty
+ * file that parses to a non-object) are labelled `(root)` so the numbered
+ * list never contains a blank field name.
+ *
+ * @param filePath - Absolute or relative path to the YAML config
+ * @returns Validated Config object with defaults applied
+ * @throws ConfigLoadError if file is missing, malformed, or fails validation
+ */
+export async function loadConfig(filePath: string): Promise<Config> {
+  // Layer 1: Read file (produces actionable error if missing)
+  const file = Bun.file(filePath);
+  let content: string;
+  try {
+    content = await file.text();
+  } catch (error) {
+    const code = (error as NodeJS.ErrnoException).code;
+    if (code === "ENOENT") {
+      throw new ConfigLoadError(
+        `Config file not found: ${filePath}\n` +
+          `  Create a config file or specify a different path:\n` +
+          `    superghost --config <path>`,
+      );
+    }
+    // Any other read failure (permissions, directory, ...) keeps the original as cause
+    throw new ConfigLoadError(
+      `Cannot read config file: ${filePath}\n` +
+        `  ${error instanceof Error ? error.message : String(error)}`,
+      error,
+    );
+  }
+  // Layer 2: YAML parsing
+  let raw: unknown;
+  try {
+    raw = YAML.parse(content);
+  } catch (error) {
+    throw new ConfigLoadError(
+      `Invalid YAML syntax: ${error instanceof Error ? error.message : String(error)}`,
+      error,
+    );
+  }
+  // Layer 3: Zod validation
+  // IMPORTANT: Check result.success boolean, NOT instanceof Error (Zod v4 pitfall)
+  const result = ConfigSchema.safeParse(raw);
+  if (!result.success) {
+    const issues = result.error.issues
+      .map((issue, i) => {
+        // String() each segment so numeric indexes and symbol keys both render;
+        // an empty path means the issue is about the whole document.
+        const path =
+          issue.path.length > 0
+            ? issue.path.map((segment) => String(segment)).join(".")
+            : "(root)";
+        return `  ${i + 1}. ${path}: ${issue.message}`;
+      })
+      .join("\n");
+    const count = result.error.issues.length;
+    throw new ConfigLoadError(
+      `Invalid config (${count} issue${count > 1 ? "s" : ""})\n${issues}`,
+    );
+  }
+  return result.data;
+}

package/src/config/schema.ts ADDED Viewed

@@ -0,0 +1,26 @@
+import { z } from "zod";
+/**
+ * Schema for a single test case in the configuration.
+ * `name` and `case` are required non-empty strings; every other field is
+ * optional, and `type` falls back to "browser" when omitted.
+ */
+export const TestCaseSchema = z.object({
+  name: z.string().min(1, "Test name cannot be empty"),
+  case: z.string().min(1, "Test case description cannot be empty"),
+  baseUrl: z.string().url().optional(), // must be a valid URL when present
+  timeout: z.number().positive().optional(), // NOTE(review): presumably milliseconds, like the top-level timeout — confirm
+  type: z.enum(["browser", "api"]).default("browser"),
+  context: z.string().optional(),
+});
+/**
+ * Schema for the full SuperGhost configuration file.
+ * Only `tests` is mandatory (at least one entry); all other fields are
+ * either optional or carry the default shown on their line.
+ */
+export const ConfigSchema = z.object({
+  baseUrl: z.string().url().optional(),
+  browser: z.enum(["chromium", "firefox", "webkit"]).default("chromium"),
+  headless: z.boolean().default(true),
+  timeout: z.number().positive().default(60_000), // NOTE(review): 60_000 suggests milliseconds — confirm unit
+  maxAttempts: z.number().int().positive().max(10).default(3), // integer in 1..10
+  model: z.string().default("claude-sonnet-4-6"),
+  modelProvider: z.string().default("anthropic"), // free-form string; not restricted to known providers here
+  cacheDir: z.string().default(".superghost-cache"),
+  recursionLimit: z.number().int().positive().default(500),
+  context: z.string().optional(),
+  tests: z.array(TestCaseSchema).min(1, "At least one test case is required"),
+});

package/src/config/types.ts ADDED Viewed

@@ -0,0 +1,8 @@
+import type { z } from "zod";
+import type { ConfigSchema, TestCaseSchema } from "./schema.ts";
+/**
+ * A single test case parsed from the config YAML.
+ * Inferred from TestCaseSchema, so it is the schema's output type.
+ */
+export type TestCase = z.infer<typeof TestCaseSchema>;
+/**
+ * The full configuration object after validation.
+ * Inferred from ConfigSchema, so it is the schema's output type.
+ */
+export type Config = z.infer<typeof ConfigSchema>;

package/src/dist/paths.ts ADDED Viewed

@@ -0,0 +1,52 @@
+import { join } from "node:path";
+import { homedir } from "node:os";
+/**
+ * Home directory for standalone binary dependencies.
+ * Resolves to `.superghost` inside the current user's home directory.
+ */
+export const SUPERGHOST_HOME = join(homedir(), ".superghost");
+/**
+ * Node modules path within the superghost home directory.
+ * Resolves to `<SUPERGHOST_HOME>/node_modules`.
+ */
+export const MCP_NODE_MODULES = join(SUPERGHOST_HOME, "node_modules");
+/**
+ * Standalone-binary check that takes argv as a parameter so it can be
+ * exercised in tests. Reports true when the second argv entry is missing
+ * or empty, or when it is identical to the first entry.
+ *
+ * @param argv - Argument vector to inspect (normally `process.argv`)
+ */
+export function _isStandaloneBinaryWith(argv: string[]): boolean {
+  const [first, second] = argv;
+  if (!second) {
+    return true;
+  }
+  return second === first;
+}
+/**
+ * Reports whether the current process looks like a compiled standalone
+ * binary, by applying the argv check to this process's own `process.argv`.
+ */
+export function isStandaloneBinary(): boolean {
+  const { argv } = process;
+  return _isStandaloneBinaryWith(argv);
+}
+/**
+ * Build the command and arguments used to spawn an MCP server package.
+ *
+ * - Not standalone: `bunx <packageName>@latest`.
+ * - Standalone: the executable at `<MCP_NODE_MODULES>/.bin/<last path segment>`
+ *   with no arguments.
+ *
+ * NOTE(review): the standalone branch assumes the package's bin entry is
+ * named after the last segment of the package name — verify per package.
+ *
+ * @param packageName - Full package name (e.g., "@playwright/mcp")
+ * @param standalone - Override standalone detection (for testing)
+ */
+export function getMcpCommand(
+  packageName: string,
+  standalone?: boolean,
+): { command: string; args: string[] } {
+  const useInstalledBin = standalone ?? isStandaloneBinary();
+  if (!useInstalledBin) {
+    // Package-manager mode: let bunx resolve and run the latest release
+    return { command: "bunx", args: [`${packageName}@latest`] };
+  }
+  // "@playwright/mcp" -> "mcp"; an unscoped name is used unchanged
+  const segments = packageName.split("/");
+  const binName = segments[segments.length - 1]!;
+  return { command: join(MCP_NODE_MODULES, ".bin", binName), args: [] };
+}

package/src/dist/setup.ts ADDED Viewed

@@ -0,0 +1,66 @@
+import { join } from "node:path";
+import { createSpinner } from "nanospinner";
+import pc from "picocolors";
+import { SUPERGHOST_HOME, MCP_NODE_MODULES } from "./paths.ts";
+/**
+ * MCP server dependencies that standalone binaries need.
+ * Maps package name to the version range written into the generated
+ * package.json; both entries track the "latest" tag.
+ */
+const MCP_DEPS: Record<string, string> = {
+  "@playwright/mcp": "latest",
+  "@calibress/curl-mcp": "latest",
+};
+/**
+ * Auto-install MCP server dependencies for standalone binary mode.
+ *
+ * On first run, installs every package listed in MCP_DEPS to ~/.superghost/
+ * by re-invoking the current executable as `install` with BUN_BE_BUN=1.
+ * Skips installation only when a package.json exists for each listed
+ * dependency, so a partially completed earlier install is repaired.
+ *
+ * Shows spinner + colored status messages matching CLI output style.
+ * Terminates the process with exit code 1 when the install fails.
+ */
+export async function ensureMcpDependencies(): Promise<void> {
+  // Check a marker per dependency: scoped names ("@scope/name") become nested folders
+  const markerPaths = Object.keys(MCP_DEPS).map((name) =>
+    join(MCP_NODE_MODULES, ...name.split("/"), "package.json"),
+  );
+  const installed = await Promise.all(
+    markerPaths.map((markerPath) => Bun.file(markerPath).exists()),
+  );
+  if (installed.every(Boolean)) return;
+  // Write package.json for dependency installation
+  const packageJsonPath = join(SUPERGHOST_HOME, "package.json");
+  await Bun.write(
+    packageJsonPath,
+    JSON.stringify({ private: true, dependencies: MCP_DEPS }),
+  );
+  // Show spinner for user feedback
+  const spinner = createSpinner(
+    pc.cyan("Installing MCP dependencies..."),
+  ).start();
+  // Spawn the running executable itself with BUN_BE_BUN=1 to force Bun runtime.
+  // process.execPath is the path of the current executable, which does not
+  // depend on how argv[0] happens to be reported.
+  const proc = Bun.spawn([process.execPath, "install"], {
+    cwd: SUPERGHOST_HOME,
+    env: { ...Bun.env, BUN_BE_BUN: "1" },
+    // stdout is never inspected; do not leave an unread pipe the child could block on
+    stdout: "ignore",
+    stderr: "pipe",
+  });
+  // Drain stderr while waiting for exit so a chatty install cannot fill the pipe
+  const [exitCode, stderr] = await Promise.all([
+    proc.exited,
+    new Response(proc.stderr).text(),
+  ]);
+  if (exitCode !== 0) {
+    spinner.error({ text: pc.red("Failed to install MCP dependencies") });
+    if (stderr) {
+      console.error(pc.dim(stderr));
+    }
+    process.exit(1);
+  }
+  spinner.success({
+    text: pc.green("MCP dependencies installed to ~/.superghost/"),
+  });
+}

package/src/infra/process-manager.ts ADDED Viewed

@@ -0,0 +1,34 @@
+import type { Subprocess } from "bun";
+/**
+ * Keeps a set of spawned subprocesses so they can all be shut down together.
+ * Shutdown sends SIGTERM, and follows up with SIGKILL if the process is
+ * still not reported as killed 5 seconds later.
+ */
+export class ProcessManager {
+  private processes = new Set<Subprocess>();
+  /** Start tracking a subprocess; it is dropped from the set once it exits. */
+  track(proc: Subprocess): void {
+    this.processes.add(proc);
+    const forget = () => {
+      this.processes.delete(proc);
+    };
+    proc.exited.then(forget);
+  }
+  /** Terminate every tracked process in parallel, then empty the set. */
+  async killAll(): Promise<void> {
+    const pending = Array.from(this.processes, (proc) => this.terminate(proc));
+    // allSettled: one failing kill must not stop the others from completing
+    await Promise.allSettled(pending);
+    this.processes.clear();
+  }
+  /** SIGTERM a single process, escalating to SIGKILL after 5s; waits for exit. */
+  private async terminate(proc: Subprocess): Promise<void> {
+    if (proc.killed) return;
+    proc.kill("SIGTERM");
+    const escalation = setTimeout(() => {
+      if (!proc.killed) {
+        proc.kill("SIGKILL");
+      }
+    }, 5000);
+    try {
+      await proc.exited;
+    } finally {
+      // Exit observed (or wait failed): the SIGKILL fallback is no longer needed
+      clearTimeout(escalation);
+    }
+  }
+}

package/src/infra/signals.ts ADDED Viewed

@@ -0,0 +1,20 @@
+import type { ProcessManager } from "./process-manager.ts";
+/**
+ * Install SIGINT and SIGTERM listeners that kill every tracked subprocess
+ * and then exit with 130 (SIGINT) or 143 (SIGTERM).
+ * A flag ensures only the first signal received performs the cleanup.
+ */
+export function setupSignalHandlers(pm: ProcessManager): void {
+  let shuttingDown = false;
+  const shutdown = async (signal: string) => {
+    // A second signal during cleanup is ignored
+    if (shuttingDown) return;
+    shuttingDown = true;
+    await pm.killAll();
+    const exitCode = signal === "SIGINT" ? 130 : 143;
+    process.exit(exitCode);
+  };
+  for (const signal of ["SIGINT", "SIGTERM"] as const) {
+    process.on(signal, () => shutdown(signal));
+  }
+}

package/src/output/reporter.ts ADDED Viewed

@@ -0,0 +1,75 @@
+import pc from "picocolors";
+import { createSpinner } from "nanospinner";
+import type { Reporter } from "./types.ts";
+import type { TestResult, RunResult } from "../runner/types.ts";
+/**
+ * Render a millisecond count for display.
+ * Values below 1000 are printed as-is with an `ms` suffix; anything else
+ * is converted to seconds with one decimal place and an `s` suffix.
+ */
+export function formatDuration(ms: number): string {
+  return ms < 1000 ? `${ms}ms` : `${(ms / 1000).toFixed(1)}s`;
+}
+/**
+ * Reporter that writes to the console: a spinner per test, then a ruled
+ * summary followed by the list of failures.
+ * Colors auto-disable when stdout is not a TTY (via picocolors).
+ * Spinner animation auto-disables in non-TTY (via nanospinner).
+ */
+export class ConsoleReporter implements Reporter {
+  private spinner: ReturnType<typeof createSpinner> | null = null;
+  /** Begin a running spinner labelled with the test name */
+  onTestStart(testName: string): void {
+    const started = createSpinner(testName).start();
+    this.spinner = started;
+  }
+  /** Finish the current spinner as success for a passed test, error otherwise */
+  onTestComplete(result: TestResult): void {
+    // A self-healed run is always labelled as AI-executed, whatever `source` says
+    const origin = result.selfHealed ? "ai, self-healed" : result.source;
+    const timing = pc.dim(`(${origin}, ${formatDuration(result.durationMs)})`);
+    const text = `${result.testName} ${timing}`;
+    if (result.status === "passed") {
+      this.spinner?.success({ text });
+    } else {
+      this.spinner?.error({ text });
+    }
+    if (result.selfHealed) {
+      console.log(pc.dim("  Cache was stale — re-executed and updated"));
+    }
+    this.spinner = null;
+  }
+  /** Print the ruled totals block, then each failed test with its error if any */
+  onRunComplete(data: RunResult): void {
+    const rule = `  ${"\u2501".repeat(40)}`;
+    // The failure count is only colored when there is something to highlight
+    const failedCount =
+      data.failed > 0 ? pc.red(String(data.failed)) : String(data.failed);
+    const summary = [
+      "",
+      rule,
+      "    SuperGhost Results",
+      rule,
+      `    Total:   ${data.results.length}`,
+      `    Passed:  ${pc.green(String(data.passed))}`,
+      `    Failed:  ${failedCount}`,
+      `    Cached:  ${data.cached}`,
+      `    Time:    ${pc.dim(formatDuration(data.totalDurationMs))}`,
+      rule,
+    ];
+    for (const line of summary) {
+      console.log(line);
+    }
+    if (!(data.failed > 0)) return;
+    console.log("");
+    console.log(pc.red("  Failed tests:"));
+    const failures = data.results.filter((entry) => entry.status === "failed");
+    for (const failure of failures) {
+      console.log(`    ${pc.red("-")} ${failure.testName}`);
+      if (failure.error) {
+        console.log(`      ${pc.dim(failure.error)}`);
+      }
+    }
+  }
+}

package/src/output/types.ts ADDED Viewed

@@ -0,0 +1,8 @@
+import type { RunResult, TestResult } from "../runner/types.ts";
+/**
+ * Interface for output reporting.
+ * One callback for a test starting, one for a test finishing, and one for
+ * the run as a whole; none of them return a value.
+ */
+export interface Reporter {
+  onTestStart(testName: string): void; // receives the test's name only
+  onTestComplete(result: TestResult): void; // receives the finished test's result
+  onRunComplete(data: RunResult): void; // receives the aggregate result for the run
+}