npm - superghost - Versions diffs - 0.1.1 → 0.3.0 - Mend

superghost 0.1.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23)

package/package.json +7 -2
package/src/agent/agent-runner.ts +23 -10
package/src/agent/mcp-manager.ts +7 -14
package/src/agent/model-factory.ts +1 -1
package/src/agent/types.ts +1 -18
package/src/cache/cache-manager.ts +52 -5
package/src/cache/step-recorder.ts +1 -1
package/src/cache/step-replayer.ts +11 -6
package/src/cache/types.ts +1 -1
package/src/cli.ts +282 -103
package/src/config/loader.ts +6 -14
package/src/config/types.ts +3 -2
package/src/infra/preflight.ts +13 -0
package/src/infra/process-manager.ts +6 -2
package/src/infra/signals.ts +1 -1
package/src/output/banner.ts +66 -0
package/src/output/json-formatter.ts +150 -0
package/src/output/reporter.ts +49 -20
package/src/output/tool-name-map.ts +62 -0
package/src/output/types.ts +27 -1
package/src/runner/test-executor.ts +36 -33
package/src/runner/test-runner.ts +7 -15
package/src/runner/types.ts +1 -0

package/src/output/json-formatter.ts ADDED Viewed

@@ -0,0 +1,150 @@
+import { type RunResult } from "../runner/types.ts";
+/**
+ * Metadata about the test run environment and configuration.
+ *
+ * Embedded as the `metadata` field of JsonOutput.
+ * - `baseUrl` is a required key whose value may be `undefined`; JSON.stringify omits
+ *   undefined-valued keys, so it can be absent from the serialized text.
+ * - `timestamp` is an ISO-8601 string when formatJsonError fills it in (Date toISOString);
+ *   other producers are not visible here - presumably the same format, TODO confirm.
+ * - `filter` is optional; presumably present only when a test filter was applied, with
+ *   `matched` out of `total` tests selected - verify against the caller.
+ */
+export interface JsonOutputMetadata {
+  model: string;
+  provider: string;
+  configFile: string;
+  baseUrl: string | undefined;
+  timestamp: string;
+  filter?: {
+    pattern: string;
+    matched: number;
+    total: number;
+  };
+}
+/**
+ * Top-level JSON output structure for all output modes.
+ *
+ * Three producers in this file build this shape:
+ * - formatJsonOutput: a completed run; sets `summary.totalDurationMs` and per-test
+ *   `status` / `durationMs`, adding `selfHealed` and `error` only when applicable.
+ * - formatJsonDryRun: sets `dryRun: true` and `summary.total`; each test carries only
+ *   `testName`, `testCase` and `source`.
+ * - formatJsonError: sets the top-level `error`, `exitCode: 2` and an empty `tests` array.
+ *
+ * The optional fields exist because each producer fills a different subset.
+ */
+export interface JsonOutput {
+  version: string;
+  success: boolean;
+  exitCode: number;
+  dryRun?: boolean;
+  error?: string;
+  metadata: JsonOutputMetadata;
+  summary: {
+    passed: number;
+    failed: number;
+    cached: number;
+    skipped: number;
+    total?: number;
+    totalDurationMs?: number;
+  };
+  tests: Array<{
+    testName: string;
+    testCase: string;
+    status?: string;
+    source: string;
+    durationMs?: number;
+    selfHealed?: boolean;
+    error?: string;
+  }>;
+}
+/**
+ * Format a completed run result as JSON.
+ * Only includes selfHealed when true, only includes error when present.
+ *
+ * @param runResult - Aggregated run data; its counts and total duration are copied
+ *   unchanged into `summary` (`summary.total` is not set by this function).
+ * @param metadata - Embedded unchanged as the `metadata` field.
+ * @param version - Emitted as the top-level `version` field.
+ * @param exitCode - Emitted as `exitCode`; `success` is true only when it equals 0.
+ * @returns The output object serialized with 2-space indentation.
+ *
+ * Each test entry is assembled as a loose string-keyed record and then cast to the
+ * JsonOutput test shape, so the compiler does not check the entry fields against it.
+ */
+export function formatJsonOutput(
+  runResult: RunResult,
+  metadata: JsonOutputMetadata,
+  version: string,
+  exitCode: number,
+): string {
+  const output: JsonOutput = {
+    version,
+    success: exitCode === 0,
+    exitCode,
+    metadata,
+    summary: {
+      passed: runResult.passed,
+      failed: runResult.failed,
+      cached: runResult.cached,
+      skipped: runResult.skipped,
+      totalDurationMs: runResult.totalDurationMs,
+    },
+    tests: runResult.results.map((r) => {
+      const entry: Record<string, unknown> = {
+        testName: r.testName,
+        testCase: r.testCase,
+        status: r.status,
+        source: r.source,
+        durationMs: r.durationMs,
+      };
+      if (r.selfHealed === true) {
+        entry.selfHealed = true;
+      }
+      if (r.error !== undefined) {
+        entry.error = r.error;
+      }
+      return entry as JsonOutput["tests"][number];
+    }),
+  };
+  return JSON.stringify(output, null, 2);
+}
+/**
+ * Format a dry-run test listing as JSON.
+ * Produces dryRun: true, exitCode: 0, success: true.
+ *
+ * @param tests - Listed tests; `name` and `case` are emitted as `testName` and `testCase`,
+ *   `source` is passed through.
+ * @param metadata - Embedded unchanged as the `metadata` field.
+ * @param version - Emitted as the top-level `version` field.
+ * @returns The output object serialized with 2-space indentation.
+ *
+ * `summary.cached` counts the tests whose source is "cache", `summary.total` is the number
+ * of listed tests, and passed / failed / skipped are fixed at 0. Test entries carry no
+ * `status` or `durationMs`.
+ */
+export function formatJsonDryRun(
+  tests: Array<{ name: string; case: string; source: "cache" | "ai" }>,
+  metadata: JsonOutputMetadata,
+  version: string,
+): string {
+  const cachedCount = tests.filter((t) => t.source === "cache").length;
+  const output: JsonOutput = {
+    version,
+    success: true,
+    exitCode: 0,
+    dryRun: true,
+    metadata,
+    summary: {
+      passed: 0,
+      failed: 0,
+      cached: cachedCount,
+      skipped: 0,
+      total: tests.length,
+    },
+    tests: tests.map((t) => ({
+      testName: t.name,
+      testCase: t.case,
+      source: t.source,
+    })),
+  };
+  return JSON.stringify(output, null, 2);
+}
+/**
+ * Format an error condition as JSON.
+ * Produces success: false, exitCode: 2, with the error message.
+ *
+ * @param errorMessage - Emitted as the top-level `error` field.
+ * @param version - Emitted as the top-level `version` field.
+ * @param metadata - Whatever metadata is known at the point of failure. Missing `model`,
+ *   `provider` and `configFile` default to an empty string, a missing `timestamp` defaults
+ *   to the current time as an ISO string, and `baseUrl` is passed through as given.
+ * @returns The output object serialized with 2-space indentation; all summary counts are 0
+ *   and `tests` is empty.
+ *
+ * NOTE(review): `metadata.filter` is not copied into the output even when supplied -
+ * confirm whether dropping it is intentional.
+ */
+export function formatJsonError(errorMessage: string, version: string, metadata: Partial<JsonOutputMetadata>): string {
+  const fullMetadata: JsonOutputMetadata = {
+    model: metadata.model ?? "",
+    provider: metadata.provider ?? "",
+    configFile: metadata.configFile ?? "",
+    baseUrl: metadata.baseUrl,
+    timestamp: metadata.timestamp ?? new Date().toISOString(),
+  };
+  const output: JsonOutput = {
+    version,
+    success: false,
+    exitCode: 2,
+    error: errorMessage,
+    metadata: fullMetadata,
+    summary: {
+      passed: 0,
+      failed: 0,
+      cached: 0,
+      skipped: 0,
+    },
+    tests: [],
+  };
+  return JSON.stringify(output, null, 2);
+}

package/src/output/reporter.ts CHANGED Viewed

@@ -1,7 +1,8 @@
-import pc from "picocolors";
 import { createSpinner } from "nanospinner";
-import type { Reporter } from "./types.ts";
-import type { TestResult, RunResult } from "../runner/types.ts";
+import pc from "picocolors";
+import { type RunResult, type TestResult } from "../runner/types.ts";
+import { type Reporter, type StepInfo } from "./types.ts";
 /**
  * Format milliseconds as a human-readable duration string.
@@ -14,16 +15,29 @@ export function formatDuration(ms: number): string {
   return `${(ms / 1000).toFixed(1)}s`;
 }
+/**
+ * Write a line of text to stderr.
+ *
+ * Appends a trailing newline to `text` and passes it to Bun.write targeting Bun.stderr.
+ *
+ * NOTE(review): the value returned by Bun.write is neither awaited nor otherwise handled
+ * here. Bun documents it as a Promise, so confirm that write ordering and flushing before
+ * process exit behave as intended.
+ */
+export function writeStderr(text: string): void {
+  Bun.write(Bun.stderr, `${text}\n`);
+}
 /**
  * Console reporter with colored output, spinners, and box summary.
+ * All output routes to stderr so stdout is reserved for structured output.
  * Colors auto-disable when stdout is not a TTY (via picocolors).
  * Spinner animation auto-disables in non-TTY (via nanospinner).
  */
 export class ConsoleReporter implements Reporter {
   private spinner: ReturnType<typeof createSpinner> | null = null;
+  private readonly verbose: boolean;
+  private currentTestName: string | null = null;
+  constructor(verbose = false) {
+    this.verbose = verbose;
+  }
   /** Creates a spinner with the test name and starts it */
   onTestStart(testName: string): void {
+    this.currentTestName = testName;
     this.spinner = createSpinner(testName).start();
   }
@@ -38,35 +52,50 @@ export class ConsoleReporter implements Reporter {
       this.spinner?.error({ text: `${testName} ${duration}` });
     }
     if (selfHealed) {
-      console.log(pc.dim("  Cache was stale — re-executed and updated"));
+      writeStderr(pc.dim("  Cache was stale \u2014 re-executed and updated"));
     }
     this.spinner = null;
+    this.currentTestName = null;
+  }
+  /** Handles per-step progress during AI execution */
+  onStepProgress(step: StepInfo): void {
+    if (this.verbose) {
+      writeStderr(pc.dim(`    Step ${step.stepNumber}: ${step.description.full}`));
+    } else if (this.spinner) {
+      let spinnerText = `${this.currentTestName} \u2014 ${step.description.full}`;
+      if (spinnerText.length > 60) {
+        spinnerText = `${spinnerText.slice(0, 57)}...`;
+      }
+      this.spinner.update(spinnerText);
+    }
   }
   /** Prints bordered box summary and lists failed tests with error messages */
   onRunComplete(data: RunResult): void {
     const bar = "\u2501".repeat(40);
-    console.log("");
-    console.log(`  ${bar}`);
-    console.log("    SuperGhost Results");
-    console.log(`  ${bar}`);
-    console.log(`    Total:   ${data.results.length}`);
-    console.log(`    Passed:  ${pc.green(String(data.passed))}`);
-    console.log(
-      `    Failed:  ${data.failed > 0 ? pc.red(String(data.failed)) : String(data.failed)}`,
-    );
-    console.log(`    Cached:  ${data.cached}`);
-    console.log(`    Time:    ${pc.dim(formatDuration(data.totalDurationMs))}`);
-    console.log(`  ${bar}`);
+    writeStderr("");
+    writeStderr(`  ${bar}`);
+    writeStderr("    SuperGhost Results");
+    writeStderr(`  ${bar}`);
+    writeStderr(`    Total:   ${data.results.length}`);
+    writeStderr(`    Passed:  ${pc.green(String(data.passed))}`);
+    writeStderr(`    Failed:  ${data.failed > 0 ? pc.red(String(data.failed)) : String(data.failed)}`);
+    if (data.skipped > 0) {
+      writeStderr(`    Skipped: ${data.skipped}`);
+    }
+    writeStderr(`    Cached:  ${data.cached}`);
+    writeStderr(`    Time:    ${pc.dim(formatDuration(data.totalDurationMs))}`);
+    writeStderr(`  ${bar}`);
     if (data.failed > 0) {
-      console.log("");
-      console.log(pc.red("  Failed tests:"));
+      writeStderr("");
+      writeStderr(pc.red("  Failed tests:"));
       for (const result of data.results) {
         if (result.status === "failed") {
-          console.log(`    ${pc.red("-")} ${result.testName}`);
+          writeStderr(`    ${pc.red("-")} ${result.testName}`);
           if (result.error) {
-            console.log(`      ${pc.dim(result.error)}`);
+            writeStderr(`      ${pc.dim(result.error)}`);
           }
         }
       }

package/src/output/tool-name-map.ts ADDED Viewed

@@ -0,0 +1,62 @@
+import { type StepDescription } from "./types.ts";
+/**
+ * Maps raw MCP tool names to human-readable action names.
+ *
+ * Read by describeToolCall; a tool name with no entry here gets a label derived from the
+ * raw name instead (underscores replaced by spaces, first letter upper-cased).
+ */
+const PREFIX_MAP: Record<string, string> = {
+  browser_navigate: "Navigate",
+  browser_click: "Click",
+  browser_type: "Type",
+  browser_screenshot: "Screenshot",
+  browser_wait_for_text: "Wait for text",
+  browser_hover: "Hover",
+  browser_select_option: "Select",
+  browser_go_back: "Go back",
+  browser_go_forward: "Go forward",
+  browser_press_key: "Press key",
+  browser_drag: "Drag",
+  browser_resize: "Resize",
+  browser_handle_dialog: "Handle dialog",
+  browser_file_upload: "Upload file",
+  browser_pdf_save: "Save PDF",
+  browser_close: "Close",
+  browser_console_messages: "Console messages",
+  browser_install: "Install browser",
+  browser_tab_list: "List tabs",
+  browser_tab_new: "New tab",
+  browser_tab_select: "Select tab",
+  browser_tab_close: "Close tab",
+  browser_network_requests: "Network requests",
+  browser_snapshot: "Snapshot",
+};
+/**
+ * Maps tool names to the input field used as the key argument.
+ *
+ * Read by describeToolCall; a tool with no entry here is described by its action name
+ * alone, without a key argument.
+ */
+const KEY_ARG_MAP: Record<string, string> = {
+  browser_navigate: "url",
+  browser_click: "element",
+  browser_type: "element",
+  browser_hover: "element",
+  browser_select_option: "element",
+  browser_press_key: "key",
+  browser_wait_for_text: "text",
+};
+/**
+ * Convert a raw tool call into a human-readable description.
+ *
+ * Known tools (browser_navigate, browser_click, etc.) map to friendly names.
+ * Unknown tools fall back to: replace underscores with spaces, capitalize first letter.
+ * Key arguments are extracted based on tool type (e.g., "url" for navigate).
+ *
+ * Both lookup tables are consulted with an own-property check, so a tool name that
+ * collides with an inherited Object.prototype member (e.g. "constructor", "toString")
+ * is treated as an unknown tool instead of resolving to a non-string value.
+ *
+ * @param toolName - Raw tool name as reported by the tool call.
+ * @param input - Tool call arguments; only the field named in KEY_ARG_MAP is read.
+ * @returns The action label, the optional key argument, and the combined `full` string
+ *   (action, an arrow, then the key argument; or just the action when there is no usable
+ *   key argument).
+ */
+export function describeToolCall(toolName: string, input: Record<string, unknown>): StepDescription {
+  // Own-property lookup: a bare index would also match inherited members such as "constructor"
+  const lookup = (table: Record<string, string>): string | undefined =>
+    Object.prototype.hasOwnProperty.call(table, toolName) ? table[toolName] : undefined;
+  // Look up human name, or derive from raw name as fallback
+  const action = lookup(PREFIX_MAP) ?? toolName.replace(/_/g, " ").replace(/^\w/, (c) => c.toUpperCase());
+  // Look up which input field is the key argument for this tool
+  const keyArgField = lookup(KEY_ARG_MAP);
+  const rawKeyArg = keyArgField ? input[keyArgField] : undefined;
+  // Null, undefined and empty-string values all count as "no key argument"
+  const keyArg =
+    rawKeyArg !== undefined && rawKeyArg !== null && String(rawKeyArg) !== "" ? String(rawKeyArg) : undefined;
+  const full = keyArg ? `${action} \u2192 ${keyArg}` : action;
+  return { action, keyArg, full };
+}

package/src/output/types.ts CHANGED Viewed

@@ -1,8 +1,34 @@
-import type { RunResult, TestResult } from "../runner/types.ts";
+import { type RunResult, type TestResult } from "../runner/types.ts";
+/**
+ * Describes a tool call in human-readable form.
+ *
+ * This is the shape returned by describeToolCall in tool-name-map.ts.
+ */
+export interface StepDescription {
+  /** Human-readable action name, e.g. "Navigate", "Click" */
+  action: string;
+  /** Key argument value, e.g. "/login", "button.submit"; omitted when the tool has none */
+  keyArg?: string;
+  /** Full description string, e.g. "Navigate → /login", or just the action when keyArg is absent */
+  full: string;
+}
+/**
+ * Information about a single step (tool call) during AI execution.
+ *
+ * Passed to OnStepProgress callbacks and to Reporter.onStepProgress. The console
+ * reporter reads `stepNumber` and `description.full` from it.
+ */
+export interface StepInfo {
+  /** 1-based step counter for the current test */
+  stepNumber: number;
+  /** Raw tool name, e.g. "browser_navigate" */
+  toolName: string;
+  /** Tool call input arguments */
+  input: Record<string, unknown>;
+  /** Human-readable description of the tool call */
+  description: StepDescription;
+}
+/**
+ * Callback invoked for each tool call during AI execution.
+ *
+ * TestExecutor hands the same callback to the cache replayer as well as to the agent
+ * function; whether the replayer calls it once per replayed step is not visible from this
+ * file - verify in step-replayer.ts.
+ */
+export type OnStepProgress = (step: StepInfo) => void;
 /**
  * Interface for output reporting.
  *
  * `onStepProgress` is optional, so an implementation may omit it; ConsoleReporter in
  * reporter.ts provides one.
  */
 export interface Reporter {
   onTestStart(testName: string): void;
   onTestComplete(result: TestResult): void;
   onRunComplete(data: RunResult): void;
+  onStepProgress?(step: StepInfo): void;
 }

package/src/runner/test-executor.ts CHANGED Viewed

@@ -1,8 +1,9 @@
-import type { CacheManager } from "../cache/cache-manager.ts";
-import type { StepReplayer } from "../cache/step-replayer.ts";
-import type { AgentExecutionResult } from "../agent/types.ts";
-import type { Config } from "../config/types.ts";
-import type { TestResult } from "./types.ts";
+import { type AgentExecutionResult } from "../agent/types.ts";
+import { type CacheManager } from "../cache/cache-manager.ts";
+import { type StepReplayer } from "../cache/step-replayer.ts";
+import { type Config } from "../config/types.ts";
+import { type OnStepProgress } from "../output/types.ts";
+import { type TestResult } from "./types.ts";
 /** Function signature for executing a test via the AI agent */
 type ExecuteAgentFn = (config: {
@@ -13,6 +14,7 @@ type ExecuteAgentFn = (config: {
   recursionLimit: number;
   globalContext?: string;
   testContext?: string;
+  onStepProgress?: OnStepProgress;
 }) => Promise<AgentExecutionResult>;
 /**
@@ -27,11 +29,12 @@ export class TestExecutor {
   private readonly executeAgentFn: ExecuteAgentFn;
   private readonly model: any;
   private readonly tools: Record<string, any>;
-  private readonly config: Pick<
-    Config,
-    "maxAttempts" | "recursionLimit" | "model" | "modelProvider"
-  > & { context?: string };
+  private readonly config: Pick<Config, "maxAttempts" | "recursionLimit" | "model" | "modelProvider"> & {
+    context?: string;
+  };
   private readonly globalContext?: string;
+  private readonly noCache: boolean;
+  private readonly onStepProgress?: OnStepProgress;
   constructor(opts: {
     cacheManager: CacheManager;
@@ -39,11 +42,10 @@ export class TestExecutor {
     executeAgentFn: ExecuteAgentFn;
     model?: any;
     tools?: Record<string, any>;
-    config: Pick<
-      Config,
-      "maxAttempts" | "recursionLimit" | "model" | "modelProvider"
-    > & { context?: string };
+    config: Pick<Config, "maxAttempts" | "recursionLimit" | "model" | "modelProvider"> & { context?: string };
     globalContext?: string;
+    noCache?: boolean;
+    onStepProgress?: OnStepProgress;
   }) {
     this.cacheManager = opts.cacheManager;
     this.replayer = opts.replayer;
@@ -52,34 +54,34 @@ export class TestExecutor {
     this.tools = opts.tools ?? {};
     this.config = opts.config;
     this.globalContext = opts.globalContext;
+    this.noCache = opts.noCache ?? false;
+    this.onStepProgress = opts.onStepProgress;
   }
   /** Execute a single test case with cache-first strategy */
-  async execute(
-    testCase: string,
-    baseUrl: string,
-    testContext?: string,
-  ): Promise<TestResult> {
+  async execute(testCase: string, baseUrl: string, testContext?: string): Promise<TestResult> {
     const start = Date.now();
-    // Phase 1: Try cache replay
-    const cached = await this.cacheManager.load(testCase, baseUrl);
-    if (cached) {
-      const replay = await this.replayer.replay(cached.steps);
-      if (replay.success) {
-        return {
-          testName: testCase,
-          testCase,
-          status: "passed",
-          source: "cache",
-          durationMs: Date.now() - start,
-        };
+    // Phase 1: Try cache replay (unless noCache)
+    if (!this.noCache) {
+      const cached = await this.cacheManager.load(testCase, baseUrl);
+      if (cached) {
+        const replay = await this.replayer.replay(cached.steps, this.onStepProgress);
+        if (replay.success) {
+          return {
+            testName: testCase,
+            testCase,
+            status: "passed",
+            source: "cache",
+            durationMs: Date.now() - start,
+          };
+        }
+        // Cache stale — fall through to AI with self-heal flag
+        return this.executeWithAgent(testCase, baseUrl, start, true, testContext);
       }
-      // Cache stale — fall through to AI with self-heal flag
-      return this.executeWithAgent(testCase, baseUrl, start, true, testContext);
     }
-    // Phase 2: No cache — go directly to AI
+    // Phase 2: No cache or noCache — go directly to AI
     return this.executeWithAgent(testCase, baseUrl, start, false, testContext);
   }
@@ -102,6 +104,7 @@ export class TestExecutor {
         recursionLimit: this.config.recursionLimit,
         globalContext: this.globalContext,
         testContext,
+        onStepProgress: this.onStepProgress,
       });
       if (result.passed) {

package/src/runner/test-runner.ts CHANGED Viewed

@@ -1,13 +1,9 @@
-import type { Config } from "../config/types.ts";
-import type { Reporter } from "../output/types.ts";
-import type { TestResult, RunResult } from "./types.ts";
+import { type Config } from "../config/types.ts";
+import { type Reporter } from "../output/types.ts";
+import { type RunResult, type TestResult } from "./types.ts";
 /** Function signature for executing a single test case */
-export type ExecuteFn = (
-  testCase: string,
-  baseUrl: string,
-  testContext?: string,
-) => Promise<TestResult>;
+export type ExecuteFn = (testCase: string, baseUrl: string, testContext?: string) => Promise<TestResult>;
 /**
  * Orchestrates sequential execution of all test cases.
@@ -50,17 +46,13 @@ export class TestRunner {
 }
 /** Aggregate individual test results into a run summary */
-function aggregateResults(
-  results: TestResult[],
-  totalDurationMs: number,
-): RunResult {
+function aggregateResults(results: TestResult[], totalDurationMs: number): RunResult {
   return {
     results,
     totalDurationMs,
     passed: results.filter((r) => r.status === "passed").length,
     failed: results.filter((r) => r.status === "failed").length,
-    cached: results.filter(
-      (r) => r.source === "cache" && r.status === "passed",
-    ).length,
+    cached: results.filter((r) => r.source === "cache" && r.status === "passed").length,
+    skipped: 0,
   };
 }

package/src/runner/types.ts CHANGED Viewed

@@ -23,4 +23,5 @@ export interface RunResult {
   passed: number;
   failed: number;
   cached: number;
+  skipped: number;
 }