superghost 0.1.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "superghost",
3
- "version": "0.1.1",
3
+ "version": "0.3.0",
4
4
  "description": "Plain English test cases with AI execution and instant cached replay for CI/CD",
5
5
  "type": "module",
6
6
  "bin": {
@@ -14,8 +14,10 @@
14
14
  "scripts": {
15
15
  "test": "bun test",
16
16
  "typecheck": "bunx tsc --noEmit",
17
+ "lint": "bunx biome check .",
18
+ "lint:fix": "bunx biome check --write .",
17
19
  "build:binary": "bun run scripts/build-binaries.ts",
18
- "prepublishOnly": "bun test && bunx tsc --noEmit",
20
+ "prepublishOnly": "bun run lint && bun test && bunx tsc --noEmit",
19
21
  "e2e": "bun run e2e/run-e2e.ts",
20
22
  "e2e:smoke": "bun run e2e/run-e2e.ts smoke",
21
23
  "e2e:browser": "bun run e2e/run-e2e.ts browser",
@@ -60,13 +62,16 @@
60
62
  "@ai-sdk/openai": "^3.0.41",
61
63
  "@modelcontextprotocol/sdk": "^1.27.1",
62
64
  "@openrouter/ai-sdk-provider": "^2.2.5",
65
+ "@types/picomatch": "^4.0.2",
63
66
  "ai": "^6.0.116",
64
67
  "commander": "^14.0.3",
65
68
  "nanospinner": "^1.2.2",
66
69
  "picocolors": "^1.1.1",
70
+ "picomatch": "^4.0.3",
67
71
  "zod": "^4.3.6"
68
72
  },
69
73
  "devDependencies": {
74
+ "@biomejs/biome": "2.4.6",
70
75
  "@types/bun": "^1.3.10",
71
76
  "@types/react": "^19.0.0",
72
77
  "@types/react-dom": "^19.0.0",
@@ -1,8 +1,11 @@
1
1
  import { generateText, Output, stepCountIs } from "ai";
2
2
  import { z } from "zod";
3
+
3
4
  import { StepRecorder } from "../cache/step-recorder.ts";
4
- import type { AgentExecutionResult } from "./types.ts";
5
+ import { describeToolCall } from "../output/tool-name-map.ts";
6
+ import { type OnStepProgress } from "../output/types.ts";
5
7
  import { buildSystemPrompt } from "./prompt.ts";
8
+ import { type AgentExecutionResult } from "./types.ts";
6
9
 
7
10
  /**
8
11
  * Schema for structured agent output.
@@ -10,9 +13,7 @@ import { buildSystemPrompt } from "./prompt.ts";
10
13
  */
11
14
  const TestResultSchema = z.object({
12
15
  passed: z.boolean().describe("Whether the test case passed"),
13
- message: z
14
- .string()
15
- .describe("Brief diagnostic: what happened and what the page showed"),
16
+ message: z.string().describe("Brief diagnostic: what happened and what the page showed"),
16
17
  });
17
18
 
18
19
  /**
@@ -33,16 +34,14 @@ export async function executeAgent(config: {
33
34
  recursionLimit: number;
34
35
  globalContext?: string;
35
36
  testContext?: string;
37
+ onStepProgress?: OnStepProgress;
36
38
  }): Promise<AgentExecutionResult> {
37
39
  const recorder = new StepRecorder();
38
40
  const wrappedTools = recorder.wrapTools(config.tools);
39
41
 
40
- const systemPrompt = buildSystemPrompt(
41
- config.testCase,
42
- config.baseUrl,
43
- config.globalContext,
44
- config.testContext,
45
- );
42
+ const systemPrompt = buildSystemPrompt(config.testCase, config.baseUrl, config.globalContext, config.testContext);
43
+
44
+ let stepCounter = 0;
46
45
 
47
46
  const { output } = await generateText({
48
47
  model: config.model,
@@ -51,6 +50,20 @@ export async function executeAgent(config: {
51
50
  prompt: `Execute the test case: "${config.testCase}"`,
52
51
  stopWhen: stepCountIs(config.recursionLimit),
53
52
  output: Output.object({ schema: TestResultSchema }),
53
+ experimental_onToolCallFinish: config.onStepProgress
54
+ ? (event: any) => {
55
+ if (event.success) {
56
+ stepCounter++;
57
+ const input = (event.toolCall.input ?? {}) as Record<string, unknown>;
58
+ config.onStepProgress?.({
59
+ stepNumber: stepCounter,
60
+ toolName: event.toolCall.toolName,
61
+ input,
62
+ description: describeToolCall(event.toolCall.toolName, input),
63
+ });
64
+ }
65
+ }
66
+ : undefined,
54
67
  });
55
68
 
56
69
  if (output === null) {
@@ -1,6 +1,7 @@
1
1
  import { createMCPClient } from "@ai-sdk/mcp";
2
2
  import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js";
3
- import type { Config } from "../config/types.ts";
3
+
4
+ import { type Config } from "../config/types.ts";
4
5
  import { getMcpCommand } from "../dist/paths.ts";
5
6
 
6
7
  /**
@@ -12,8 +13,7 @@ import { getMcpCommand } from "../dist/paths.ts";
12
13
  * regardless of test type.
13
14
  */
14
15
  export class McpManager {
15
- private playwrightClient: Awaited<ReturnType<typeof createMCPClient>> | null =
16
- null;
16
+ private playwrightClient: Awaited<ReturnType<typeof createMCPClient>> | null = null;
17
17
  private curlClient: Awaited<ReturnType<typeof createMCPClient>> | null = null;
18
18
 
19
19
  constructor(private readonly config: Pick<Config, "browser" | "headless">) {}
@@ -27,11 +27,7 @@ export class McpManager {
27
27
  const playwrightCmd = getMcpCommand("@playwright/mcp");
28
28
  const curlCmd = getMcpCommand("@calibress/curl-mcp");
29
29
 
30
- const playwrightArgs = [
31
- ...playwrightCmd.args,
32
- "--isolated",
33
- `--browser=${this.config.browser}`,
34
- ];
30
+ const playwrightArgs = [...playwrightCmd.args, "--isolated", `--browser=${this.config.browser}`];
35
31
 
36
32
  if (this.config.headless) {
37
33
  playwrightArgs.splice(playwrightCmd.args.length, 0, "--headless");
@@ -57,8 +53,8 @@ export class McpManager {
57
53
  * Provides ALL tools to the agent regardless of test type.
58
54
  */
59
55
  async getTools(): Promise<Record<string, any>> {
60
- const playwrightTools = await this.playwrightClient!.tools();
61
- const curlTools = await this.curlClient!.tools();
56
+ const playwrightTools = await this.playwrightClient?.tools();
57
+ const curlTools = await this.curlClient?.tools();
62
58
  return { ...playwrightTools, ...curlTools };
63
59
  }
64
60
 
@@ -68,10 +64,7 @@ export class McpManager {
68
64
  * even if one fails to close.
69
65
  */
70
66
  async close(): Promise<void> {
71
- await Promise.allSettled([
72
- this.playwrightClient?.close(),
73
- this.curlClient?.close(),
74
- ]);
67
+ await Promise.allSettled([this.playwrightClient?.close(), this.curlClient?.close()]);
75
68
  this.playwrightClient = null;
76
69
  this.curlClient = null;
77
70
  }
@@ -1,6 +1,6 @@
1
1
  import { anthropic } from "@ai-sdk/anthropic";
2
- import { openai } from "@ai-sdk/openai";
3
2
  import { google } from "@ai-sdk/google";
3
+ import { openai } from "@ai-sdk/openai";
4
4
  import { createOpenRouter } from "@openrouter/ai-sdk-provider";
5
5
 
6
6
  /** Supported LLM provider names */
@@ -1,5 +1,4 @@
1
- import type { CachedStep } from "../cache/types.ts";
2
- import type { ProviderName } from "./model-factory.ts";
1
+ import { type CachedStep } from "../cache/types.ts";
3
2
 
4
3
  /** Result of a single AI agent execution */
5
4
  export interface AgentExecutionResult {
@@ -10,19 +9,3 @@ export interface AgentExecutionResult {
10
9
  /** Recorded tool call steps for caching */
11
10
  steps: CachedStep[];
12
11
  }
13
-
14
- /** Configuration for a single agent run */
15
- export interface AgentConfig {
16
- /** Model identifier (e.g., "claude-sonnet-4-6", "gpt-4o") */
17
- model: string;
18
- /** LLM provider */
19
- provider: ProviderName;
20
- /** Maximum number of agent steps */
21
- recursionLimit: number;
22
- /** Plain English test case description */
23
- testCase: string;
24
- /** Base URL for the application under test */
25
- baseUrl: string;
26
- /** Optional per-test context appended to system prompt */
27
- context?: string;
28
- }
@@ -1,6 +1,7 @@
1
+ import { mkdir, readdir, rename } from "node:fs/promises";
1
2
  import { join } from "node:path";
2
- import { mkdir, rename } from "node:fs/promises";
3
- import type { CacheEntry, CachedStep } from "./types.ts";
3
+
4
+ import { type CachedStep, type CacheEntry } from "./types.ts";
4
5
 
5
6
  /**
6
7
  * Manages file-based cache entries for test step recordings.
@@ -17,9 +18,30 @@ export class CacheManager {
17
18
  /**
18
19
  * Generate a deterministic 16-char hex hash key.
19
20
  * Uses Bun-native CryptoHasher for SHA-256 hashing.
21
+ *
22
+ * Normalization pipeline (v2):
23
+ * 1. Unicode NFC normalization + whitespace collapse (case-preserving)
24
+ * 2. URL normalization (lowercase hostname, strip trailing slash)
25
+ * 3. Version-prefixed input string ("v2|...")
20
26
  */
21
27
  static hashKey(testCase: string, baseUrl: string): string {
22
- const input = `${testCase}|${baseUrl}`;
28
+ // Step 1: Unicode NFC + whitespace collapse (case-preserving per user decision)
29
+ const normalizedCase = testCase.normalize("NFC").replace(/\s+/g, " ").trim();
30
+
31
+ // Step 2: URL normalization (lowercase hostname, strip trailing slash)
32
+ let normalizedUrl: string;
33
+ try {
34
+ const url = new URL(baseUrl);
35
+ // new URL() lowercases hostname and strips default ports
36
+ // Manually strip trailing slash(es)
37
+ normalizedUrl = url.href.replace(/\/+$/, "");
38
+ } catch {
39
+ // Fallback for non-URL values (defensive)
40
+ normalizedUrl = baseUrl.replace(/\/+$/, "").toLowerCase();
41
+ }
42
+
43
+ // Step 3: Version-prefixed input
44
+ const input = `v2|${normalizedCase}|${normalizedUrl}`;
23
45
  const hasher = new Bun.CryptoHasher("sha256");
24
46
  hasher.update(input);
25
47
  return hasher.digest("hex").slice(0, 16);
@@ -52,7 +74,7 @@ export class CacheManager {
52
74
  const existing = await this.load(testCase, baseUrl);
53
75
 
54
76
  const entry: CacheEntry = {
55
- version: 1,
77
+ version: 2,
56
78
  testCase,
57
79
  baseUrl,
58
80
  steps,
@@ -82,7 +104,7 @@ export class CacheManager {
82
104
  const filePath = join(this.cacheDir, `${hash}.json`);
83
105
 
84
106
  try {
85
- return await Bun.file(filePath).json() as CacheEntry;
107
+ return (await Bun.file(filePath).json()) as CacheEntry;
86
108
  } catch {
87
109
  return null;
88
110
  }
@@ -102,4 +124,29 @@ export class CacheManager {
102
124
  // No-op if file doesn't exist
103
125
  }
104
126
  }
127
+
128
+ /**
129
+ * Migrate v1 cache entries by deleting them.
130
+ * Scans the cache directory for JSON files with version 1 and removes them.
131
+ * v2 entries are preserved. Handles missing/empty cache directories gracefully.
132
+ */
133
+ async migrateV1Cache(): Promise<void> {
134
+ try {
135
+ const files = await readdir(this.cacheDir);
136
+ for (const file of files) {
137
+ if (!file.endsWith(".json")) continue;
138
+ try {
139
+ const filePath = join(this.cacheDir, file);
140
+ const entry = await Bun.file(filePath).json();
141
+ if (entry?.version === 1) {
142
+ await Bun.file(filePath).delete();
143
+ }
144
+ } catch {
145
+ // Skip corrupted files silently
146
+ }
147
+ }
148
+ } catch {
149
+ // Cache dir doesn't exist yet -- nothing to migrate
150
+ }
151
+ }
105
152
  }
@@ -1,4 +1,4 @@
1
- import type { CachedStep } from "./types.ts";
1
+ import { type CachedStep } from "./types.ts";
2
2
 
3
3
  /**
4
4
  * Records MCP tool calls as CachedStep entries.
@@ -1,10 +1,9 @@
1
- import type { CachedStep } from "./types.ts";
1
+ import { describeToolCall } from "../output/tool-name-map.ts";
2
+ import { type OnStepProgress } from "../output/types.ts";
3
+ import { type CachedStep } from "./types.ts";
2
4
 
3
5
  /** Function signature for executing a tool by name with given input */
4
- export type ToolExecutor = (
5
- toolName: string,
6
- toolInput: Record<string, unknown>,
7
- ) => Promise<string>;
6
+ export type ToolExecutor = (toolName: string, toolInput: Record<string, unknown>) => Promise<string>;
8
7
 
9
8
  /** Result of replaying cached steps */
10
9
  export interface ReplayResult {
@@ -30,13 +29,19 @@ export class StepReplayer {
30
29
  * @param steps - The cached steps to replay
31
30
  * @returns Result indicating success or failure with details
32
31
  */
33
- async replay(steps: CachedStep[]): Promise<ReplayResult> {
32
+ async replay(steps: CachedStep[], onStepProgress?: OnStepProgress): Promise<ReplayResult> {
34
33
  for (let i = 0; i < steps.length; i++) {
35
34
  const step = steps[i];
36
35
  if (!step) continue;
37
36
 
38
37
  try {
39
38
  await this.executor(step.toolName, step.toolInput);
39
+ onStepProgress?.({
40
+ stepNumber: i + 1,
41
+ toolName: step.toolName,
42
+ input: step.toolInput,
43
+ description: describeToolCall(step.toolName, step.toolInput),
44
+ });
40
45
  } catch (error) {
41
46
  return {
42
47
  success: false,
@@ -6,7 +6,7 @@ export interface CachedStep {
6
6
 
7
7
  /** A complete cache entry with diagnostic metadata */
8
8
  export interface CacheEntry {
9
- version: 1;
9
+ version: 1 | 2;
10
10
  testCase: string;
11
11
  baseUrl: string;
12
12
  steps: CachedStep[];