superghost 0.1.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +7 -2
- package/src/agent/agent-runner.ts +23 -10
- package/src/agent/mcp-manager.ts +7 -14
- package/src/agent/model-factory.ts +1 -1
- package/src/agent/types.ts +1 -18
- package/src/cache/cache-manager.ts +52 -5
- package/src/cache/step-recorder.ts +1 -1
- package/src/cache/step-replayer.ts +11 -6
- package/src/cache/types.ts +1 -1
- package/src/cli.ts +282 -103
- package/src/config/loader.ts +6 -14
- package/src/config/types.ts +3 -2
- package/src/infra/preflight.ts +13 -0
- package/src/infra/process-manager.ts +6 -2
- package/src/infra/signals.ts +1 -1
- package/src/output/banner.ts +66 -0
- package/src/output/json-formatter.ts +150 -0
- package/src/output/reporter.ts +49 -20
- package/src/output/tool-name-map.ts +62 -0
- package/src/output/types.ts +27 -1
- package/src/runner/test-executor.ts +36 -33
- package/src/runner/test-runner.ts +7 -15
- package/src/runner/types.ts +1 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "superghost",
|
|
3
|
-
"version": "0.1.1",
|
|
3
|
+
"version": "0.3.0",
|
|
4
4
|
"description": "Plain English test cases with AI execution and instant cached replay for CI/CD",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -14,8 +14,10 @@
|
|
|
14
14
|
"scripts": {
|
|
15
15
|
"test": "bun test",
|
|
16
16
|
"typecheck": "bunx tsc --noEmit",
|
|
17
|
+
"lint": "bunx biome check .",
|
|
18
|
+
"lint:fix": "bunx biome check --write .",
|
|
17
19
|
"build:binary": "bun run scripts/build-binaries.ts",
|
|
18
|
-
"prepublishOnly": "bun test && bunx tsc --noEmit",
|
|
20
|
+
"prepublishOnly": "bun run lint && bun test && bunx tsc --noEmit",
|
|
19
21
|
"e2e": "bun run e2e/run-e2e.ts",
|
|
20
22
|
"e2e:smoke": "bun run e2e/run-e2e.ts smoke",
|
|
21
23
|
"e2e:browser": "bun run e2e/run-e2e.ts browser",
|
|
@@ -60,13 +62,16 @@
|
|
|
60
62
|
"@ai-sdk/openai": "^3.0.41",
|
|
61
63
|
"@modelcontextprotocol/sdk": "^1.27.1",
|
|
62
64
|
"@openrouter/ai-sdk-provider": "^2.2.5",
|
|
65
|
+
"@types/picomatch": "^4.0.2",
|
|
63
66
|
"ai": "^6.0.116",
|
|
64
67
|
"commander": "^14.0.3",
|
|
65
68
|
"nanospinner": "^1.2.2",
|
|
66
69
|
"picocolors": "^1.1.1",
|
|
70
|
+
"picomatch": "^4.0.3",
|
|
67
71
|
"zod": "^4.3.6"
|
|
68
72
|
},
|
|
69
73
|
"devDependencies": {
|
|
74
|
+
"@biomejs/biome": "2.4.6",
|
|
70
75
|
"@types/bun": "^1.3.10",
|
|
71
76
|
"@types/react": "^19.0.0",
|
|
72
77
|
"@types/react-dom": "^19.0.0",
|
|
@@ -1,8 +1,11 @@
|
|
|
1
1
|
import { generateText, Output, stepCountIs } from "ai";
|
|
2
2
|
import { z } from "zod";
|
|
3
|
+
|
|
3
4
|
import { StepRecorder } from "../cache/step-recorder.ts";
|
|
4
|
-
import
|
|
5
|
+
import { describeToolCall } from "../output/tool-name-map.ts";
|
|
6
|
+
import { type OnStepProgress } from "../output/types.ts";
|
|
5
7
|
import { buildSystemPrompt } from "./prompt.ts";
|
|
8
|
+
import { type AgentExecutionResult } from "./types.ts";
|
|
6
9
|
|
|
7
10
|
/**
|
|
8
11
|
* Schema for structured agent output.
|
|
@@ -10,9 +13,7 @@ import { buildSystemPrompt } from "./prompt.ts";
|
|
|
10
13
|
*/
|
|
11
14
|
const TestResultSchema = z.object({
|
|
12
15
|
passed: z.boolean().describe("Whether the test case passed"),
|
|
13
|
-
message: z
|
|
14
|
-
.string()
|
|
15
|
-
.describe("Brief diagnostic: what happened and what the page showed"),
|
|
16
|
+
message: z.string().describe("Brief diagnostic: what happened and what the page showed"),
|
|
16
17
|
});
|
|
17
18
|
|
|
18
19
|
/**
|
|
@@ -33,16 +34,14 @@ export async function executeAgent(config: {
|
|
|
33
34
|
recursionLimit: number;
|
|
34
35
|
globalContext?: string;
|
|
35
36
|
testContext?: string;
|
|
37
|
+
onStepProgress?: OnStepProgress;
|
|
36
38
|
}): Promise<AgentExecutionResult> {
|
|
37
39
|
const recorder = new StepRecorder();
|
|
38
40
|
const wrappedTools = recorder.wrapTools(config.tools);
|
|
39
41
|
|
|
40
|
-
const systemPrompt = buildSystemPrompt(
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
config.globalContext,
|
|
44
|
-
config.testContext,
|
|
45
|
-
);
|
|
42
|
+
const systemPrompt = buildSystemPrompt(config.testCase, config.baseUrl, config.globalContext, config.testContext);
|
|
43
|
+
|
|
44
|
+
let stepCounter = 0;
|
|
46
45
|
|
|
47
46
|
const { output } = await generateText({
|
|
48
47
|
model: config.model,
|
|
@@ -51,6 +50,20 @@ export async function executeAgent(config: {
|
|
|
51
50
|
prompt: `Execute the test case: "${config.testCase}"`,
|
|
52
51
|
stopWhen: stepCountIs(config.recursionLimit),
|
|
53
52
|
output: Output.object({ schema: TestResultSchema }),
|
|
53
|
+
experimental_onToolCallFinish: config.onStepProgress
|
|
54
|
+
? (event: any) => {
|
|
55
|
+
if (event.success) {
|
|
56
|
+
stepCounter++;
|
|
57
|
+
const input = (event.toolCall.input ?? {}) as Record<string, unknown>;
|
|
58
|
+
config.onStepProgress?.({
|
|
59
|
+
stepNumber: stepCounter,
|
|
60
|
+
toolName: event.toolCall.toolName,
|
|
61
|
+
input,
|
|
62
|
+
description: describeToolCall(event.toolCall.toolName, input),
|
|
63
|
+
});
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
: undefined,
|
|
54
67
|
});
|
|
55
68
|
|
|
56
69
|
if (output === null) {
|
package/src/agent/mcp-manager.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { createMCPClient } from "@ai-sdk/mcp";
|
|
2
2
|
import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js";
|
|
3
|
-
|
|
3
|
+
|
|
4
|
+
import { type Config } from "../config/types.ts";
|
|
4
5
|
import { getMcpCommand } from "../dist/paths.ts";
|
|
5
6
|
|
|
6
7
|
/**
|
|
@@ -12,8 +13,7 @@ import { getMcpCommand } from "../dist/paths.ts";
|
|
|
12
13
|
* regardless of test type.
|
|
13
14
|
*/
|
|
14
15
|
export class McpManager {
|
|
15
|
-
private playwrightClient: Awaited<ReturnType<typeof createMCPClient>> | null =
|
|
16
|
-
null;
|
|
16
|
+
private playwrightClient: Awaited<ReturnType<typeof createMCPClient>> | null = null;
|
|
17
17
|
private curlClient: Awaited<ReturnType<typeof createMCPClient>> | null = null;
|
|
18
18
|
|
|
19
19
|
constructor(private readonly config: Pick<Config, "browser" | "headless">) {}
|
|
@@ -27,11 +27,7 @@ export class McpManager {
|
|
|
27
27
|
const playwrightCmd = getMcpCommand("@playwright/mcp");
|
|
28
28
|
const curlCmd = getMcpCommand("@calibress/curl-mcp");
|
|
29
29
|
|
|
30
|
-
const playwrightArgs = [
|
|
31
|
-
...playwrightCmd.args,
|
|
32
|
-
"--isolated",
|
|
33
|
-
`--browser=${this.config.browser}`,
|
|
34
|
-
];
|
|
30
|
+
const playwrightArgs = [...playwrightCmd.args, "--isolated", `--browser=${this.config.browser}`];
|
|
35
31
|
|
|
36
32
|
if (this.config.headless) {
|
|
37
33
|
playwrightArgs.splice(playwrightCmd.args.length, 0, "--headless");
|
|
@@ -57,8 +53,8 @@ export class McpManager {
|
|
|
57
53
|
* Provides ALL tools to the agent regardless of test type.
|
|
58
54
|
*/
|
|
59
55
|
async getTools(): Promise<Record<string, any>> {
|
|
60
|
-
const playwrightTools = await this.playwrightClient
|
|
61
|
-
const curlTools = await this.curlClient
|
|
56
|
+
const playwrightTools = await this.playwrightClient?.tools();
|
|
57
|
+
const curlTools = await this.curlClient?.tools();
|
|
62
58
|
return { ...playwrightTools, ...curlTools };
|
|
63
59
|
}
|
|
64
60
|
|
|
@@ -68,10 +64,7 @@ export class McpManager {
|
|
|
68
64
|
* even if one fails to close.
|
|
69
65
|
*/
|
|
70
66
|
async close(): Promise<void> {
|
|
71
|
-
await Promise.allSettled([
|
|
72
|
-
this.playwrightClient?.close(),
|
|
73
|
-
this.curlClient?.close(),
|
|
74
|
-
]);
|
|
67
|
+
await Promise.allSettled([this.playwrightClient?.close(), this.curlClient?.close()]);
|
|
75
68
|
this.playwrightClient = null;
|
|
76
69
|
this.curlClient = null;
|
|
77
70
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { anthropic } from "@ai-sdk/anthropic";
|
|
2
|
-
import { openai } from "@ai-sdk/openai";
|
|
3
2
|
import { google } from "@ai-sdk/google";
|
|
3
|
+
import { openai } from "@ai-sdk/openai";
|
|
4
4
|
import { createOpenRouter } from "@openrouter/ai-sdk-provider";
|
|
5
5
|
|
|
6
6
|
/** Supported LLM provider names */
|
package/src/agent/types.ts
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
|
-
import type { CachedStep } from "../cache/types.ts";
|
|
2
|
-
import type { ProviderName } from "./model-factory.ts";
|
|
1
|
+
import { type CachedStep } from "../cache/types.ts";
|
|
3
2
|
|
|
4
3
|
/** Result of a single AI agent execution */
|
|
5
4
|
export interface AgentExecutionResult {
|
|
@@ -10,19 +9,3 @@ export interface AgentExecutionResult {
|
|
|
10
9
|
/** Recorded tool call steps for caching */
|
|
11
10
|
steps: CachedStep[];
|
|
12
11
|
}
|
|
13
|
-
|
|
14
|
-
/** Configuration for a single agent run */
|
|
15
|
-
export interface AgentConfig {
|
|
16
|
-
/** Model identifier (e.g., "claude-sonnet-4-6", "gpt-4o") */
|
|
17
|
-
model: string;
|
|
18
|
-
/** LLM provider */
|
|
19
|
-
provider: ProviderName;
|
|
20
|
-
/** Maximum number of agent steps */
|
|
21
|
-
recursionLimit: number;
|
|
22
|
-
/** Plain English test case description */
|
|
23
|
-
testCase: string;
|
|
24
|
-
/** Base URL for the application under test */
|
|
25
|
-
baseUrl: string;
|
|
26
|
-
/** Optional per-test context appended to system prompt */
|
|
27
|
-
context?: string;
|
|
28
|
-
}
|
|
@@ -1,6 +1,7 @@
|
|
|
1
|
+
import { mkdir, readdir, rename } from "node:fs/promises";
|
|
1
2
|
import { join } from "node:path";
|
|
2
|
-
|
|
3
|
-
import type
|
|
3
|
+
|
|
4
|
+
import { type CachedStep, type CacheEntry } from "./types.ts";
|
|
4
5
|
|
|
5
6
|
/**
|
|
6
7
|
* Manages file-based cache entries for test step recordings.
|
|
@@ -17,9 +18,30 @@ export class CacheManager {
|
|
|
17
18
|
/**
|
|
18
19
|
* Generate a deterministic 16-char hex hash key.
|
|
19
20
|
* Uses Bun-native CryptoHasher for SHA-256 hashing.
|
|
21
|
+
*
|
|
22
|
+
* Normalization pipeline (v2):
|
|
23
|
+
* 1. Unicode NFC normalization + whitespace collapse (case-preserving)
|
|
24
|
+
* 2. URL normalization (lowercase hostname, strip trailing slash)
|
|
25
|
+
* 3. Version-prefixed input string ("v2|...")
|
|
20
26
|
*/
|
|
21
27
|
static hashKey(testCase: string, baseUrl: string): string {
|
|
22
|
-
|
|
28
|
+
// Step 1: Unicode NFC + whitespace collapse (case-preserving per user decision)
|
|
29
|
+
const normalizedCase = testCase.normalize("NFC").replace(/\s+/g, " ").trim();
|
|
30
|
+
|
|
31
|
+
// Step 2: URL normalization (lowercase hostname, strip trailing slash)
|
|
32
|
+
let normalizedUrl: string;
|
|
33
|
+
try {
|
|
34
|
+
const url = new URL(baseUrl);
|
|
35
|
+
// new URL() lowercases hostname and strips default ports
|
|
36
|
+
// Manually strip trailing slash(es)
|
|
37
|
+
normalizedUrl = url.href.replace(/\/+$/, "");
|
|
38
|
+
} catch {
|
|
39
|
+
// Fallback for non-URL values (defensive)
|
|
40
|
+
normalizedUrl = baseUrl.replace(/\/+$/, "").toLowerCase();
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
// Step 3: Version-prefixed input
|
|
44
|
+
const input = `v2|${normalizedCase}|${normalizedUrl}`;
|
|
23
45
|
const hasher = new Bun.CryptoHasher("sha256");
|
|
24
46
|
hasher.update(input);
|
|
25
47
|
return hasher.digest("hex").slice(0, 16);
|
|
@@ -52,7 +74,7 @@ export class CacheManager {
|
|
|
52
74
|
const existing = await this.load(testCase, baseUrl);
|
|
53
75
|
|
|
54
76
|
const entry: CacheEntry = {
|
|
55
|
-
version: 1,
|
|
77
|
+
version: 2,
|
|
56
78
|
testCase,
|
|
57
79
|
baseUrl,
|
|
58
80
|
steps,
|
|
@@ -82,7 +104,7 @@ export class CacheManager {
|
|
|
82
104
|
const filePath = join(this.cacheDir, `${hash}.json`);
|
|
83
105
|
|
|
84
106
|
try {
|
|
85
|
-
return await Bun.file(filePath).json() as CacheEntry;
|
|
107
|
+
return (await Bun.file(filePath).json()) as CacheEntry;
|
|
86
108
|
} catch {
|
|
87
109
|
return null;
|
|
88
110
|
}
|
|
@@ -102,4 +124,29 @@ export class CacheManager {
|
|
|
102
124
|
// No-op if file doesn't exist
|
|
103
125
|
}
|
|
104
126
|
}
|
|
127
|
+
|
|
128
|
+
/**
|
|
129
|
+
* Migrate v1 cache entries by deleting them.
|
|
130
|
+
* Scans the cache directory for JSON files with version 1 and removes them.
|
|
131
|
+
* v2 entries are preserved. Handles missing/empty cache directories gracefully.
|
|
132
|
+
*/
|
|
133
|
+
async migrateV1Cache(): Promise<void> {
|
|
134
|
+
try {
|
|
135
|
+
const files = await readdir(this.cacheDir);
|
|
136
|
+
for (const file of files) {
|
|
137
|
+
if (!file.endsWith(".json")) continue;
|
|
138
|
+
try {
|
|
139
|
+
const filePath = join(this.cacheDir, file);
|
|
140
|
+
const entry = await Bun.file(filePath).json();
|
|
141
|
+
if (entry?.version === 1) {
|
|
142
|
+
await Bun.file(filePath).delete();
|
|
143
|
+
}
|
|
144
|
+
} catch {
|
|
145
|
+
// Skip corrupted files silently
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
} catch {
|
|
149
|
+
// Cache dir doesn't exist yet -- nothing to migrate
|
|
150
|
+
}
|
|
151
|
+
}
|
|
105
152
|
}
|
|
@@ -1,10 +1,9 @@
|
|
|
1
|
-
import
|
|
1
|
+
import { describeToolCall } from "../output/tool-name-map.ts";
|
|
2
|
+
import { type OnStepProgress } from "../output/types.ts";
|
|
3
|
+
import { type CachedStep } from "./types.ts";
|
|
2
4
|
|
|
3
5
|
/** Function signature for executing a tool by name with given input */
|
|
4
|
-
export type ToolExecutor = (
|
|
5
|
-
toolName: string,
|
|
6
|
-
toolInput: Record<string, unknown>,
|
|
7
|
-
) => Promise<string>;
|
|
6
|
+
export type ToolExecutor = (toolName: string, toolInput: Record<string, unknown>) => Promise<string>;
|
|
8
7
|
|
|
9
8
|
/** Result of replaying cached steps */
|
|
10
9
|
export interface ReplayResult {
|
|
@@ -30,13 +29,19 @@ export class StepReplayer {
|
|
|
30
29
|
* @param steps - The cached steps to replay
|
|
31
30
|
* @returns Result indicating success or failure with details
|
|
32
31
|
*/
|
|
33
|
-
async replay(steps: CachedStep[]): Promise<ReplayResult> {
|
|
32
|
+
async replay(steps: CachedStep[], onStepProgress?: OnStepProgress): Promise<ReplayResult> {
|
|
34
33
|
for (let i = 0; i < steps.length; i++) {
|
|
35
34
|
const step = steps[i];
|
|
36
35
|
if (!step) continue;
|
|
37
36
|
|
|
38
37
|
try {
|
|
39
38
|
await this.executor(step.toolName, step.toolInput);
|
|
39
|
+
onStepProgress?.({
|
|
40
|
+
stepNumber: i + 1,
|
|
41
|
+
toolName: step.toolName,
|
|
42
|
+
input: step.toolInput,
|
|
43
|
+
description: describeToolCall(step.toolName, step.toolInput),
|
|
44
|
+
});
|
|
40
45
|
} catch (error) {
|
|
41
46
|
return {
|
|
42
47
|
success: false,
|