superghost 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +3 -1
- package/src/agent/agent-runner.ts +22 -0
- package/src/agent/types.ts +0 -17
- package/src/cache/cache-manager.ts +49 -3
- package/src/cache/step-replayer.ts +9 -1
- package/src/cache/types.ts +1 -1
- package/src/cli.ts +120 -12
- package/src/infra/preflight.ts +13 -0
- package/src/output/banner.ts +70 -0
- package/src/output/reporter.ts +46 -16
- package/src/output/tool-name-map.ts +71 -0
- package/src/output/types.ts +26 -0
- package/src/runner/test-executor.ts +26 -15
- package/src/runner/test-runner.ts +1 -0
- package/src/runner/types.ts +1 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "superghost",
|
|
3
|
-
"version": "0.1.0",
|
|
3
|
+
"version": "0.2.0",
|
|
4
4
|
"description": "Plain English test cases with AI execution and instant cached replay for CI/CD",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -60,10 +60,12 @@
|
|
|
60
60
|
"@ai-sdk/openai": "^3.0.41",
|
|
61
61
|
"@modelcontextprotocol/sdk": "^1.27.1",
|
|
62
62
|
"@openrouter/ai-sdk-provider": "^2.2.5",
|
|
63
|
+
"@types/picomatch": "^4.0.2",
|
|
63
64
|
"ai": "^6.0.116",
|
|
64
65
|
"commander": "^14.0.3",
|
|
65
66
|
"nanospinner": "^1.2.2",
|
|
66
67
|
"picocolors": "^1.1.1",
|
|
68
|
+
"picomatch": "^4.0.3",
|
|
67
69
|
"zod": "^4.3.6"
|
|
68
70
|
},
|
|
69
71
|
"devDependencies": {
|
package/src/agent/agent-runner.ts
CHANGED
|
@@ -3,6 +3,8 @@ import { z } from "zod";
|
|
|
3
3
|
import { StepRecorder } from "../cache/step-recorder.ts";
|
|
4
4
|
import type { AgentExecutionResult } from "./types.ts";
|
|
5
5
|
import { buildSystemPrompt } from "./prompt.ts";
|
|
6
|
+
import { describeToolCall } from "../output/tool-name-map.ts";
|
|
7
|
+
import type { OnStepProgress } from "../output/types.ts";
|
|
6
8
|
|
|
7
9
|
/**
|
|
8
10
|
* Schema for structured agent output.
|
|
@@ -33,6 +35,7 @@ export async function executeAgent(config: {
|
|
|
33
35
|
recursionLimit: number;
|
|
34
36
|
globalContext?: string;
|
|
35
37
|
testContext?: string;
|
|
38
|
+
onStepProgress?: OnStepProgress;
|
|
36
39
|
}): Promise<AgentExecutionResult> {
|
|
37
40
|
const recorder = new StepRecorder();
|
|
38
41
|
const wrappedTools = recorder.wrapTools(config.tools);
|
|
@@ -44,6 +47,8 @@ export async function executeAgent(config: {
|
|
|
44
47
|
config.testContext,
|
|
45
48
|
);
|
|
46
49
|
|
|
50
|
+
let stepCounter = 0;
|
|
51
|
+
|
|
47
52
|
const { output } = await generateText({
|
|
48
53
|
model: config.model,
|
|
49
54
|
tools: wrappedTools,
|
|
@@ -51,6 +56,23 @@ export async function executeAgent(config: {
|
|
|
51
56
|
prompt: `Execute the test case: "${config.testCase}"`,
|
|
52
57
|
stopWhen: stepCountIs(config.recursionLimit),
|
|
53
58
|
output: Output.object({ schema: TestResultSchema }),
|
|
59
|
+
experimental_onToolCallFinish: config.onStepProgress
|
|
60
|
+
? (event: any) => {
|
|
61
|
+
if (event.success) {
|
|
62
|
+
stepCounter++;
|
|
63
|
+
const input = (event.toolCall.input ?? {}) as Record<
|
|
64
|
+
string,
|
|
65
|
+
unknown
|
|
66
|
+
>;
|
|
67
|
+
config.onStepProgress!({
|
|
68
|
+
stepNumber: stepCounter,
|
|
69
|
+
toolName: event.toolCall.toolName,
|
|
70
|
+
input,
|
|
71
|
+
description: describeToolCall(event.toolCall.toolName, input),
|
|
72
|
+
});
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
: undefined,
|
|
54
76
|
});
|
|
55
77
|
|
|
56
78
|
if (output === null) {
|
package/src/agent/types.ts
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import type { CachedStep } from "../cache/types.ts";
|
|
2
|
-
import type { ProviderName } from "./model-factory.ts";
|
|
3
2
|
|
|
4
3
|
/** Result of a single AI agent execution */
|
|
5
4
|
export interface AgentExecutionResult {
|
|
@@ -10,19 +9,3 @@ export interface AgentExecutionResult {
|
|
|
10
9
|
/** Recorded tool call steps for caching */
|
|
11
10
|
steps: CachedStep[];
|
|
12
11
|
}
|
|
13
|
-
|
|
14
|
-
/** Configuration for a single agent run */
|
|
15
|
-
export interface AgentConfig {
|
|
16
|
-
/** Model identifier (e.g., "claude-sonnet-4-6", "gpt-4o") */
|
|
17
|
-
model: string;
|
|
18
|
-
/** LLM provider */
|
|
19
|
-
provider: ProviderName;
|
|
20
|
-
/** Maximum number of agent steps */
|
|
21
|
-
recursionLimit: number;
|
|
22
|
-
/** Plain English test case description */
|
|
23
|
-
testCase: string;
|
|
24
|
-
/** Base URL for the application under test */
|
|
25
|
-
baseUrl: string;
|
|
26
|
-
/** Optional per-test context appended to system prompt */
|
|
27
|
-
context?: string;
|
|
28
|
-
}
|
package/src/cache/cache-manager.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { join } from "node:path";
|
|
2
|
-
import { mkdir, rename } from "node:fs/promises";
|
|
2
|
+
import { mkdir, rename, readdir } from "node:fs/promises";
|
|
3
3
|
import type { CacheEntry, CachedStep } from "./types.ts";
|
|
4
4
|
|
|
5
5
|
/**
|
|
@@ -17,9 +17,30 @@ export class CacheManager {
|
|
|
17
17
|
/**
|
|
18
18
|
* Generate a deterministic 16-char hex hash key.
|
|
19
19
|
* Uses Bun-native CryptoHasher for SHA-256 hashing.
|
|
20
|
+
*
|
|
21
|
+
* Normalization pipeline (v2):
|
|
22
|
+
* 1. Unicode NFC normalization + whitespace collapse (case-preserving)
|
|
23
|
+
* 2. URL normalization (lowercase hostname, strip trailing slash)
|
|
24
|
+
* 3. Version-prefixed input string ("v2|...")
|
|
20
25
|
*/
|
|
21
26
|
static hashKey(testCase: string, baseUrl: string): string {
|
|
22
|
-
|
|
27
|
+
// Step 1: Unicode NFC + whitespace collapse (case-preserving per user decision)
|
|
28
|
+
const normalizedCase = testCase.normalize("NFC").replace(/\s+/g, " ").trim();
|
|
29
|
+
|
|
30
|
+
// Step 2: URL normalization (lowercase hostname, strip trailing slash)
|
|
31
|
+
let normalizedUrl: string;
|
|
32
|
+
try {
|
|
33
|
+
const url = new URL(baseUrl);
|
|
34
|
+
// new URL() lowercases hostname and strips default ports
|
|
35
|
+
// Manually strip trailing slash(es)
|
|
36
|
+
normalizedUrl = url.href.replace(/\/+$/, "");
|
|
37
|
+
} catch {
|
|
38
|
+
// Fallback for non-URL values (defensive)
|
|
39
|
+
normalizedUrl = baseUrl.replace(/\/+$/, "").toLowerCase();
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
// Step 3: Version-prefixed input
|
|
43
|
+
const input = `v2|${normalizedCase}|${normalizedUrl}`;
|
|
23
44
|
const hasher = new Bun.CryptoHasher("sha256");
|
|
24
45
|
hasher.update(input);
|
|
25
46
|
return hasher.digest("hex").slice(0, 16);
|
|
@@ -52,7 +73,7 @@ export class CacheManager {
|
|
|
52
73
|
const existing = await this.load(testCase, baseUrl);
|
|
53
74
|
|
|
54
75
|
const entry: CacheEntry = {
|
|
55
|
-
version: 1,
|
|
76
|
+
version: 2,
|
|
56
77
|
testCase,
|
|
57
78
|
baseUrl,
|
|
58
79
|
steps,
|
|
@@ -102,4 +123,29 @@ export class CacheManager {
|
|
|
102
123
|
// No-op if file doesn't exist
|
|
103
124
|
}
|
|
104
125
|
}
|
|
126
|
+
|
|
127
|
+
/**
|
|
128
|
+
* Migrate v1 cache entries by deleting them.
|
|
129
|
+
* Scans the cache directory for JSON files with version 1 and removes them.
|
|
130
|
+
* v2 entries are preserved. Handles missing/empty cache directories gracefully.
|
|
131
|
+
*/
|
|
132
|
+
async migrateV1Cache(): Promise<void> {
|
|
133
|
+
try {
|
|
134
|
+
const files = await readdir(this.cacheDir);
|
|
135
|
+
for (const file of files) {
|
|
136
|
+
if (!file.endsWith(".json")) continue;
|
|
137
|
+
try {
|
|
138
|
+
const filePath = join(this.cacheDir, file);
|
|
139
|
+
const entry = await Bun.file(filePath).json();
|
|
140
|
+
if (entry?.version === 1) {
|
|
141
|
+
await Bun.file(filePath).delete();
|
|
142
|
+
}
|
|
143
|
+
} catch {
|
|
144
|
+
// Skip corrupted files silently
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
} catch {
|
|
148
|
+
// Cache dir doesn't exist yet -- nothing to migrate
|
|
149
|
+
}
|
|
150
|
+
}
|
|
105
151
|
}
|
package/src/cache/step-replayer.ts
CHANGED
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
import type { CachedStep } from "./types.ts";
|
|
2
|
+
import type { OnStepProgress } from "../output/types.ts";
|
|
3
|
+
import { describeToolCall } from "../output/tool-name-map.ts";
|
|
2
4
|
|
|
3
5
|
/** Function signature for executing a tool by name with given input */
|
|
4
6
|
export type ToolExecutor = (
|
|
@@ -30,13 +32,19 @@ export class StepReplayer {
|
|
|
30
32
|
* @param steps - The cached steps to replay
|
|
31
33
|
* @returns Result indicating success or failure with details
|
|
32
34
|
*/
|
|
33
|
-
async replay(steps: CachedStep[]): Promise<ReplayResult> {
|
|
35
|
+
async replay(steps: CachedStep[], onStepProgress?: OnStepProgress): Promise<ReplayResult> {
|
|
34
36
|
for (let i = 0; i < steps.length; i++) {
|
|
35
37
|
const step = steps[i];
|
|
36
38
|
if (!step) continue;
|
|
37
39
|
|
|
38
40
|
try {
|
|
39
41
|
await this.executor(step.toolName, step.toolInput);
|
|
42
|
+
onStepProgress?.({
|
|
43
|
+
stepNumber: i + 1,
|
|
44
|
+
toolName: step.toolName,
|
|
45
|
+
input: step.toolInput,
|
|
46
|
+
description: describeToolCall(step.toolName, step.toolInput),
|
|
47
|
+
});
|
|
40
48
|
} catch (error) {
|
|
41
49
|
return {
|
|
42
50
|
success: false,
|
package/src/cache/types.ts
CHANGED
package/src/cli.ts
CHANGED
|
@@ -5,7 +5,7 @@ import pc from "picocolors";
|
|
|
5
5
|
import { loadConfig, ConfigLoadError } from "./config/loader.ts";
|
|
6
6
|
import { TestRunner } from "./runner/test-runner.ts";
|
|
7
7
|
import type { ExecuteFn } from "./runner/test-runner.ts";
|
|
8
|
-
import { ConsoleReporter } from "./output/reporter.ts";
|
|
8
|
+
import { ConsoleReporter, writeStderr } from "./output/reporter.ts";
|
|
9
9
|
import { ProcessManager } from "./infra/process-manager.ts";
|
|
10
10
|
import { setupSignalHandlers } from "./infra/signals.ts";
|
|
11
11
|
import { McpManager } from "./agent/mcp-manager.ts";
|
|
@@ -20,10 +20,32 @@ import {
|
|
|
20
20
|
} from "./agent/model-factory.ts";
|
|
21
21
|
import type { ProviderName } from "./agent/model-factory.ts";
|
|
22
22
|
import { executeAgent } from "./agent/agent-runner.ts";
|
|
23
|
+
import type { OnStepProgress } from "./output/types.ts";
|
|
24
|
+
import picomatch from "picomatch";
|
|
25
|
+
import { checkBaseUrlReachable } from "./infra/preflight.ts";
|
|
23
26
|
import { isStandaloneBinary } from "./dist/paths.ts";
|
|
24
27
|
import { ensureMcpDependencies } from "./dist/setup.ts";
|
|
28
|
+
import { animateBanner } from "./output/banner.ts";
|
|
25
29
|
import pkg from "../package.json";
|
|
26
30
|
|
|
31
|
+
/** Print the run header and any stacked annotations to stderr */
|
|
32
|
+
function printRunHeader(testCount: number, totalTestCount: number | undefined, annotations: string[]): void {
|
|
33
|
+
let header = `\n${pc.bold("superghost")} v${pkg.version} / Running ${testCount}`;
|
|
34
|
+
if (totalTestCount !== undefined) {
|
|
35
|
+
header += ` of ${totalTestCount}`;
|
|
36
|
+
}
|
|
37
|
+
header += ` test(s)...`;
|
|
38
|
+
writeStderr(header);
|
|
39
|
+
writeStderr("");
|
|
40
|
+
|
|
41
|
+
for (const annotation of annotations) {
|
|
42
|
+
writeStderr(pc.dim(` ${annotation}`));
|
|
43
|
+
}
|
|
44
|
+
if (annotations.length > 0) {
|
|
45
|
+
writeStderr("");
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
|
|
27
49
|
const program = new Command();
|
|
28
50
|
|
|
29
51
|
program
|
|
@@ -32,7 +54,18 @@ program
|
|
|
32
54
|
.version(pkg.version)
|
|
33
55
|
.requiredOption("-c, --config <path>", "Path to YAML config file")
|
|
34
56
|
.option("--headed", "Run browser in headed mode (visible browser window)")
|
|
35
|
-
.
|
|
57
|
+
.option("--only <pattern>", "Run only tests matching glob pattern")
|
|
58
|
+
.option("--no-cache", "Bypass cache reads (still writes on success)")
|
|
59
|
+
.option("--dry-run", "List tests and validate config without executing")
|
|
60
|
+
.option("--verbose", "Show per-step tool call output during execution")
|
|
61
|
+
.exitOverride((err) => {
|
|
62
|
+
// Commander writes its own error message to stderr.
|
|
63
|
+
// Re-exit with code 2 for config-class errors (missing required option, unknown option).
|
|
64
|
+
if (err.exitCode !== 0) {
|
|
65
|
+
process.exit(2);
|
|
66
|
+
}
|
|
67
|
+
})
|
|
68
|
+
.action(async (options: { config: string; headed?: boolean; only?: string; cache: boolean; dryRun?: boolean; verbose?: boolean }) => {
|
|
36
69
|
const pm = new ProcessManager();
|
|
37
70
|
setupSignalHandlers(pm);
|
|
38
71
|
|
|
@@ -48,7 +81,7 @@ program
|
|
|
48
81
|
if (options.headed) {
|
|
49
82
|
config.headless = false;
|
|
50
83
|
}
|
|
51
|
-
const reporter = new ConsoleReporter();
|
|
84
|
+
const reporter = new ConsoleReporter(options.verbose ?? false);
|
|
52
85
|
|
|
53
86
|
// Infer provider: use explicit modelProvider unless it matches default and model suggests otherwise
|
|
54
87
|
const provider =
|
|
@@ -59,6 +92,64 @@ program
|
|
|
59
92
|
// Validate API key at startup before any tests run
|
|
60
93
|
validateApiKey(provider);
|
|
61
94
|
|
|
95
|
+
// Apply --only filter before any expensive operations
|
|
96
|
+
const totalTestCount = config.tests.length;
|
|
97
|
+
if (options.only) {
|
|
98
|
+
const allTestNames = config.tests.map((t) => t.name);
|
|
99
|
+
const isMatch = picomatch(options.only, { nocase: true });
|
|
100
|
+
config.tests = config.tests.filter((t) => isMatch(t.name));
|
|
101
|
+
|
|
102
|
+
if (config.tests.length === 0) {
|
|
103
|
+
const names = allTestNames.map((n) => ` - ${n}`).join("\n");
|
|
104
|
+
writeStderr(`${pc.red("Error:")} No tests match pattern "${options.only}"\n\nAvailable tests:\n${names}`);
|
|
105
|
+
setTimeout(() => process.exit(2), 100);
|
|
106
|
+
return;
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
// Dry-run: list tests with cache/AI source labels, then exit
|
|
111
|
+
if (options.dryRun) {
|
|
112
|
+
const cacheManager = new CacheManager(config.cacheDir);
|
|
113
|
+
|
|
114
|
+
// Print header with annotations
|
|
115
|
+
const dryRunAnnotations = ["(dry-run)"];
|
|
116
|
+
if (options.only) dryRunAnnotations.push(`(filtered by --only "${options.only}")`);
|
|
117
|
+
printRunHeader(config.tests.length, options.only ? totalTestCount : undefined, dryRunAnnotations);
|
|
118
|
+
|
|
119
|
+
// Determine max test name length for padding
|
|
120
|
+
const maxNameLen = Math.max(...config.tests.map(t => t.name.length));
|
|
121
|
+
let cachedCount = 0;
|
|
122
|
+
|
|
123
|
+
for (let i = 0; i < config.tests.length; i++) {
|
|
124
|
+
const test = config.tests[i];
|
|
125
|
+
const baseUrl = test.baseUrl ?? config.baseUrl ?? "";
|
|
126
|
+
const entry = await cacheManager.load(test.case, baseUrl);
|
|
127
|
+
const source = entry ? "cache" : "ai";
|
|
128
|
+
if (entry) cachedCount++;
|
|
129
|
+
|
|
130
|
+
const paddedName = test.name.padEnd(maxNameLen);
|
|
131
|
+
writeStderr(` ${i + 1}. ${paddedName} (${source})`);
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
writeStderr("");
|
|
135
|
+
writeStderr(`${config.tests.length} tests, ${cachedCount} cached`);
|
|
136
|
+
|
|
137
|
+
setTimeout(() => process.exit(0), 100);
|
|
138
|
+
return;
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
// Preflight: check baseUrl reachability (only if global baseUrl configured)
|
|
142
|
+
if (config.baseUrl) {
|
|
143
|
+
try {
|
|
144
|
+
await checkBaseUrlReachable(config.baseUrl);
|
|
145
|
+
} catch {
|
|
146
|
+
writeStderr(`${pc.red("Error:")} baseUrl unreachable: ${config.baseUrl}`);
|
|
147
|
+
writeStderr(` Check that the server is running and the URL is correct.`);
|
|
148
|
+
setTimeout(() => process.exit(2), 100);
|
|
149
|
+
return;
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
|
|
62
153
|
// Create AI model
|
|
63
154
|
const model = createModel(config.model, provider);
|
|
64
155
|
|
|
@@ -72,6 +163,7 @@ program
|
|
|
72
163
|
|
|
73
164
|
// Create cache subsystem
|
|
74
165
|
const cacheManager = new CacheManager(config.cacheDir);
|
|
166
|
+
await cacheManager.migrateV1Cache();
|
|
75
167
|
const toolExecutor: ToolExecutor = async (toolName, toolInput) => {
|
|
76
168
|
const tool = tools[toolName];
|
|
77
169
|
if (!tool) throw new Error(`Tool not found: ${toolName}`);
|
|
@@ -79,6 +171,9 @@ program
|
|
|
79
171
|
};
|
|
80
172
|
const replayer = new StepReplayer(toolExecutor);
|
|
81
173
|
|
|
174
|
+
// Create onStepProgress callback bound to reporter
|
|
175
|
+
const onStepProgress: OnStepProgress = (step) => reporter.onStepProgress(step);
|
|
176
|
+
|
|
82
177
|
// Create TestExecutor with cache-first strategy
|
|
83
178
|
const executor = new TestExecutor({
|
|
84
179
|
cacheManager,
|
|
@@ -88,18 +183,23 @@ program
|
|
|
88
183
|
tools,
|
|
89
184
|
config,
|
|
90
185
|
globalContext: config.context,
|
|
186
|
+
noCache: !options.cache,
|
|
187
|
+
onStepProgress,
|
|
91
188
|
});
|
|
92
189
|
|
|
93
190
|
// Wire execute function for TestRunner
|
|
94
191
|
const executeFn: ExecuteFn = async (testCase, baseUrl, testContext?) =>
|
|
95
192
|
executor.execute(testCase, baseUrl, testContext);
|
|
96
193
|
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
);
|
|
194
|
+
const runAnnotations: string[] = [];
|
|
195
|
+
if (options.only) runAnnotations.push(`(filtered by --only "${options.only}")`);
|
|
196
|
+
if (!options.cache) runAnnotations.push("(cache disabled)");
|
|
197
|
+
if (options.verbose) runAnnotations.push("(verbose)");
|
|
198
|
+
printRunHeader(config.tests.length, options.only ? totalTestCount : undefined, runAnnotations);
|
|
100
199
|
|
|
101
200
|
const runner = new TestRunner(config, reporter, executeFn);
|
|
102
201
|
const result = await runner.run();
|
|
202
|
+
result.skipped = options.only ? totalTestCount - config.tests.length : 0;
|
|
103
203
|
|
|
104
204
|
await mcpManager.close();
|
|
105
205
|
await pm.killAll();
|
|
@@ -112,17 +212,25 @@ program
|
|
|
112
212
|
await pm.killAll();
|
|
113
213
|
|
|
114
214
|
if (error instanceof ConfigLoadError) {
|
|
115
|
-
|
|
116
|
-
setTimeout(() => process.exit(
|
|
215
|
+
writeStderr(`${pc.red("Error:")} ${error.message}`);
|
|
216
|
+
setTimeout(() => process.exit(2), 100);
|
|
117
217
|
return;
|
|
118
218
|
}
|
|
119
219
|
if (error instanceof Error && error.message.startsWith("Missing API key")) {
|
|
120
|
-
|
|
121
|
-
setTimeout(() => process.exit(
|
|
220
|
+
writeStderr(`${pc.red("Error:")} ${error.message}`);
|
|
221
|
+
setTimeout(() => process.exit(2), 100);
|
|
122
222
|
return;
|
|
123
223
|
}
|
|
124
|
-
|
|
224
|
+
const msg = error instanceof Error ? error.message : String(error);
|
|
225
|
+
writeStderr(`${pc.red("Unexpected error:")} ${msg}`);
|
|
226
|
+
setTimeout(() => process.exit(2), 100);
|
|
125
227
|
}
|
|
126
228
|
});
|
|
127
229
|
|
|
128
|
-
|
|
230
|
+
(async () => {
|
|
231
|
+
const isHelpRequest = process.argv.includes("--help") || process.argv.includes("-h");
|
|
232
|
+
if (isHelpRequest) {
|
|
233
|
+
await animateBanner();
|
|
234
|
+
}
|
|
235
|
+
await program.parseAsync();
|
|
236
|
+
})();
|
package/src/infra/preflight.ts
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Preflight reachability check for baseUrl.
|
|
3
|
+
*
|
|
4
|
+
* Resolves on ANY HTTP response (even 4xx/5xx -- those prove the server is reachable).
|
|
5
|
+
* Throws on network-level failures: connection refused, DNS failure, timeout.
|
|
6
|
+
*/
|
|
7
|
+
export async function checkBaseUrlReachable(url: string, timeoutMs = 5000): Promise<void> {
|
|
8
|
+
await fetch(url, {
|
|
9
|
+
method: "HEAD",
|
|
10
|
+
signal: AbortSignal.timeout(timeoutMs),
|
|
11
|
+
redirect: "follow",
|
|
12
|
+
});
|
|
13
|
+
}
|
package/src/output/banner.ts
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
function hslToRgb(h: number, s: number, l: number): [number, number, number] {
|
|
2
|
+
s /= 100;
|
|
3
|
+
l /= 100;
|
|
4
|
+
const k = (n: number) => (n + h / 30) % 12;
|
|
5
|
+
const a = s * Math.min(l, 1 - l);
|
|
6
|
+
const f = (n: number) => l - a * Math.max(-1, Math.min(k(n) - 3, Math.min(9 - k(n), 1)));
|
|
7
|
+
return [Math.round(f(0) * 255), Math.round(f(8) * 255), Math.round(f(4) * 255)];
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
function colorChar(char: string, hue: number): string {
|
|
11
|
+
const [r, g, b] = hslToRgb(hue % 360, 100, 60);
|
|
12
|
+
return `\x1b[38;2;${r};${g};${b}m${char}\x1b[0m`;
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
function rainbowLine(text: string, hueOffset: number): string {
|
|
16
|
+
const hueStep = 360 / text.length;
|
|
17
|
+
return text
|
|
18
|
+
.split("")
|
|
19
|
+
.map((char, i) => colorChar(char, (hueOffset + i * hueStep) % 360))
|
|
20
|
+
.join("");
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
const TITLE = " Super Ghost ";
|
|
24
|
+
const BANNER_LINES = [
|
|
25
|
+
` 👻${TITLE}👻`,
|
|
26
|
+
` ─────────────────────`,
|
|
27
|
+
` AI-powered E2E testing`,
|
|
28
|
+
];
|
|
29
|
+
|
|
30
|
+
function renderBanner(hueOffset: number): string[] {
|
|
31
|
+
return [
|
|
32
|
+
` 👻${rainbowLine(TITLE, hueOffset)}👻`,
|
|
33
|
+
` \x1b[2m─────────────────────\x1b[0m`,
|
|
34
|
+
` \x1b[2mAI-powered E2E testing\x1b[0m`,
|
|
35
|
+
];
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
const FRAMES = 15;
|
|
39
|
+
const FRAME_MS = 60;
|
|
40
|
+
const HUE_STEP = 24;
|
|
41
|
+
|
|
42
|
+
export async function animateBanner(): Promise<void> {
|
|
43
|
+
const isTTY = process.stdout.isTTY === true;
|
|
44
|
+
|
|
45
|
+
if (!isTTY) {
|
|
46
|
+
const lines = BANNER_LINES;
|
|
47
|
+
process.stdout.write(lines.join("\n") + "\n\n");
|
|
48
|
+
return;
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
process.stdout.write("\x1b[?25l"); // hide cursor
|
|
52
|
+
|
|
53
|
+
try {
|
|
54
|
+
for (let frame = 0; frame < FRAMES; frame++) {
|
|
55
|
+
const lines = renderBanner(frame * HUE_STEP);
|
|
56
|
+
if (frame > 0) {
|
|
57
|
+
// Move cursor up N lines to overwrite previous frame
|
|
58
|
+
process.stdout.write(`\x1b[${lines.length}A`);
|
|
59
|
+
}
|
|
60
|
+
process.stdout.write(lines.join("\n") + "\n");
|
|
61
|
+
|
|
62
|
+
if (frame < FRAMES - 1) {
|
|
63
|
+
await new Promise<void>((resolve) => setTimeout(resolve, FRAME_MS));
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
process.stdout.write("\n");
|
|
67
|
+
} finally {
|
|
68
|
+
process.stdout.write("\x1b[?25h"); // restore cursor
|
|
69
|
+
}
|
|
70
|
+
}
|
package/src/output/reporter.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import pc from "picocolors";
|
|
2
2
|
import { createSpinner } from "nanospinner";
|
|
3
|
-
import type { Reporter } from "./types.ts";
|
|
3
|
+
import type { Reporter, StepInfo } from "./types.ts";
|
|
4
4
|
import type { TestResult, RunResult } from "../runner/types.ts";
|
|
5
5
|
|
|
6
6
|
/**
|
|
@@ -14,16 +14,29 @@ export function formatDuration(ms: number): string {
|
|
|
14
14
|
return `${(ms / 1000).toFixed(1)}s`;
|
|
15
15
|
}
|
|
16
16
|
|
|
17
|
+
/** Write a line of text to stderr */
|
|
18
|
+
export function writeStderr(text: string): void {
|
|
19
|
+
Bun.write(Bun.stderr, text + "\n");
|
|
20
|
+
}
|
|
21
|
+
|
|
17
22
|
/**
|
|
18
23
|
* Console reporter with colored output, spinners, and box summary.
|
|
24
|
+
* All output routes to stderr so stdout is reserved for structured output.
|
|
19
25
|
* Colors auto-disable when stdout is not a TTY (via picocolors).
|
|
20
26
|
* Spinner animation auto-disables in non-TTY (via nanospinner).
|
|
21
27
|
*/
|
|
22
28
|
export class ConsoleReporter implements Reporter {
|
|
23
29
|
private spinner: ReturnType<typeof createSpinner> | null = null;
|
|
30
|
+
private readonly verbose: boolean;
|
|
31
|
+
private currentTestName: string | null = null;
|
|
32
|
+
|
|
33
|
+
constructor(verbose = false) {
|
|
34
|
+
this.verbose = verbose;
|
|
35
|
+
}
|
|
24
36
|
|
|
25
37
|
/** Creates a spinner with the test name and starts it */
|
|
26
38
|
onTestStart(testName: string): void {
|
|
39
|
+
this.currentTestName = testName;
|
|
27
40
|
this.spinner = createSpinner(testName).start();
|
|
28
41
|
}
|
|
29
42
|
|
|
@@ -38,35 +51,52 @@ export class ConsoleReporter implements Reporter {
|
|
|
38
51
|
this.spinner?.error({ text: `${testName} ${duration}` });
|
|
39
52
|
}
|
|
40
53
|
if (selfHealed) {
|
|
41
|
-
|
|
54
|
+
writeStderr(pc.dim(" Cache was stale \u2014 re-executed and updated"));
|
|
42
55
|
}
|
|
43
56
|
this.spinner = null;
|
|
57
|
+
this.currentTestName = null;
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
/** Handles per-step progress during AI execution */
|
|
61
|
+
onStepProgress(step: StepInfo): void {
|
|
62
|
+
if (this.verbose) {
|
|
63
|
+
writeStderr(pc.dim(` Step ${step.stepNumber}: ${step.description.full}`));
|
|
64
|
+
} else if (this.spinner) {
|
|
65
|
+
let spinnerText = `${this.currentTestName} \u2014 ${step.description.full}`;
|
|
66
|
+
if (spinnerText.length > 60) {
|
|
67
|
+
spinnerText = spinnerText.slice(0, 57) + "...";
|
|
68
|
+
}
|
|
69
|
+
this.spinner.update(spinnerText);
|
|
70
|
+
}
|
|
44
71
|
}
|
|
45
72
|
|
|
46
73
|
/** Prints bordered box summary and lists failed tests with error messages */
|
|
47
74
|
onRunComplete(data: RunResult): void {
|
|
48
75
|
const bar = "\u2501".repeat(40);
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
76
|
+
writeStderr("");
|
|
77
|
+
writeStderr(` ${bar}`);
|
|
78
|
+
writeStderr(" SuperGhost Results");
|
|
79
|
+
writeStderr(` ${bar}`);
|
|
80
|
+
writeStderr(` Total: ${data.results.length}`);
|
|
81
|
+
writeStderr(` Passed: ${pc.green(String(data.passed))}`);
|
|
82
|
+
writeStderr(
|
|
56
83
|
` Failed: ${data.failed > 0 ? pc.red(String(data.failed)) : String(data.failed)}`,
|
|
57
84
|
);
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
85
|
+
if (data.skipped > 0) {
|
|
86
|
+
writeStderr(` Skipped: ${data.skipped}`);
|
|
87
|
+
}
|
|
88
|
+
writeStderr(` Cached: ${data.cached}`);
|
|
89
|
+
writeStderr(` Time: ${pc.dim(formatDuration(data.totalDurationMs))}`);
|
|
90
|
+
writeStderr(` ${bar}`);
|
|
61
91
|
|
|
62
92
|
if (data.failed > 0) {
|
|
63
|
-
|
|
64
|
-
|
|
93
|
+
writeStderr("");
|
|
94
|
+
writeStderr(pc.red(" Failed tests:"));
|
|
65
95
|
for (const result of data.results) {
|
|
66
96
|
if (result.status === "failed") {
|
|
67
|
-
|
|
97
|
+
writeStderr(` ${pc.red("-")} ${result.testName}`);
|
|
68
98
|
if (result.error) {
|
|
69
|
-
|
|
99
|
+
writeStderr(` ${pc.dim(result.error)}`);
|
|
70
100
|
}
|
|
71
101
|
}
|
|
72
102
|
}
|
package/src/output/tool-name-map.ts
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import type { StepDescription } from "./types.ts";
|
|
2
|
+
|
|
3
|
+
/** Maps raw MCP tool names to human-readable action names */
|
|
4
|
+
const PREFIX_MAP: Record<string, string> = {
|
|
5
|
+
browser_navigate: "Navigate",
|
|
6
|
+
browser_click: "Click",
|
|
7
|
+
browser_type: "Type",
|
|
8
|
+
browser_screenshot: "Screenshot",
|
|
9
|
+
browser_wait_for_text: "Wait for text",
|
|
10
|
+
browser_hover: "Hover",
|
|
11
|
+
browser_select_option: "Select",
|
|
12
|
+
browser_go_back: "Go back",
|
|
13
|
+
browser_go_forward: "Go forward",
|
|
14
|
+
browser_press_key: "Press key",
|
|
15
|
+
browser_drag: "Drag",
|
|
16
|
+
browser_resize: "Resize",
|
|
17
|
+
browser_handle_dialog: "Handle dialog",
|
|
18
|
+
browser_file_upload: "Upload file",
|
|
19
|
+
browser_pdf_save: "Save PDF",
|
|
20
|
+
browser_close: "Close",
|
|
21
|
+
browser_console_messages: "Console messages",
|
|
22
|
+
browser_install: "Install browser",
|
|
23
|
+
browser_tab_list: "List tabs",
|
|
24
|
+
browser_tab_new: "New tab",
|
|
25
|
+
browser_tab_select: "Select tab",
|
|
26
|
+
browser_tab_close: "Close tab",
|
|
27
|
+
browser_network_requests: "Network requests",
|
|
28
|
+
browser_snapshot: "Snapshot",
|
|
29
|
+
};
|
|
30
|
+
|
|
31
|
+
/** Maps tool names to the input field used as the key argument */
|
|
32
|
+
const KEY_ARG_MAP: Record<string, string> = {
|
|
33
|
+
browser_navigate: "url",
|
|
34
|
+
browser_click: "element",
|
|
35
|
+
browser_type: "element",
|
|
36
|
+
browser_hover: "element",
|
|
37
|
+
browser_select_option: "element",
|
|
38
|
+
browser_press_key: "key",
|
|
39
|
+
browser_wait_for_text: "text",
|
|
40
|
+
};
|
|
41
|
+
|
|
42
|
+
/**
|
|
43
|
+
* Convert a raw tool call into a human-readable description.
|
|
44
|
+
*
|
|
45
|
+
* Known tools (browser_navigate, browser_click, etc.) map to friendly names.
|
|
46
|
+
* Unknown tools fall back to: strip underscores, capitalize first letter.
|
|
47
|
+
* Key arguments are extracted based on tool type (e.g., "url" for navigate).
|
|
48
|
+
*/
|
|
49
|
+
export function describeToolCall(
|
|
50
|
+
toolName: string,
|
|
51
|
+
input: Record<string, unknown>,
|
|
52
|
+
): StepDescription {
|
|
53
|
+
// Look up human name, or derive from raw name as fallback
|
|
54
|
+
const action =
|
|
55
|
+
PREFIX_MAP[toolName] ??
|
|
56
|
+
toolName
|
|
57
|
+
.replace(/_/g, " ")
|
|
58
|
+
.replace(/^\w/, (c) => c.toUpperCase());
|
|
59
|
+
|
|
60
|
+
// Look up which input field is the key argument for this tool
|
|
61
|
+
const keyArgField = KEY_ARG_MAP[toolName];
|
|
62
|
+
const rawKeyArg = keyArgField ? input[keyArgField] : undefined;
|
|
63
|
+
const keyArg =
|
|
64
|
+
rawKeyArg !== undefined && rawKeyArg !== null && String(rawKeyArg) !== ""
|
|
65
|
+
? String(rawKeyArg)
|
|
66
|
+
: undefined;
|
|
67
|
+
|
|
68
|
+
const full = keyArg ? `${action} \u2192 ${keyArg}` : action;
|
|
69
|
+
|
|
70
|
+
return { action, keyArg, full };
|
|
71
|
+
}
|
package/src/output/types.ts
CHANGED
|
@@ -1,8 +1,34 @@
|
|
|
1
1
|
import type { RunResult, TestResult } from "../runner/types.ts";
|
|
2
2
|
|
|
3
|
+
/** Describes a tool call in human-readable form */
|
|
4
|
+
export interface StepDescription {
|
|
5
|
+
/** Human-readable action name, e.g. "Navigate", "Click" */
|
|
6
|
+
action: string;
|
|
7
|
+
/** Key argument value, e.g. "/login", "button.submit" */
|
|
8
|
+
keyArg?: string;
|
|
9
|
+
/** Full description string, e.g. "Navigate \u2192 /login" */
|
|
10
|
+
full: string;
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
/** Information about a single step (tool call) during AI execution */
|
|
14
|
+
export interface StepInfo {
|
|
15
|
+
/** 1-based step counter for the current test */
|
|
16
|
+
stepNumber: number;
|
|
17
|
+
/** Raw tool name, e.g. "browser_navigate" */
|
|
18
|
+
toolName: string;
|
|
19
|
+
/** Tool call input arguments */
|
|
20
|
+
input: Record<string, unknown>;
|
|
21
|
+
/** Human-readable description of the tool call */
|
|
22
|
+
description: StepDescription;
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
/** Callback invoked for each tool call during AI execution */
|
|
26
|
+
export type OnStepProgress = (step: StepInfo) => void;
|
|
27
|
+
|
|
3
28
|
/** Interface for output reporting */
|
|
4
29
|
export interface Reporter {
|
|
5
30
|
onTestStart(testName: string): void;
|
|
6
31
|
onTestComplete(result: TestResult): void;
|
|
7
32
|
onRunComplete(data: RunResult): void;
|
|
33
|
+
onStepProgress?(step: StepInfo): void;
|
|
8
34
|
}
|
package/src/runner/test-executor.ts
CHANGED
|
@@ -3,6 +3,7 @@ import type { StepReplayer } from "../cache/step-replayer.ts";
|
|
|
3
3
|
import type { AgentExecutionResult } from "../agent/types.ts";
|
|
4
4
|
import type { Config } from "../config/types.ts";
|
|
5
5
|
import type { TestResult } from "./types.ts";
|
|
6
|
+
import type { OnStepProgress } from "../output/types.ts";
|
|
6
7
|
|
|
7
8
|
/** Function signature for executing a test via the AI agent */
|
|
8
9
|
type ExecuteAgentFn = (config: {
|
|
@@ -13,6 +14,7 @@ type ExecuteAgentFn = (config: {
|
|
|
13
14
|
recursionLimit: number;
|
|
14
15
|
globalContext?: string;
|
|
15
16
|
testContext?: string;
|
|
17
|
+
onStepProgress?: OnStepProgress;
|
|
16
18
|
}) => Promise<AgentExecutionResult>;
|
|
17
19
|
|
|
18
20
|
/**
|
|
@@ -32,6 +34,8 @@ export class TestExecutor {
|
|
|
32
34
|
"maxAttempts" | "recursionLimit" | "model" | "modelProvider"
|
|
33
35
|
> & { context?: string };
|
|
34
36
|
private readonly globalContext?: string;
|
|
37
|
+
private readonly noCache: boolean;
|
|
38
|
+
private readonly onStepProgress?: OnStepProgress;
|
|
35
39
|
|
|
36
40
|
constructor(opts: {
|
|
37
41
|
cacheManager: CacheManager;
|
|
@@ -44,6 +48,8 @@ export class TestExecutor {
|
|
|
44
48
|
"maxAttempts" | "recursionLimit" | "model" | "modelProvider"
|
|
45
49
|
> & { context?: string };
|
|
46
50
|
globalContext?: string;
|
|
51
|
+
noCache?: boolean;
|
|
52
|
+
onStepProgress?: OnStepProgress;
|
|
47
53
|
}) {
|
|
48
54
|
this.cacheManager = opts.cacheManager;
|
|
49
55
|
this.replayer = opts.replayer;
|
|
@@ -52,6 +58,8 @@ export class TestExecutor {
|
|
|
52
58
|
this.tools = opts.tools ?? {};
|
|
53
59
|
this.config = opts.config;
|
|
54
60
|
this.globalContext = opts.globalContext;
|
|
61
|
+
this.noCache = opts.noCache ?? false;
|
|
62
|
+
this.onStepProgress = opts.onStepProgress;
|
|
55
63
|
}
|
|
56
64
|
|
|
57
65
|
/** Execute a single test case with cache-first strategy */
|
|
@@ -62,24 +70,26 @@ export class TestExecutor {
|
|
|
62
70
|
): Promise<TestResult> {
|
|
63
71
|
const start = Date.now();
|
|
64
72
|
|
|
65
|
-
// Phase 1: Try cache replay
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
73
|
+
// Phase 1: Try cache replay (unless noCache)
|
|
74
|
+
if (!this.noCache) {
|
|
75
|
+
const cached = await this.cacheManager.load(testCase, baseUrl);
|
|
76
|
+
if (cached) {
|
|
77
|
+
const replay = await this.replayer.replay(cached.steps, this.onStepProgress);
|
|
78
|
+
if (replay.success) {
|
|
79
|
+
return {
|
|
80
|
+
testName: testCase,
|
|
81
|
+
testCase,
|
|
82
|
+
status: "passed",
|
|
83
|
+
source: "cache",
|
|
84
|
+
durationMs: Date.now() - start,
|
|
85
|
+
};
|
|
86
|
+
}
|
|
87
|
+
// Cache stale — fall through to AI with self-heal flag
|
|
88
|
+
return this.executeWithAgent(testCase, baseUrl, start, true, testContext);
|
|
77
89
|
}
|
|
78
|
-
// Cache stale — fall through to AI with self-heal flag
|
|
79
|
-
return this.executeWithAgent(testCase, baseUrl, start, true, testContext);
|
|
80
90
|
}
|
|
81
91
|
|
|
82
|
-
// Phase 2: No cache — go directly to AI
|
|
92
|
+
// Phase 2: No cache or noCache — go directly to AI
|
|
83
93
|
return this.executeWithAgent(testCase, baseUrl, start, false, testContext);
|
|
84
94
|
}
|
|
85
95
|
|
|
@@ -102,6 +112,7 @@ export class TestExecutor {
|
|
|
102
112
|
recursionLimit: this.config.recursionLimit,
|
|
103
113
|
globalContext: this.globalContext,
|
|
104
114
|
testContext,
|
|
115
|
+
onStepProgress: this.onStepProgress,
|
|
105
116
|
});
|
|
106
117
|
|
|
107
118
|
if (result.passed) {
|