superghost 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +5 -2
- package/src/agent/agent-runner.ts +8 -17
- package/src/agent/mcp-manager.ts +7 -14
- package/src/agent/model-factory.ts +1 -1
- package/src/agent/types.ts +1 -1
- package/src/cache/cache-manager.ts +4 -3
- package/src/cache/step-recorder.ts +1 -1
- package/src/cache/step-replayer.ts +3 -6
- package/src/cli.ts +235 -162
- package/src/config/loader.ts +6 -14
- package/src/config/types.ts +3 -2
- package/src/infra/process-manager.ts +6 -2
- package/src/infra/signals.ts +1 -1
- package/src/output/banner.ts +8 -12
- package/src/output/json-formatter.ts +150 -0
- package/src/output/reporter.ts +7 -8
- package/src/output/tool-name-map.ts +4 -13
- package/src/output/types.ts +1 -1
- package/src/runner/test-executor.ts +11 -19
- package/src/runner/test-runner.ts +6 -15
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "superghost",
|
|
3
|
-
"version": "0.2.0",
|
|
3
|
+
"version": "0.3.0",
|
|
4
4
|
"description": "Plain English test cases with AI execution and instant cached replay for CI/CD",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -14,8 +14,10 @@
|
|
|
14
14
|
"scripts": {
|
|
15
15
|
"test": "bun test",
|
|
16
16
|
"typecheck": "bunx tsc --noEmit",
|
|
17
|
+
"lint": "bunx biome check .",
|
|
18
|
+
"lint:fix": "bunx biome check --write .",
|
|
17
19
|
"build:binary": "bun run scripts/build-binaries.ts",
|
|
18
|
-
"prepublishOnly": "bun test && bunx tsc --noEmit",
|
|
20
|
+
"prepublishOnly": "bun run lint && bun test && bunx tsc --noEmit",
|
|
19
21
|
"e2e": "bun run e2e/run-e2e.ts",
|
|
20
22
|
"e2e:smoke": "bun run e2e/run-e2e.ts smoke",
|
|
21
23
|
"e2e:browser": "bun run e2e/run-e2e.ts browser",
|
|
@@ -69,6 +71,7 @@
|
|
|
69
71
|
"zod": "^4.3.6"
|
|
70
72
|
},
|
|
71
73
|
"devDependencies": {
|
|
74
|
+
"@biomejs/biome": "2.4.6",
|
|
72
75
|
"@types/bun": "^1.3.10",
|
|
73
76
|
"@types/react": "^19.0.0",
|
|
74
77
|
"@types/react-dom": "^19.0.0",
|
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
import { generateText, Output, stepCountIs } from "ai";
|
|
2
2
|
import { z } from "zod";
|
|
3
|
+
|
|
3
4
|
import { StepRecorder } from "../cache/step-recorder.ts";
|
|
4
|
-
import type { AgentExecutionResult } from "./types.ts";
|
|
5
|
-
import { buildSystemPrompt } from "./prompt.ts";
|
|
6
5
|
import { describeToolCall } from "../output/tool-name-map.ts";
|
|
7
|
-
import type { OnStepProgress } from "../output/types.ts";
|
|
6
|
+
import { type OnStepProgress } from "../output/types.ts";
|
|
7
|
+
import { buildSystemPrompt } from "./prompt.ts";
|
|
8
|
+
import { type AgentExecutionResult } from "./types.ts";
|
|
8
9
|
|
|
9
10
|
/**
|
|
10
11
|
* Schema for structured agent output.
|
|
@@ -12,9 +13,7 @@ import type { OnStepProgress } from "../output/types.ts";
|
|
|
12
13
|
*/
|
|
13
14
|
const TestResultSchema = z.object({
|
|
14
15
|
passed: z.boolean().describe("Whether the test case passed"),
|
|
15
|
-
message: z
|
|
16
|
-
.string()
|
|
17
|
-
.describe("Brief diagnostic: what happened and what the page showed"),
|
|
16
|
+
message: z.string().describe("Brief diagnostic: what happened and what the page showed"),
|
|
18
17
|
});
|
|
19
18
|
|
|
20
19
|
/**
|
|
@@ -40,12 +39,7 @@ export async function executeAgent(config: {
|
|
|
40
39
|
const recorder = new StepRecorder();
|
|
41
40
|
const wrappedTools = recorder.wrapTools(config.tools);
|
|
42
41
|
|
|
43
|
-
const systemPrompt = buildSystemPrompt(
|
|
44
|
-
config.testCase,
|
|
45
|
-
config.baseUrl,
|
|
46
|
-
config.globalContext,
|
|
47
|
-
config.testContext,
|
|
48
|
-
);
|
|
42
|
+
const systemPrompt = buildSystemPrompt(config.testCase, config.baseUrl, config.globalContext, config.testContext);
|
|
49
43
|
|
|
50
44
|
let stepCounter = 0;
|
|
51
45
|
|
|
@@ -60,11 +54,8 @@ export async function executeAgent(config: {
|
|
|
60
54
|
? (event: any) => {
|
|
61
55
|
if (event.success) {
|
|
62
56
|
stepCounter++;
|
|
63
|
-
const input = (event.toolCall.input ?? {}) as Record<
|
|
64
|
-
|
|
65
|
-
unknown
|
|
66
|
-
>;
|
|
67
|
-
config.onStepProgress!({
|
|
57
|
+
const input = (event.toolCall.input ?? {}) as Record<string, unknown>;
|
|
58
|
+
config.onStepProgress?.({
|
|
68
59
|
stepNumber: stepCounter,
|
|
69
60
|
toolName: event.toolCall.toolName,
|
|
70
61
|
input,
|
package/src/agent/mcp-manager.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { createMCPClient } from "@ai-sdk/mcp";
|
|
2
2
|
import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js";
|
|
3
|
-
|
|
3
|
+
|
|
4
|
+
import { type Config } from "../config/types.ts";
|
|
4
5
|
import { getMcpCommand } from "../dist/paths.ts";
|
|
5
6
|
|
|
6
7
|
/**
|
|
@@ -12,8 +13,7 @@ import { getMcpCommand } from "../dist/paths.ts";
|
|
|
12
13
|
* regardless of test type.
|
|
13
14
|
*/
|
|
14
15
|
export class McpManager {
|
|
15
|
-
private playwrightClient: Awaited<ReturnType<typeof createMCPClient>> | null =
|
|
16
|
-
null;
|
|
16
|
+
private playwrightClient: Awaited<ReturnType<typeof createMCPClient>> | null = null;
|
|
17
17
|
private curlClient: Awaited<ReturnType<typeof createMCPClient>> | null = null;
|
|
18
18
|
|
|
19
19
|
constructor(private readonly config: Pick<Config, "browser" | "headless">) {}
|
|
@@ -27,11 +27,7 @@ export class McpManager {
|
|
|
27
27
|
const playwrightCmd = getMcpCommand("@playwright/mcp");
|
|
28
28
|
const curlCmd = getMcpCommand("@calibress/curl-mcp");
|
|
29
29
|
|
|
30
|
-
const playwrightArgs = [
|
|
31
|
-
...playwrightCmd.args,
|
|
32
|
-
"--isolated",
|
|
33
|
-
`--browser=${this.config.browser}`,
|
|
34
|
-
];
|
|
30
|
+
const playwrightArgs = [...playwrightCmd.args, "--isolated", `--browser=${this.config.browser}`];
|
|
35
31
|
|
|
36
32
|
if (this.config.headless) {
|
|
37
33
|
playwrightArgs.splice(playwrightCmd.args.length, 0, "--headless");
|
|
@@ -57,8 +53,8 @@ export class McpManager {
|
|
|
57
53
|
* Provides ALL tools to the agent regardless of test type.
|
|
58
54
|
*/
|
|
59
55
|
async getTools(): Promise<Record<string, any>> {
|
|
60
|
-
const playwrightTools = await this.playwrightClient
|
|
61
|
-
const curlTools = await this.curlClient
|
|
56
|
+
const playwrightTools = await this.playwrightClient?.tools();
|
|
57
|
+
const curlTools = await this.curlClient?.tools();
|
|
62
58
|
return { ...playwrightTools, ...curlTools };
|
|
63
59
|
}
|
|
64
60
|
|
|
@@ -68,10 +64,7 @@ export class McpManager {
|
|
|
68
64
|
* even if one fails to close.
|
|
69
65
|
*/
|
|
70
66
|
async close(): Promise<void> {
|
|
71
|
-
await Promise.allSettled([
|
|
72
|
-
this.playwrightClient?.close(),
|
|
73
|
-
this.curlClient?.close(),
|
|
74
|
-
]);
|
|
67
|
+
await Promise.allSettled([this.playwrightClient?.close(), this.curlClient?.close()]);
|
|
75
68
|
this.playwrightClient = null;
|
|
76
69
|
this.curlClient = null;
|
|
77
70
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { anthropic } from "@ai-sdk/anthropic";
|
|
2
|
-
import { openai } from "@ai-sdk/openai";
|
|
3
2
|
import { google } from "@ai-sdk/google";
|
|
3
|
+
import { openai } from "@ai-sdk/openai";
|
|
4
4
|
import { createOpenRouter } from "@openrouter/ai-sdk-provider";
|
|
5
5
|
|
|
6
6
|
/** Supported LLM provider names */
|
package/src/agent/types.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
|
+
import { mkdir, readdir, rename } from "node:fs/promises";
|
|
1
2
|
import { join } from "node:path";
|
|
2
|
-
|
|
3
|
-
import type
|
|
3
|
+
|
|
4
|
+
import { type CachedStep, type CacheEntry } from "./types.ts";
|
|
4
5
|
|
|
5
6
|
/**
|
|
6
7
|
* Manages file-based cache entries for test step recordings.
|
|
@@ -103,7 +104,7 @@ export class CacheManager {
|
|
|
103
104
|
const filePath = join(this.cacheDir, `${hash}.json`);
|
|
104
105
|
|
|
105
106
|
try {
|
|
106
|
-
return await Bun.file(filePath).json() as CacheEntry;
|
|
107
|
+
return (await Bun.file(filePath).json()) as CacheEntry;
|
|
107
108
|
} catch {
|
|
108
109
|
return null;
|
|
109
110
|
}
|
|
@@ -1,12 +1,9 @@
|
|
|
1
|
-
import type { CachedStep } from "./types.ts";
|
|
2
|
-
import type { OnStepProgress } from "../output/types.ts";
|
|
3
1
|
import { describeToolCall } from "../output/tool-name-map.ts";
|
|
2
|
+
import { type OnStepProgress } from "../output/types.ts";
|
|
3
|
+
import { type CachedStep } from "./types.ts";
|
|
4
4
|
|
|
5
5
|
/** Function signature for executing a tool by name with given input */
|
|
6
|
-
export type ToolExecutor = (
|
|
7
|
-
toolName: string,
|
|
8
|
-
toolInput: Record<string, unknown>,
|
|
9
|
-
) => Promise<string>;
|
|
6
|
+
export type ToolExecutor = (toolName: string, toolInput: Record<string, unknown>) => Promise<string>;
|
|
10
7
|
|
|
11
8
|
/** Result of replaying cached steps */
|
|
12
9
|
export interface ReplayResult {
|
package/src/cli.ts
CHANGED
|
@@ -2,31 +2,31 @@
|
|
|
2
2
|
|
|
3
3
|
import { Command } from "commander";
|
|
4
4
|
import pc from "picocolors";
|
|
5
|
-
import
|
|
6
|
-
|
|
7
|
-
import
|
|
8
|
-
import {
|
|
9
|
-
import { ProcessManager } from "./infra/process-manager.ts";
|
|
10
|
-
import { setupSignalHandlers } from "./infra/signals.ts";
|
|
5
|
+
import picomatch from "picomatch";
|
|
6
|
+
|
|
7
|
+
import pkg from "../package.json";
|
|
8
|
+
import { executeAgent } from "./agent/agent-runner.ts";
|
|
11
9
|
import { McpManager } from "./agent/mcp-manager.ts";
|
|
10
|
+
import { createModel, inferProvider, type ProviderName, validateApiKey } from "./agent/model-factory.ts";
|
|
12
11
|
import { CacheManager } from "./cache/cache-manager.ts";
|
|
13
|
-
import { StepReplayer } from "./cache/step-replayer.ts";
|
|
14
|
-
import
|
|
15
|
-
import { TestExecutor } from "./runner/test-executor.ts";
|
|
16
|
-
import {
|
|
17
|
-
inferProvider,
|
|
18
|
-
validateApiKey,
|
|
19
|
-
createModel,
|
|
20
|
-
} from "./agent/model-factory.ts";
|
|
21
|
-
import type { ProviderName } from "./agent/model-factory.ts";
|
|
22
|
-
import { executeAgent } from "./agent/agent-runner.ts";
|
|
23
|
-
import type { OnStepProgress } from "./output/types.ts";
|
|
24
|
-
import picomatch from "picomatch";
|
|
25
|
-
import { checkBaseUrlReachable } from "./infra/preflight.ts";
|
|
12
|
+
import { StepReplayer, type ToolExecutor } from "./cache/step-replayer.ts";
|
|
13
|
+
import { ConfigLoadError, loadConfig } from "./config/loader.ts";
|
|
26
14
|
import { isStandaloneBinary } from "./dist/paths.ts";
|
|
27
15
|
import { ensureMcpDependencies } from "./dist/setup.ts";
|
|
16
|
+
import { checkBaseUrlReachable } from "./infra/preflight.ts";
|
|
17
|
+
import { ProcessManager } from "./infra/process-manager.ts";
|
|
18
|
+
import { setupSignalHandlers } from "./infra/signals.ts";
|
|
28
19
|
import { animateBanner } from "./output/banner.ts";
|
|
29
|
-
import
|
|
20
|
+
import {
|
|
21
|
+
formatJsonDryRun,
|
|
22
|
+
formatJsonError,
|
|
23
|
+
formatJsonOutput,
|
|
24
|
+
type JsonOutputMetadata,
|
|
25
|
+
} from "./output/json-formatter.ts";
|
|
26
|
+
import { ConsoleReporter, writeStderr } from "./output/reporter.ts";
|
|
27
|
+
import { type OnStepProgress } from "./output/types.ts";
|
|
28
|
+
import { TestExecutor } from "./runner/test-executor.ts";
|
|
29
|
+
import { type ExecuteFn, TestRunner } from "./runner/test-runner.ts";
|
|
30
30
|
|
|
31
31
|
/** Print the run header and any stacked annotations to stderr */
|
|
32
32
|
function printRunHeader(testCount: number, totalTestCount: number | undefined, annotations: string[]): void {
|
|
@@ -48,6 +48,11 @@ function printRunHeader(testCount: number, totalTestCount: number | undefined, a
|
|
|
48
48
|
|
|
49
49
|
const program = new Command();
|
|
50
50
|
|
|
51
|
+
program.configureOutput({
|
|
52
|
+
writeOut: (str) => writeStderr(str.trimEnd()),
|
|
53
|
+
writeErr: (str) => writeStderr(str.trimEnd()),
|
|
54
|
+
});
|
|
55
|
+
|
|
51
56
|
program
|
|
52
57
|
.name("superghost")
|
|
53
58
|
.description("AI-powered end-to-end browser and API testing")
|
|
@@ -58,6 +63,7 @@ program
|
|
|
58
63
|
.option("--no-cache", "Bypass cache reads (still writes on success)")
|
|
59
64
|
.option("--dry-run", "List tests and validate config without executing")
|
|
60
65
|
.option("--verbose", "Show per-step tool call output during execution")
|
|
66
|
+
.option("--output <format>", "Output format (json)")
|
|
61
67
|
.exitOverride((err) => {
|
|
62
68
|
// Commander writes its own error message to stderr.
|
|
63
69
|
// Re-exit with code 2 for config-class errors (missing required option, unknown option).
|
|
@@ -65,167 +71,234 @@ program
|
|
|
65
71
|
process.exit(2);
|
|
66
72
|
}
|
|
67
73
|
})
|
|
68
|
-
.action(
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
74
|
+
.action(
|
|
75
|
+
async (options: {
|
|
76
|
+
config: string;
|
|
77
|
+
headed?: boolean;
|
|
78
|
+
only?: string;
|
|
79
|
+
cache: boolean;
|
|
80
|
+
dryRun?: boolean;
|
|
81
|
+
verbose?: boolean;
|
|
82
|
+
output?: string;
|
|
83
|
+
}) => {
|
|
84
|
+
const pm = new ProcessManager();
|
|
85
|
+
setupSignalHandlers(pm);
|
|
76
86
|
|
|
77
|
-
|
|
87
|
+
// Validate --output format early
|
|
88
|
+
if (options.output && options.output !== "json") {
|
|
89
|
+
writeStderr(`${pc.red("Error:")} Unknown output format '${options.output}'. Supported: json`);
|
|
90
|
+
setTimeout(() => process.exit(2), 100);
|
|
91
|
+
return;
|
|
92
|
+
}
|
|
78
93
|
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
config.headless = false;
|
|
94
|
+
// Auto-install MCP dependencies for standalone binary on first run
|
|
95
|
+
if (isStandaloneBinary()) {
|
|
96
|
+
await ensureMcpDependencies();
|
|
83
97
|
}
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
config
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
98
|
+
|
|
99
|
+
let mcpManager: McpManager | null = null;
|
|
100
|
+
|
|
101
|
+
try {
|
|
102
|
+
const config = await loadConfig(options.config);
|
|
103
|
+
if (options.headed) {
|
|
104
|
+
config.headless = false;
|
|
105
|
+
}
|
|
106
|
+
const reporter = new ConsoleReporter(options.verbose ?? false);
|
|
107
|
+
|
|
108
|
+
// Infer provider: use explicit modelProvider unless it matches default and model suggests otherwise
|
|
109
|
+
const provider =
|
|
110
|
+
config.modelProvider === "anthropic" ? inferProvider(config.model) : (config.modelProvider as ProviderName);
|
|
111
|
+
|
|
112
|
+
// Validate API key at startup before any tests run
|
|
113
|
+
validateApiKey(provider);
|
|
114
|
+
|
|
115
|
+
// Apply --only filter before any expensive operations
|
|
116
|
+
const totalTestCount = config.tests.length;
|
|
117
|
+
if (options.only) {
|
|
118
|
+
const allTestNames = config.tests.map((t) => t.name);
|
|
119
|
+
const isMatch = picomatch(options.only, { nocase: true });
|
|
120
|
+
config.tests = config.tests.filter((t) => isMatch(t.name));
|
|
121
|
+
|
|
122
|
+
if (config.tests.length === 0) {
|
|
123
|
+
const names = allTestNames.map((n) => ` - ${n}`).join("\n");
|
|
124
|
+
writeStderr(`${pc.red("Error:")} No tests match pattern "${options.only}"\n\nAvailable tests:\n${names}`);
|
|
125
|
+
setTimeout(() => process.exit(2), 100);
|
|
126
|
+
return;
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
// Dry-run: list tests with cache/AI source labels, then exit
|
|
131
|
+
if (options.dryRun) {
|
|
132
|
+
const cacheManager = new CacheManager(config.cacheDir);
|
|
133
|
+
|
|
134
|
+
// Print header with annotations
|
|
135
|
+
const dryRunAnnotations = ["(dry-run)"];
|
|
136
|
+
if (options.only) dryRunAnnotations.push(`(filtered by --only "${options.only}")`);
|
|
137
|
+
printRunHeader(config.tests.length, options.only ? totalTestCount : undefined, dryRunAnnotations);
|
|
138
|
+
|
|
139
|
+
// Determine max test name length for padding
|
|
140
|
+
const maxNameLen = Math.max(...config.tests.map((t) => t.name.length));
|
|
141
|
+
let cachedCount = 0;
|
|
142
|
+
const dryRunTests: Array<{ name: string; case: string; source: "cache" | "ai" }> = [];
|
|
143
|
+
|
|
144
|
+
for (let i = 0; i < config.tests.length; i++) {
|
|
145
|
+
const test = config.tests[i];
|
|
146
|
+
const baseUrl = test.baseUrl ?? config.baseUrl ?? "";
|
|
147
|
+
const entry = await cacheManager.load(test.case, baseUrl);
|
|
148
|
+
const source: "cache" | "ai" = entry ? "cache" : "ai";
|
|
149
|
+
if (entry) cachedCount++;
|
|
150
|
+
dryRunTests.push({ name: test.name, case: test.case, source });
|
|
151
|
+
|
|
152
|
+
const paddedName = test.name.padEnd(maxNameLen);
|
|
153
|
+
writeStderr(` ${i + 1}. ${paddedName} (${source})`);
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
writeStderr("");
|
|
157
|
+
writeStderr(`${config.tests.length} tests, ${cachedCount} cached`);
|
|
158
|
+
|
|
159
|
+
// Write JSON to stdout when --output json is active
|
|
160
|
+
if (options.output === "json") {
|
|
161
|
+
const metadata: JsonOutputMetadata = {
|
|
162
|
+
model: config.model,
|
|
163
|
+
provider,
|
|
164
|
+
configFile: options.config,
|
|
165
|
+
baseUrl: config.baseUrl,
|
|
166
|
+
timestamp: new Date().toISOString(),
|
|
167
|
+
...(options.only
|
|
168
|
+
? { filter: { pattern: options.only, matched: config.tests.length, total: totalTestCount } }
|
|
169
|
+
: {}),
|
|
170
|
+
};
|
|
171
|
+
const testList = dryRunTests.map((t) => ({
|
|
172
|
+
name: t.name,
|
|
173
|
+
case: t.case,
|
|
174
|
+
source: t.source,
|
|
175
|
+
}));
|
|
176
|
+
const json = formatJsonDryRun(testList, metadata, pkg.version);
|
|
177
|
+
process.stdout.write(`${json}\n`);
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
setTimeout(() => process.exit(0), 100);
|
|
106
181
|
return;
|
|
107
182
|
}
|
|
108
|
-
}
|
|
109
183
|
|
|
110
|
-
|
|
111
|
-
|
|
184
|
+
// Preflight: check baseUrl reachability (only if global baseUrl configured)
|
|
185
|
+
if (config.baseUrl) {
|
|
186
|
+
try {
|
|
187
|
+
await checkBaseUrlReachable(config.baseUrl);
|
|
188
|
+
} catch {
|
|
189
|
+
writeStderr(`${pc.red("Error:")} baseUrl unreachable: ${config.baseUrl}`);
|
|
190
|
+
writeStderr(` Check that the server is running and the URL is correct.`);
|
|
191
|
+
setTimeout(() => process.exit(2), 100);
|
|
192
|
+
return;
|
|
193
|
+
}
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
// Create AI model
|
|
197
|
+
const model = createModel(config.model, provider);
|
|
198
|
+
|
|
199
|
+
// Initialize MCP servers (shared across test suite, not per-test)
|
|
200
|
+
mcpManager = new McpManager({
|
|
201
|
+
browser: config.browser,
|
|
202
|
+
headless: config.headless,
|
|
203
|
+
});
|
|
204
|
+
await mcpManager.initialize();
|
|
205
|
+
const tools = await mcpManager.getTools();
|
|
206
|
+
|
|
207
|
+
// Create cache subsystem
|
|
112
208
|
const cacheManager = new CacheManager(config.cacheDir);
|
|
209
|
+
await cacheManager.migrateV1Cache();
|
|
210
|
+
const toolExecutor: ToolExecutor = async (toolName, toolInput) => {
|
|
211
|
+
const tool = tools[toolName];
|
|
212
|
+
if (!tool) throw new Error(`Tool not found: ${toolName}`);
|
|
213
|
+
return await tool.execute(toolInput);
|
|
214
|
+
};
|
|
215
|
+
const replayer = new StepReplayer(toolExecutor);
|
|
113
216
|
|
|
114
|
-
//
|
|
115
|
-
const
|
|
116
|
-
if (options.only) dryRunAnnotations.push(`(filtered by --only "${options.only}")`);
|
|
117
|
-
printRunHeader(config.tests.length, options.only ? totalTestCount : undefined, dryRunAnnotations);
|
|
217
|
+
// Create onStepProgress callback bound to reporter
|
|
218
|
+
const onStepProgress: OnStepProgress = (step) => reporter.onStepProgress(step);
|
|
118
219
|
|
|
119
|
-
//
|
|
120
|
-
const
|
|
121
|
-
|
|
220
|
+
// Create TestExecutor with cache-first strategy
|
|
221
|
+
const executor = new TestExecutor({
|
|
222
|
+
cacheManager,
|
|
223
|
+
replayer,
|
|
224
|
+
executeAgentFn: executeAgent,
|
|
225
|
+
model,
|
|
226
|
+
tools,
|
|
227
|
+
config,
|
|
228
|
+
globalContext: config.context,
|
|
229
|
+
noCache: !options.cache,
|
|
230
|
+
onStepProgress,
|
|
231
|
+
});
|
|
122
232
|
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
const entry = await cacheManager.load(test.case, baseUrl);
|
|
127
|
-
const source = entry ? "cache" : "ai";
|
|
128
|
-
if (entry) cachedCount++;
|
|
233
|
+
// Wire execute function for TestRunner
|
|
234
|
+
const executeFn: ExecuteFn = async (testCase, baseUrl, testContext?) =>
|
|
235
|
+
executor.execute(testCase, baseUrl, testContext);
|
|
129
236
|
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
237
|
+
const runAnnotations: string[] = [];
|
|
238
|
+
if (options.only) runAnnotations.push(`(filtered by --only "${options.only}")`);
|
|
239
|
+
if (!options.cache) runAnnotations.push("(cache disabled)");
|
|
240
|
+
if (options.verbose) runAnnotations.push("(verbose)");
|
|
241
|
+
printRunHeader(config.tests.length, options.only ? totalTestCount : undefined, runAnnotations);
|
|
133
242
|
|
|
134
|
-
|
|
135
|
-
|
|
243
|
+
const runner = new TestRunner(config, reporter, executeFn);
|
|
244
|
+
const result = await runner.run();
|
|
245
|
+
result.skipped = options.only ? totalTestCount - config.tests.length : 0;
|
|
136
246
|
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
247
|
+
await mcpManager.close();
|
|
248
|
+
await pm.killAll();
|
|
249
|
+
const code = result.failed > 0 ? 1 : 0;
|
|
140
250
|
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
251
|
+
// Write JSON to stdout when --output json is active
|
|
252
|
+
if (options.output === "json") {
|
|
253
|
+
const metadata: JsonOutputMetadata = {
|
|
254
|
+
model: config.model,
|
|
255
|
+
provider,
|
|
256
|
+
configFile: options.config,
|
|
257
|
+
baseUrl: config.baseUrl,
|
|
258
|
+
timestamp: new Date().toISOString(),
|
|
259
|
+
...(options.only
|
|
260
|
+
? { filter: { pattern: options.only, matched: config.tests.length, total: totalTestCount } }
|
|
261
|
+
: {}),
|
|
262
|
+
};
|
|
263
|
+
const json = formatJsonOutput(result, metadata, pkg.version, code);
|
|
264
|
+
process.stdout.write(`${json}\n`);
|
|
150
265
|
}
|
|
151
|
-
}
|
|
152
266
|
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
headless: config.headless,
|
|
160
|
-
});
|
|
161
|
-
await mcpManager.initialize();
|
|
162
|
-
const tools = await mcpManager.getTools();
|
|
163
|
-
|
|
164
|
-
// Create cache subsystem
|
|
165
|
-
const cacheManager = new CacheManager(config.cacheDir);
|
|
166
|
-
await cacheManager.migrateV1Cache();
|
|
167
|
-
const toolExecutor: ToolExecutor = async (toolName, toolInput) => {
|
|
168
|
-
const tool = tools[toolName];
|
|
169
|
-
if (!tool) throw new Error(`Tool not found: ${toolName}`);
|
|
170
|
-
return await tool.execute(toolInput);
|
|
171
|
-
};
|
|
172
|
-
const replayer = new StepReplayer(toolExecutor);
|
|
173
|
-
|
|
174
|
-
// Create onStepProgress callback bound to reporter
|
|
175
|
-
const onStepProgress: OnStepProgress = (step) => reporter.onStepProgress(step);
|
|
176
|
-
|
|
177
|
-
// Create TestExecutor with cache-first strategy
|
|
178
|
-
const executor = new TestExecutor({
|
|
179
|
-
cacheManager,
|
|
180
|
-
replayer,
|
|
181
|
-
executeAgentFn: executeAgent,
|
|
182
|
-
model,
|
|
183
|
-
tools,
|
|
184
|
-
config,
|
|
185
|
-
globalContext: config.context,
|
|
186
|
-
noCache: !options.cache,
|
|
187
|
-
onStepProgress,
|
|
188
|
-
});
|
|
189
|
-
|
|
190
|
-
// Wire execute function for TestRunner
|
|
191
|
-
const executeFn: ExecuteFn = async (testCase, baseUrl, testContext?) =>
|
|
192
|
-
executor.execute(testCase, baseUrl, testContext);
|
|
193
|
-
|
|
194
|
-
const runAnnotations: string[] = [];
|
|
195
|
-
if (options.only) runAnnotations.push(`(filtered by --only "${options.only}")`);
|
|
196
|
-
if (!options.cache) runAnnotations.push("(cache disabled)");
|
|
197
|
-
if (options.verbose) runAnnotations.push("(verbose)");
|
|
198
|
-
printRunHeader(config.tests.length, options.only ? totalTestCount : undefined, runAnnotations);
|
|
199
|
-
|
|
200
|
-
const runner = new TestRunner(config, reporter, executeFn);
|
|
201
|
-
const result = await runner.run();
|
|
202
|
-
result.skipped = options.only ? totalTestCount - config.tests.length : 0;
|
|
203
|
-
|
|
204
|
-
await mcpManager.close();
|
|
205
|
-
await pm.killAll();
|
|
206
|
-
const code = result.failed > 0 ? 1 : 0;
|
|
207
|
-
setTimeout(() => process.exit(code), 100);
|
|
208
|
-
} catch (error) {
|
|
209
|
-
if (mcpManager) {
|
|
210
|
-
await mcpManager.close().catch(() => {});
|
|
211
|
-
}
|
|
212
|
-
await pm.killAll();
|
|
267
|
+
setTimeout(() => process.exit(code), 100);
|
|
268
|
+
} catch (error) {
|
|
269
|
+
if (mcpManager) {
|
|
270
|
+
await mcpManager.close().catch(() => {});
|
|
271
|
+
}
|
|
272
|
+
await pm.killAll();
|
|
213
273
|
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
274
|
+
if (error instanceof ConfigLoadError) {
|
|
275
|
+
writeStderr(`${pc.red("Error:")} ${error.message}`);
|
|
276
|
+
if (options.output === "json") {
|
|
277
|
+
const json = formatJsonError(error.message, pkg.version, { configFile: options.config });
|
|
278
|
+
process.stdout.write(`${json}\n`);
|
|
279
|
+
}
|
|
280
|
+
setTimeout(() => process.exit(2), 100);
|
|
281
|
+
return;
|
|
282
|
+
}
|
|
283
|
+
if (error instanceof Error && error.message.startsWith("Missing API key")) {
|
|
284
|
+
writeStderr(`${pc.red("Error:")} ${error.message}`);
|
|
285
|
+
if (options.output === "json") {
|
|
286
|
+
const json = formatJsonError(error.message, pkg.version, { configFile: options.config });
|
|
287
|
+
process.stdout.write(`${json}\n`);
|
|
288
|
+
}
|
|
289
|
+
setTimeout(() => process.exit(2), 100);
|
|
290
|
+
return;
|
|
291
|
+
}
|
|
292
|
+
const msg = error instanceof Error ? error.message : String(error);
|
|
293
|
+
writeStderr(`${pc.red("Unexpected error:")} ${msg}`);
|
|
294
|
+
if (options.output === "json") {
|
|
295
|
+
const json = formatJsonError(msg, pkg.version, { configFile: options.config });
|
|
296
|
+
process.stdout.write(`${json}\n`);
|
|
297
|
+
}
|
|
221
298
|
setTimeout(() => process.exit(2), 100);
|
|
222
|
-
return;
|
|
223
299
|
}
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
setTimeout(() => process.exit(2), 100);
|
|
227
|
-
}
|
|
228
|
-
});
|
|
300
|
+
},
|
|
301
|
+
);
|
|
229
302
|
|
|
230
303
|
(async () => {
|
|
231
304
|
const isHelpRequest = process.argv.includes("--help") || process.argv.includes("-h");
|
package/src/config/loader.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { YAML } from "bun";
|
|
2
|
+
|
|
2
3
|
import { ConfigSchema } from "./schema.ts";
|
|
3
|
-
import type { Config } from "./types.ts";
|
|
4
|
+
import { type Config } from "./types.ts";
|
|
4
5
|
|
|
5
6
|
/** Error thrown when config loading or validation fails */
|
|
6
7
|
export class ConfigLoadError extends Error {
|
|
@@ -39,8 +40,7 @@ export async function loadConfig(filePath: string): Promise<Config> {
|
|
|
39
40
|
);
|
|
40
41
|
}
|
|
41
42
|
throw new ConfigLoadError(
|
|
42
|
-
`Cannot read config file: ${filePath}\n` +
|
|
43
|
-
` ${error instanceof Error ? error.message : String(error)}`,
|
|
43
|
+
`Cannot read config file: ${filePath}\n` + ` ${error instanceof Error ? error.message : String(error)}`,
|
|
44
44
|
error,
|
|
45
45
|
);
|
|
46
46
|
}
|
|
@@ -50,10 +50,7 @@ export async function loadConfig(filePath: string): Promise<Config> {
|
|
|
50
50
|
try {
|
|
51
51
|
raw = YAML.parse(content);
|
|
52
52
|
} catch (error) {
|
|
53
|
-
throw new ConfigLoadError(
|
|
54
|
-
`Invalid YAML syntax: ${error instanceof Error ? error.message : String(error)}`,
|
|
55
|
-
error,
|
|
56
|
-
);
|
|
53
|
+
throw new ConfigLoadError(`Invalid YAML syntax: ${error instanceof Error ? error.message : String(error)}`, error);
|
|
57
54
|
}
|
|
58
55
|
|
|
59
56
|
// Layer 3: Zod validation
|
|
@@ -61,15 +58,10 @@ export async function loadConfig(filePath: string): Promise<Config> {
|
|
|
61
58
|
const result = ConfigSchema.safeParse(raw);
|
|
62
59
|
if (!result.success) {
|
|
63
60
|
const issues = result.error.issues
|
|
64
|
-
.map(
|
|
65
|
-
(issue, i) =>
|
|
66
|
-
` ${i + 1}. ${issue.path.join(".")}: ${issue.message}`,
|
|
67
|
-
)
|
|
61
|
+
.map((issue, i) => ` ${i + 1}. ${issue.path.join(".")}: ${issue.message}`)
|
|
68
62
|
.join("\n");
|
|
69
63
|
const count = result.error.issues.length;
|
|
70
|
-
throw new ConfigLoadError(
|
|
71
|
-
`Invalid config (${count} issue${count > 1 ? "s" : ""})\n${issues}`,
|
|
72
|
-
);
|
|
64
|
+
throw new ConfigLoadError(`Invalid config (${count} issue${count > 1 ? "s" : ""})\n${issues}`);
|
|
73
65
|
}
|
|
74
66
|
|
|
75
67
|
return result.data;
|
package/src/config/types.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
|
-
import type
|
|
2
|
-
|
|
1
|
+
import { type z } from "zod";
|
|
2
|
+
|
|
3
|
+
import { type ConfigSchema, type TestCaseSchema } from "./schema.ts";
|
|
3
4
|
|
|
4
5
|
/** A single test case parsed from the config YAML */
|
|
5
6
|
export type TestCase = z.infer<typeof TestCaseSchema>;
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { Subprocess } from "bun";
|
|
1
|
+
import { type Subprocess } from "bun";
|
|
2
2
|
|
|
3
3
|
/**
|
|
4
4
|
* Tracks spawned subprocesses and ensures cleanup on shutdown.
|
|
@@ -25,7 +25,11 @@ export class ProcessManager {
|
|
|
25
25
|
proc.kill("SIGKILL");
|
|
26
26
|
}
|
|
27
27
|
}, 5000);
|
|
28
|
-
try {
|
|
28
|
+
try {
|
|
29
|
+
await proc.exited;
|
|
30
|
+
} finally {
|
|
31
|
+
clearTimeout(timeout);
|
|
32
|
+
}
|
|
29
33
|
}
|
|
30
34
|
});
|
|
31
35
|
await Promise.allSettled(kills);
|
package/src/infra/signals.ts
CHANGED
package/src/output/banner.ts
CHANGED
|
@@ -21,11 +21,7 @@ function rainbowLine(text: string, hueOffset: number): string {
|
|
|
21
21
|
}
|
|
22
22
|
|
|
23
23
|
const TITLE = " Super Ghost ";
|
|
24
|
-
const BANNER_LINES = [
|
|
25
|
-
` 👻${TITLE}👻`,
|
|
26
|
-
` ─────────────────────`,
|
|
27
|
-
` AI-powered E2E testing`,
|
|
28
|
-
];
|
|
24
|
+
const BANNER_LINES = [` 👻${TITLE}👻`, ` ─────────────────────`, ` AI-powered E2E testing`];
|
|
29
25
|
|
|
30
26
|
function renderBanner(hueOffset: number): string[] {
|
|
31
27
|
return [
|
|
@@ -40,31 +36,31 @@ const FRAME_MS = 60;
|
|
|
40
36
|
const HUE_STEP = 24;
|
|
41
37
|
|
|
42
38
|
export async function animateBanner(): Promise<void> {
|
|
43
|
-
const isTTY = process.
|
|
39
|
+
const isTTY = process.stderr.isTTY === true;
|
|
44
40
|
|
|
45
41
|
if (!isTTY) {
|
|
46
42
|
const lines = BANNER_LINES;
|
|
47
|
-
process.
|
|
43
|
+
process.stderr.write(`${lines.join("\n")}\n\n`);
|
|
48
44
|
return;
|
|
49
45
|
}
|
|
50
46
|
|
|
51
|
-
process.
|
|
47
|
+
process.stderr.write("\x1b[?25l"); // hide cursor
|
|
52
48
|
|
|
53
49
|
try {
|
|
54
50
|
for (let frame = 0; frame < FRAMES; frame++) {
|
|
55
51
|
const lines = renderBanner(frame * HUE_STEP);
|
|
56
52
|
if (frame > 0) {
|
|
57
53
|
// Move cursor up N lines to overwrite previous frame
|
|
58
|
-
process.
|
|
54
|
+
process.stderr.write(`\x1b[${lines.length}A`);
|
|
59
55
|
}
|
|
60
|
-
process.
|
|
56
|
+
process.stderr.write(`${lines.join("\n")}\n`);
|
|
61
57
|
|
|
62
58
|
if (frame < FRAMES - 1) {
|
|
63
59
|
await new Promise<void>((resolve) => setTimeout(resolve, FRAME_MS));
|
|
64
60
|
}
|
|
65
61
|
}
|
|
66
|
-
process.
|
|
62
|
+
process.stderr.write("\n");
|
|
67
63
|
} finally {
|
|
68
|
-
process.
|
|
64
|
+
process.stderr.write("\x1b[?25h"); // restore cursor
|
|
69
65
|
}
|
|
70
66
|
}
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
import { type RunResult } from "../runner/types.ts";
|
|
2
|
+
|
|
3
|
+
/** Metadata about the test run environment and configuration */
|
|
4
|
+
export interface JsonOutputMetadata {
|
|
5
|
+
model: string;
|
|
6
|
+
provider: string;
|
|
7
|
+
configFile: string;
|
|
8
|
+
baseUrl: string | undefined;
|
|
9
|
+
timestamp: string;
|
|
10
|
+
filter?: {
|
|
11
|
+
pattern: string;
|
|
12
|
+
matched: number;
|
|
13
|
+
total: number;
|
|
14
|
+
};
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
/**
 * Top-level JSON output structure shared by all output modes
 * (completed run, dry-run listing, and error report).
 */
export interface JsonOutput {
  /** Version string passed in by the caller of the formatter. */
  version: string;
  /** `exitCode === 0` for completed runs, `true` for dry runs, `false` for error reports. */
  success: boolean;
  /** Exit code reported alongside the result (0 for dry runs, 2 for error reports). */
  exitCode: number;
  /** Present (and `true`) only in dry-run output. */
  dryRun?: boolean;
  /** Top-level error message; present only in error reports. */
  error?: string;
  /** Environment and configuration metadata for the run. */
  metadata: JsonOutputMetadata;
  /** Aggregate counters for the run. */
  summary: {
    passed: number;
    failed: number;
    cached: number;
    skipped: number;
    /** Number of listed tests; only populated by the dry-run formatter. */
    total?: number;
    /** Total run duration in milliseconds; only populated for completed runs. */
    totalDurationMs?: number;
  };
  /** One entry per test; empty for error reports. */
  tests: {
    testName: string;
    testCase: string;
    /** Omitted in dry-run listings. */
    status?: string;
    source: string;
    /** Omitted in dry-run listings. */
    durationMs?: number;
    /** Emitted only when the test result reports `selfHealed === true`. */
    selfHealed?: boolean;
    /** Emitted only when the test result carries an error. */
    error?: string;
  }[];
}
|
|
43
|
+
|
|
44
|
+
/**
 * Format a completed run result as a pretty-printed JSON string.
 *
 * Each per-test entry includes `selfHealed` only when it is strictly `true`
 * and `error` only when one is defined; all other fields are copied as-is.
 *
 * @param runResult - Aggregated run result whose counters and per-test results are copied into the output.
 * @param metadata - Run metadata embedded unchanged under `metadata`.
 * @param version - Version string embedded under `version`.
 * @param exitCode - Exit code to report; `success` is `true` only when it equals 0.
 * @returns The output document serialized with two-space indentation.
 */
export function formatJsonOutput(
  runResult: RunResult,
  metadata: JsonOutputMetadata,
  version: string,
  exitCode: number,
): string {
  const output: JsonOutput = {
    version,
    success: exitCode === 0,
    exitCode,
    metadata,
    summary: {
      passed: runResult.passed,
      failed: runResult.failed,
      cached: runResult.cached,
      skipped: runResult.skipped,
      totalDurationMs: runResult.totalDurationMs,
    },
    tests: runResult.results.map((r) => {
      // Declared with the real entry type (instead of Record<string, unknown> plus a cast)
      // so the compiler verifies every field that ends up in the JSON document.
      const entry: JsonOutput["tests"][number] = {
        testName: r.testName,
        testCase: r.testCase,
        status: r.status,
        source: r.source,
        durationMs: r.durationMs,
      };
      // Optional keys are added only when meaningful, keeping the output compact.
      if (r.selfHealed === true) {
        entry.selfHealed = true;
      }
      if (r.error !== undefined) {
        entry.error = r.error;
      }
      return entry;
    }),
  };

  return JSON.stringify(output, null, 2);
}
|
|
86
|
+
|
|
87
|
+
/**
 * Serialize a dry-run test listing to pretty-printed JSON.
 *
 * The document always reports `dryRun: true`, `success: true` and
 * `exitCode: 0`. Nothing is executed, so `passed`, `failed` and `skipped`
 * are zero; `cached` counts the tests whose source is `"cache"` and `total`
 * is the number of listed tests.
 *
 * @param tests - Tests that would run, each with its name, case text and source.
 * @param metadata - Run metadata embedded unchanged under `metadata`.
 * @param version - Version string embedded under `version`.
 * @returns The listing serialized with two-space indentation.
 */
export function formatJsonDryRun(
  tests: Array<{ name: string; case: string; source: "cache" | "ai" }>,
  metadata: JsonOutputMetadata,
  version: string,
): string {
  const fromCache = tests.reduce((count, planned) => (planned.source === "cache" ? count + 1 : count), 0);

  const listing = tests.map((planned) => {
    return { testName: planned.name, testCase: planned.case, source: planned.source };
  });

  const payload: JsonOutput = {
    version,
    success: true,
    exitCode: 0,
    dryRun: true,
    metadata,
    summary: { passed: 0, failed: 0, cached: fromCache, skipped: 0, total: tests.length },
    tests: listing,
  };

  return JSON.stringify(payload, null, 2);
}
|
|
120
|
+
|
|
121
|
+
/**
 * Format an error condition as a pretty-printed JSON string.
 *
 * Produces `success: false`, `exitCode: 2`, zeroed summary counters, an empty
 * `tests` array and the given error message. Missing metadata fields are
 * filled with defaults so the document always has a complete `metadata`
 * object: empty strings for `model`, `provider` and `configFile`, and the
 * current time (ISO string) for `timestamp`.
 *
 * @param errorMessage - Message reported under the top-level `error` key.
 * @param version - Version string embedded under `version`.
 * @param metadata - Whatever metadata is known at the point of failure.
 * @returns The error document serialized with two-space indentation.
 */
export function formatJsonError(errorMessage: string, version: string, metadata: Partial<JsonOutputMetadata>): string {
  const fullMetadata: JsonOutputMetadata = {
    model: metadata.model ?? "",
    provider: metadata.provider ?? "",
    configFile: metadata.configFile ?? "",
    baseUrl: metadata.baseUrl,
    timestamp: metadata.timestamp ?? new Date().toISOString(),
    // Preserve caller-supplied filter details instead of silently dropping them;
    // the key is omitted entirely when no filter was provided.
    ...(metadata.filter !== undefined ? { filter: metadata.filter } : {}),
  };

  const output: JsonOutput = {
    version,
    success: false,
    exitCode: 2,
    error: errorMessage,
    metadata: fullMetadata,
    summary: {
      passed: 0,
      failed: 0,
      cached: 0,
      skipped: 0,
    },
    tests: [],
  };

  return JSON.stringify(output, null, 2);
}
|
package/src/output/reporter.ts
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
|
-
import pc from "picocolors";
|
|
2
1
|
import { createSpinner } from "nanospinner";
|
|
3
|
-
import
|
|
4
|
-
|
|
2
|
+
import pc from "picocolors";
|
|
3
|
+
|
|
4
|
+
import { type RunResult, type TestResult } from "../runner/types.ts";
|
|
5
|
+
import { type Reporter, type StepInfo } from "./types.ts";
|
|
5
6
|
|
|
6
7
|
/**
|
|
7
8
|
* Format milliseconds as a human-readable duration string.
|
|
@@ -16,7 +17,7 @@ export function formatDuration(ms: number): string {
|
|
|
16
17
|
|
|
17
18
|
/** Write a line of text to stderr */
|
|
18
19
|
export function writeStderr(text: string): void {
|
|
19
|
-
Bun.write(Bun.stderr, text
|
|
20
|
+
Bun.write(Bun.stderr, `${text}\n`);
|
|
20
21
|
}
|
|
21
22
|
|
|
22
23
|
/**
|
|
@@ -64,7 +65,7 @@ export class ConsoleReporter implements Reporter {
|
|
|
64
65
|
} else if (this.spinner) {
|
|
65
66
|
let spinnerText = `${this.currentTestName} \u2014 ${step.description.full}`;
|
|
66
67
|
if (spinnerText.length > 60) {
|
|
67
|
-
spinnerText = spinnerText.slice(0, 57)
|
|
68
|
+
spinnerText = `${spinnerText.slice(0, 57)}...`;
|
|
68
69
|
}
|
|
69
70
|
this.spinner.update(spinnerText);
|
|
70
71
|
}
|
|
@@ -79,9 +80,7 @@ export class ConsoleReporter implements Reporter {
|
|
|
79
80
|
writeStderr(` ${bar}`);
|
|
80
81
|
writeStderr(` Total: ${data.results.length}`);
|
|
81
82
|
writeStderr(` Passed: ${pc.green(String(data.passed))}`);
|
|
82
|
-
writeStderr(
|
|
83
|
-
` Failed: ${data.failed > 0 ? pc.red(String(data.failed)) : String(data.failed)}`,
|
|
84
|
-
);
|
|
83
|
+
writeStderr(` Failed: ${data.failed > 0 ? pc.red(String(data.failed)) : String(data.failed)}`);
|
|
85
84
|
if (data.skipped > 0) {
|
|
86
85
|
writeStderr(` Skipped: ${data.skipped}`);
|
|
87
86
|
}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type
|
|
1
|
+
import { type StepDescription } from "./types.ts";
|
|
2
2
|
|
|
3
3
|
/** Maps raw MCP tool names to human-readable action names */
|
|
4
4
|
const PREFIX_MAP: Record<string, string> = {
|
|
@@ -46,24 +46,15 @@ const KEY_ARG_MAP: Record<string, string> = {
|
|
|
46
46
|
* Unknown tools fall back to: strip underscores, capitalize first letter.
|
|
47
47
|
* Key arguments are extracted based on tool type (e.g., "url" for navigate).
|
|
48
48
|
*/
|
|
49
|
-
export function describeToolCall(
|
|
50
|
-
toolName: string,
|
|
51
|
-
input: Record<string, unknown>,
|
|
52
|
-
): StepDescription {
|
|
49
|
+
export function describeToolCall(toolName: string, input: Record<string, unknown>): StepDescription {
|
|
53
50
|
// Look up human name, or derive from raw name as fallback
|
|
54
|
-
const action =
|
|
55
|
-
PREFIX_MAP[toolName] ??
|
|
56
|
-
toolName
|
|
57
|
-
.replace(/_/g, " ")
|
|
58
|
-
.replace(/^\w/, (c) => c.toUpperCase());
|
|
51
|
+
const action = PREFIX_MAP[toolName] ?? toolName.replace(/_/g, " ").replace(/^\w/, (c) => c.toUpperCase());
|
|
59
52
|
|
|
60
53
|
// Look up which input field is the key argument for this tool
|
|
61
54
|
const keyArgField = KEY_ARG_MAP[toolName];
|
|
62
55
|
const rawKeyArg = keyArgField ? input[keyArgField] : undefined;
|
|
63
56
|
const keyArg =
|
|
64
|
-
rawKeyArg !== undefined && rawKeyArg !== null && String(rawKeyArg) !== ""
|
|
65
|
-
? String(rawKeyArg)
|
|
66
|
-
: undefined;
|
|
57
|
+
rawKeyArg !== undefined && rawKeyArg !== null && String(rawKeyArg) !== "" ? String(rawKeyArg) : undefined;
|
|
67
58
|
|
|
68
59
|
const full = keyArg ? `${action} \u2192 ${keyArg}` : action;
|
|
69
60
|
|
package/src/output/types.ts
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import type
|
|
2
|
-
import type
|
|
3
|
-
import type
|
|
4
|
-
import type
|
|
5
|
-
import type
|
|
6
|
-
import type
|
|
1
|
+
import { type AgentExecutionResult } from "../agent/types.ts";
|
|
2
|
+
import { type CacheManager } from "../cache/cache-manager.ts";
|
|
3
|
+
import { type StepReplayer } from "../cache/step-replayer.ts";
|
|
4
|
+
import { type Config } from "../config/types.ts";
|
|
5
|
+
import { type OnStepProgress } from "../output/types.ts";
|
|
6
|
+
import { type TestResult } from "./types.ts";
|
|
7
7
|
|
|
8
8
|
/** Function signature for executing a test via the AI agent */
|
|
9
9
|
type ExecuteAgentFn = (config: {
|
|
@@ -29,10 +29,9 @@ export class TestExecutor {
|
|
|
29
29
|
private readonly executeAgentFn: ExecuteAgentFn;
|
|
30
30
|
private readonly model: any;
|
|
31
31
|
private readonly tools: Record<string, any>;
|
|
32
|
-
private readonly config: Pick<
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
> & { context?: string };
|
|
32
|
+
private readonly config: Pick<Config, "maxAttempts" | "recursionLimit" | "model" | "modelProvider"> & {
|
|
33
|
+
context?: string;
|
|
34
|
+
};
|
|
36
35
|
private readonly globalContext?: string;
|
|
37
36
|
private readonly noCache: boolean;
|
|
38
37
|
private readonly onStepProgress?: OnStepProgress;
|
|
@@ -43,10 +42,7 @@ export class TestExecutor {
|
|
|
43
42
|
executeAgentFn: ExecuteAgentFn;
|
|
44
43
|
model?: any;
|
|
45
44
|
tools?: Record<string, any>;
|
|
46
|
-
config: Pick<
|
|
47
|
-
Config,
|
|
48
|
-
"maxAttempts" | "recursionLimit" | "model" | "modelProvider"
|
|
49
|
-
> & { context?: string };
|
|
45
|
+
config: Pick<Config, "maxAttempts" | "recursionLimit" | "model" | "modelProvider"> & { context?: string };
|
|
50
46
|
globalContext?: string;
|
|
51
47
|
noCache?: boolean;
|
|
52
48
|
onStepProgress?: OnStepProgress;
|
|
@@ -63,11 +59,7 @@ export class TestExecutor {
|
|
|
63
59
|
}
|
|
64
60
|
|
|
65
61
|
/** Execute a single test case with cache-first strategy */
|
|
66
|
-
async execute(
|
|
67
|
-
testCase: string,
|
|
68
|
-
baseUrl: string,
|
|
69
|
-
testContext?: string,
|
|
70
|
-
): Promise<TestResult> {
|
|
62
|
+
async execute(testCase: string, baseUrl: string, testContext?: string): Promise<TestResult> {
|
|
71
63
|
const start = Date.now();
|
|
72
64
|
|
|
73
65
|
// Phase 1: Try cache replay (unless noCache)
|
|
@@ -1,13 +1,9 @@
|
|
|
1
|
-
import type
|
|
2
|
-
import type
|
|
3
|
-
import type
|
|
1
|
+
import { type Config } from "../config/types.ts";
|
|
2
|
+
import { type Reporter } from "../output/types.ts";
|
|
3
|
+
import { type RunResult, type TestResult } from "./types.ts";
|
|
4
4
|
|
|
5
5
|
/** Function signature for executing a single test case */
|
|
6
|
-
export type ExecuteFn = (
|
|
7
|
-
testCase: string,
|
|
8
|
-
baseUrl: string,
|
|
9
|
-
testContext?: string,
|
|
10
|
-
) => Promise<TestResult>;
|
|
6
|
+
export type ExecuteFn = (testCase: string, baseUrl: string, testContext?: string) => Promise<TestResult>;
|
|
11
7
|
|
|
12
8
|
/**
|
|
13
9
|
* Orchestrates sequential execution of all test cases.
|
|
@@ -50,18 +46,13 @@ export class TestRunner {
|
|
|
50
46
|
}
|
|
51
47
|
|
|
52
48
|
/** Aggregate individual test results into a run summary */
|
|
53
|
-
function aggregateResults(
|
|
54
|
-
results: TestResult[],
|
|
55
|
-
totalDurationMs: number,
|
|
56
|
-
): RunResult {
|
|
49
|
+
function aggregateResults(results: TestResult[], totalDurationMs: number): RunResult {
|
|
57
50
|
return {
|
|
58
51
|
results,
|
|
59
52
|
totalDurationMs,
|
|
60
53
|
passed: results.filter((r) => r.status === "passed").length,
|
|
61
54
|
failed: results.filter((r) => r.status === "failed").length,
|
|
62
|
-
cached: results.filter(
|
|
63
|
-
(r) => r.source === "cache" && r.status === "passed",
|
|
64
|
-
).length,
|
|
55
|
+
cached: results.filter((r) => r.source === "cache" && r.status === "passed").length,
|
|
65
56
|
skipped: 0,
|
|
66
57
|
};
|
|
67
58
|
}
|