superghost 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +172 -0
- package/package.json +77 -0
- package/src/agent/agent-runner.ts +69 -0
- package/src/agent/mcp-manager.ts +78 -0
- package/src/agent/model-factory.ts +71 -0
- package/src/agent/prompt.ts +47 -0
- package/src/agent/types.ts +28 -0
- package/src/cache/cache-manager.ts +105 -0
- package/src/cache/step-recorder.ts +50 -0
- package/src/cache/step-replayer.ts +51 -0
- package/src/cache/types.ts +27 -0
- package/src/cli.ts +128 -0
- package/src/config/loader.ts +76 -0
- package/src/config/schema.ts +26 -0
- package/src/config/types.ts +8 -0
- package/src/dist/paths.ts +52 -0
- package/src/dist/setup.ts +66 -0
- package/src/infra/process-manager.ts +34 -0
- package/src/infra/signals.ts +20 -0
- package/src/output/reporter.ts +75 -0
- package/src/output/types.ts +8 -0
- package/src/runner/test-executor.ts +145 -0
- package/src/runner/test-runner.ts +66 -0
- package/src/runner/types.ts +26 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Luis Morales
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
# SuperGhost
|
|
2
|
+
|
|
3
|
+
Plain English test cases with AI execution and instant cached replay for CI/CD.
|
|
4
|
+
|
|
5
|
+
Write tests in YAML. An AI agent executes them in a real browser or via API calls. Results are cached step-by-step so re-runs are instant and deterministic -- no flaky tests, no test code to maintain.
|
|
6
|
+
|
|
7
|
+

|
|
8
|
+
|
|
9
|
+
## Install
|
|
10
|
+
|
|
11
|
+
### Zero-install (recommended)
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
bunx superghost --config tests.yaml
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
### Global install
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
bun install -g superghost
|
|
21
|
+
superghost --config tests.yaml
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
### Standalone binary
|
|
25
|
+
|
|
26
|
+
Download the latest binary for your platform from [GitHub Releases](https://github.com/lacion/superghost/releases).
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
chmod +x superghost-darwin-arm64
|
|
30
|
+
./superghost-darwin-arm64 --config tests.yaml
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
On first run, the standalone binary automatically installs MCP server dependencies to `~/.superghost/`.
|
|
34
|
+
|
|
35
|
+
## Quick Start
|
|
36
|
+
|
|
37
|
+
Create a `tests.yaml` file:
|
|
38
|
+
|
|
39
|
+
```yaml
|
|
40
|
+
baseUrl: https://example.com
|
|
41
|
+
model: claude-sonnet-4-20250514
|
|
42
|
+
|
|
43
|
+
tests:
|
|
44
|
+
- name: Homepage loads
|
|
45
|
+
case: Navigate to the homepage and verify the page title contains "Example"
|
|
46
|
+
|
|
47
|
+
- name: API health check
|
|
48
|
+
case: Send a GET request to /api/health and verify the response status is 200
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
Run it:
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
bunx superghost --config tests.yaml
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
## CLI
|
|
58
|
+
|
|
59
|
+
```
|
|
60
|
+
Usage: superghost [options]
|
|
61
|
+
|
|
62
|
+
Options:
|
|
63
|
+
-c, --config <path> Path to YAML config file (required)
|
|
64
|
+
-V, --version Output the version number
|
|
65
|
+
-h, --help Display help
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## Provider Setup
|
|
69
|
+
|
|
70
|
+
SuperGhost supports four AI providers. Set the appropriate environment variable for your chosen provider.
|
|
71
|
+
|
|
72
|
+
### Anthropic (default)
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
export ANTHROPIC_API_KEY=sk-ant-...
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
```yaml
|
|
79
|
+
model: claude-sonnet-4-20250514
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
### OpenAI
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
export OPENAI_API_KEY=sk-...
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
```yaml
|
|
89
|
+
model: gpt-4o
|
|
90
|
+
modelProvider: openai
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
### Google Gemini
|
|
94
|
+
|
|
95
|
+
```bash
|
|
96
|
+
export GOOGLE_GENERATIVE_AI_API_KEY=...
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
```yaml
|
|
100
|
+
model: gemini-2.5-flash
|
|
101
|
+
modelProvider: gemini
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
### OpenRouter
|
|
105
|
+
|
|
106
|
+
```bash
|
|
107
|
+
export OPENROUTER_API_KEY=sk-or-...
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
```yaml
|
|
111
|
+
model: anthropic/claude-sonnet-4-20250514
|
|
112
|
+
modelProvider: openrouter
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
## Configuration
|
|
116
|
+
|
|
117
|
+
All fields in `tests.yaml`:
|
|
118
|
+
|
|
119
|
+
| Field | Type | Default | Description |
|
|
120
|
+
|-------|------|---------|-------------|
|
|
121
|
+
| `baseUrl` | `string` | (required) | Base URL for all tests |
|
|
122
|
+
| `model` | `string` | (required) | AI model identifier |
|
|
123
|
+
| `modelProvider` | `string` | `"anthropic"` | Provider: `anthropic`, `openai`, `gemini`, `openrouter` |
|
|
124
|
+
| `browser` | `string` | `"chromium"` | Browser engine: `chromium`, `firefox`, `webkit` |
|
|
125
|
+
| `headless` | `boolean` | `false` | Run browser in headless mode |
|
|
126
|
+
| `cacheDir` | `string` | `".superghost-cache"` | Directory for cached test steps |
|
|
127
|
+
| `context` | `string` | `undefined` | Global context passed to every test |
|
|
128
|
+
| `tests` | `array` | (required) | Array of test definitions |
|
|
129
|
+
| `tests[].name` | `string` | `undefined` | Display name for the test |
|
|
130
|
+
| `tests[].case` | `string` | (required) | Plain English test instruction |
|
|
131
|
+
| `tests[].context` | `string` | `undefined` | Per-test context for the AI agent |
|
|
132
|
+
|
|
133
|
+
## How It Works
|
|
134
|
+
|
|
135
|
+
1. **First run:** The AI agent reads your plain English test case and executes it step-by-step in a real browser (via Playwright MCP) or via API calls (via curl MCP). Each step is recorded to a cache file.
|
|
136
|
+
|
|
137
|
+
2. **Subsequent runs:** Cached steps are replayed directly against the browser/API without calling the AI. This makes re-runs instant and deterministic.
|
|
138
|
+
|
|
139
|
+
3. **Self-healing:** If a cached step fails during replay (e.g., a selector changed), SuperGhost automatically falls back to the AI agent to re-execute that test. The new steps replace the stale cache.
|
|
140
|
+
|
|
141
|
+
## Example App (E2E)
|
|
142
|
+
|
|
143
|
+
The `e2e/` directory contains a full-stack Task Manager app that validates SuperGhost end-to-end and serves as a reference for writing test configs.
|
|
144
|
+
|
|
145
|
+
```bash
|
|
146
|
+
# Start the example app
|
|
147
|
+
bun run e2e:app
|
|
148
|
+
# Open http://localhost:3777
|
|
149
|
+
|
|
150
|
+
# Run smoke tests (2 tests — requires an AI API key)
|
|
151
|
+
bun run e2e:smoke
|
|
152
|
+
|
|
153
|
+
# Run browser UI tests (7 tests)
|
|
154
|
+
bun run e2e:browser
|
|
155
|
+
|
|
156
|
+
# Run API endpoint tests (7 tests)
|
|
157
|
+
bun run e2e:api
|
|
158
|
+
|
|
159
|
+
# Run all 16 tests
|
|
160
|
+
bun run e2e:all
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
The test runner exits gracefully when no API key is configured, making it safe for CI environments. See [`e2e/README.md`](e2e/README.md) for details.
|
|
164
|
+
|
|
165
|
+
## Standalone Binary
|
|
166
|
+
|
|
167
|
+
When running as a standalone compiled binary (downloaded from GitHub Releases), SuperGhost cannot use `bunx` to spawn MCP server packages. Instead:
|
|
168
|
+
|
|
169
|
+
- On first run, MCP dependencies (`@playwright/mcp`, `@calibress/curl-mcp`) are automatically installed to `~/.superghost/`
|
|
170
|
+
- Subsequent runs skip the install step
|
|
171
|
+
- You must have a Playwright-compatible browser installed on your system (Chromium, Firefox, or WebKit)
|
|
172
|
+
- SuperGhost does **not** auto-install browser binaries -- if Playwright cannot find a browser, it will display its own error message with install instructions
|
package/package.json
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "superghost",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Plain English test cases with AI execution and instant cached replay for CI/CD",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"bin": {
|
|
7
|
+
"superghost": "src/cli.ts"
|
|
8
|
+
},
|
|
9
|
+
"files": [
|
|
10
|
+
"src/",
|
|
11
|
+
"README.md",
|
|
12
|
+
"LICENSE"
|
|
13
|
+
],
|
|
14
|
+
"scripts": {
|
|
15
|
+
"test": "bun test",
|
|
16
|
+
"typecheck": "bunx tsc --noEmit",
|
|
17
|
+
"build:binary": "bun run scripts/build-binaries.ts",
|
|
18
|
+
"prepublishOnly": "bun test && bunx tsc --noEmit",
|
|
19
|
+
"e2e": "bun run e2e/run-e2e.ts",
|
|
20
|
+
"e2e:smoke": "bun run e2e/run-e2e.ts smoke",
|
|
21
|
+
"e2e:browser": "bun run e2e/run-e2e.ts browser",
|
|
22
|
+
"e2e:api": "bun run e2e/run-e2e.ts api",
|
|
23
|
+
"e2e:all": "bun run e2e/run-e2e.ts all",
|
|
24
|
+
"e2e:headed": "bun run e2e/run-e2e.ts smoke --headed",
|
|
25
|
+
"e2e:smoke:headed": "bun run e2e/run-e2e.ts smoke --headed",
|
|
26
|
+
"e2e:browser:headed": "bun run e2e/run-e2e.ts browser --headed",
|
|
27
|
+
"e2e:api:headed": "bun run e2e/run-e2e.ts api --headed",
|
|
28
|
+
"e2e:all:headed": "bun run e2e/run-e2e.ts all --headed",
|
|
29
|
+
"e2e:app": "bun run e2e/app/server.ts"
|
|
30
|
+
},
|
|
31
|
+
"keywords": [
|
|
32
|
+
"testing",
|
|
33
|
+
"ai",
|
|
34
|
+
"browser",
|
|
35
|
+
"e2e",
|
|
36
|
+
"playwright",
|
|
37
|
+
"mcp",
|
|
38
|
+
"cli"
|
|
39
|
+
],
|
|
40
|
+
"license": "MIT",
|
|
41
|
+
"author": "Luis Morales (https://github.com/lacion)",
|
|
42
|
+
"engines": {
|
|
43
|
+
"bun": ">=1.2.0"
|
|
44
|
+
},
|
|
45
|
+
"publishConfig": {
|
|
46
|
+
"access": "public"
|
|
47
|
+
},
|
|
48
|
+
"repository": {
|
|
49
|
+
"type": "git",
|
|
50
|
+
"url": "git+https://github.com/lacion/superghost.git"
|
|
51
|
+
},
|
|
52
|
+
"homepage": "https://github.com/lacion/superghost#readme",
|
|
53
|
+
"bugs": {
|
|
54
|
+
"url": "https://github.com/lacion/superghost/issues"
|
|
55
|
+
},
|
|
56
|
+
"dependencies": {
|
|
57
|
+
"@ai-sdk/anthropic": "^3.0.58",
|
|
58
|
+
"@ai-sdk/google": "^3.0.37",
|
|
59
|
+
"@ai-sdk/mcp": "^1.0.25",
|
|
60
|
+
"@ai-sdk/openai": "^3.0.41",
|
|
61
|
+
"@modelcontextprotocol/sdk": "^1.27.1",
|
|
62
|
+
"@openrouter/ai-sdk-provider": "^2.2.5",
|
|
63
|
+
"ai": "^6.0.116",
|
|
64
|
+
"commander": "^14.0.3",
|
|
65
|
+
"nanospinner": "^1.2.2",
|
|
66
|
+
"picocolors": "^1.1.1",
|
|
67
|
+
"zod": "^4.3.6"
|
|
68
|
+
},
|
|
69
|
+
"devDependencies": {
|
|
70
|
+
"@types/bun": "^1.3.10",
|
|
71
|
+
"@types/react": "^19.0.0",
|
|
72
|
+
"@types/react-dom": "^19.0.0",
|
|
73
|
+
"react": "^19.0.0",
|
|
74
|
+
"react-dom": "^19.0.0",
|
|
75
|
+
"typescript": "^5.9.3"
|
|
76
|
+
}
|
|
77
|
+
}
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
import { generateText, Output, stepCountIs } from "ai";
|
|
2
|
+
import { z } from "zod";
|
|
3
|
+
import { StepRecorder } from "../cache/step-recorder.ts";
|
|
4
|
+
import type { AgentExecutionResult } from "./types.ts";
|
|
5
|
+
import { buildSystemPrompt } from "./prompt.ts";
|
|
6
|
+
|
|
7
|
+
/**
 * Zod schema for the verdict the agent has to return.
 * The structured output is a JSON object of the form { passed, message }.
 */
const passedField = z.boolean().describe("Whether the test case passed");
const messageField = z
  .string()
  .describe("Brief diagnostic: what happened and what the page showed");

const TestResultSchema = z.object({
  passed: passedField,
  message: messageField,
});
|
|
17
|
+
|
|
18
|
+
/**
 * Tell whether an error thrown while reading the structured output means
 * "the model produced no structured result" rather than a real failure.
 *
 * NOTE(review): the error names below are the ones the AI SDK is believed to
 * use for its "no output" errors — confirm against the installed `ai` version.
 */
function isMissingOutputError(error: unknown): boolean {
  return (
    error instanceof Error &&
    (error.name === "AI_NoOutputGeneratedError" ||
      error.name === "AI_NoOutputSpecifiedError")
  );
}

/**
 * Execute a single test case using the AI agent with MCP tools.
 *
 * Uses Vercel AI SDK's generateText with:
 * - Output.object() for structured { passed, message } responses
 * - stopWhen: stepCountIs(recursionLimit) for loop control
 * - StepRecorder tool wrapping for cache step capture
 *
 * @param config.model - Model instance handed to generateText unchanged
 * @param config.tools - Tool set; wrapped by the StepRecorder before use
 * @param config.testCase - Plain English test case to execute
 * @param config.baseUrl - Base URL included in the system prompt
 * @param config.recursionLimit - Step count passed to stepCountIs()
 * @param config.globalContext - Optional context forwarded to buildSystemPrompt
 * @param config.testContext - Optional per-test context forwarded to buildSystemPrompt
 * @returns AgentExecutionResult with pass/fail status, diagnostic message, and
 *   recorded steps. When no structured verdict is available the result is a
 *   failure whose message points at the step limit.
 * @throws Any error raised by generateText, and any error raised while reading
 *   the output that is not a "no output" error.
 */
export async function executeAgent(config: {
  model: any;
  tools: Record<string, any>;
  testCase: string;
  baseUrl: string;
  recursionLimit: number;
  globalContext?: string;
  testContext?: string;
}): Promise<AgentExecutionResult> {
  const recorder = new StepRecorder();
  const wrappedTools = recorder.wrapTools(config.tools);

  const systemPrompt = buildSystemPrompt(
    config.testCase,
    config.baseUrl,
    config.globalContext,
    config.testContext,
  );

  const result = await generateText({
    model: config.model,
    tools: wrappedTools,
    system: systemPrompt,
    prompt: `Execute the test case: "${config.testCase}"`,
    stopWhen: stepCountIs(config.recursionLimit),
    output: Output.object({ schema: TestResultSchema }),
  });

  // Read the verdict defensively: the accessor may yield null/undefined or,
  // presumably in some SDK versions, throw when the run ended without a
  // structured result (e.g. the step limit was hit) — TODO confirm.
  let verdict: { passed: boolean; message: string } | null | undefined;
  try {
    verdict = result.output;
  } catch (error) {
    if (!isMissingOutputError(error)) throw error;
    verdict = null;
  }

  // `== null` deliberately covers both null and undefined.
  if (verdict == null) {
    return {
      passed: false,
      message: `Agent did not produce a structured result — it may have exceeded the ${config.recursionLimit} step limit`,
      steps: recorder.getSteps(),
    };
  }

  return {
    passed: verdict.passed,
    message: verdict.message,
    steps: recorder.getSteps(),
  };
}
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import { createMCPClient } from "@ai-sdk/mcp";
|
|
2
|
+
import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js";
|
|
3
|
+
import type { Config } from "../config/types.ts";
|
|
4
|
+
import { getMcpCommand } from "../dist/paths.ts";
|
|
5
|
+
|
|
6
|
+
/** Client handle type returned by createMCPClient. */
type McpClient = Awaited<ReturnType<typeof createMCPClient>>;

/**
 * Manages the lifecycle of Playwright and curl MCP servers.
 *
 * MCP servers are shared across the test suite (not restarted per test).
 * Fresh browser context per test comes from the `--isolated` flag on
 * Playwright MCP. Both tool sets are merged and provided to the agent
 * regardless of test type.
 */
export class McpManager {
  private playwrightClient: McpClient | null = null;
  private curlClient: McpClient | null = null;

  constructor(private readonly config: Pick<Config, "browser" | "headless">) {}

  /**
   * Spawn Playwright MCP and curl MCP servers via stdio transport.
   * Must be called before getTools().
   *
   * If either server fails to start, any client that was already created is
   * closed before the original error is rethrown, so a partial start does not
   * leave a server process behind.
   */
  async initialize(): Promise<void> {
    // Resolve MCP spawn commands (bunx in npm mode, path-based in standalone)
    const playwrightCmd = getMcpCommand("@playwright/mcp");
    const curlCmd = getMcpCommand("@calibress/curl-mcp");

    // "--headless" (when enabled) goes directly after the resolved command
    // args and before the remaining Playwright flags.
    const playwrightArgs = [
      ...playwrightCmd.args,
      ...(this.config.headless ? ["--headless"] : []),
      "--isolated",
      `--browser=${this.config.browser}`,
    ];

    try {
      this.playwrightClient = await createMCPClient({
        transport: new StdioClientTransport({
          command: playwrightCmd.command,
          args: playwrightArgs,
        }),
      });

      this.curlClient = await createMCPClient({
        transport: new StdioClientTransport({
          command: curlCmd.command,
          args: [...curlCmd.args],
        }),
      });
    } catch (error) {
      await this.close();
      throw error;
    }
  }

  /**
   * Get merged tool set from both Playwright and curl MCP servers.
   * Provides ALL tools to the agent regardless of test type.
   * On a name clash the curl tool wins because it is spread last.
   *
   * @throws Error if initialize() has not completed successfully
   */
  async getTools(): Promise<Record<string, any>> {
    const { playwrightClient, curlClient } = this;
    if (playwrightClient === null || curlClient === null) {
      throw new Error(
        "McpManager.getTools() called before initialize() completed",
      );
    }

    const [playwrightTools, curlTools] = await Promise.all([
      playwrightClient.tools(),
      curlClient.tools(),
    ]);
    return { ...playwrightTools, ...curlTools };
  }

  /**
   * Close both MCP server connections.
   * Uses Promise.allSettled to ensure both servers are cleaned up
   * even if one fails to close. Safe to call when nothing was started.
   */
  async close(): Promise<void> {
    await Promise.allSettled([
      this.playwrightClient?.close(),
      this.curlClient?.close(),
    ]);
    this.playwrightClient = null;
    this.curlClient = null;
  }
}
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import { anthropic } from "@ai-sdk/anthropic";
|
|
2
|
+
import { openai } from "@ai-sdk/openai";
|
|
3
|
+
import { google } from "@ai-sdk/google";
|
|
4
|
+
import { createOpenRouter } from "@openrouter/ai-sdk-provider";
|
|
5
|
+
|
|
6
|
+
/** The LLM providers this module knows how to build a model for. */
export type ProviderName =
  | "anthropic"
  | "openai"
  | "google"
  | "openrouter";

/** For each provider, the environment variable that carries its API key. */
export const ENV_VARS: Record<ProviderName, string> = {
  anthropic: "ANTHROPIC_API_KEY",
  openai: "OPENAI_API_KEY",
  google: "GOOGLE_GENERATIVE_AI_API_KEY",
  openrouter: "OPENROUTER_API_KEY",
};
|
|
16
|
+
|
|
17
|
+
// Ordered inference rules: the first pattern that matches the model name wins.
// The catch-all "contains a slash" rule for OpenRouter is listed last.
const MODEL_PREFIX_MAP: Array<[RegExp, ProviderName]> = [
  [/^claude-/, "anthropic"],
  [/^gpt-/, "openai"],
  [/^o\d/, "openai"],
  [/^gemini-/, "google"],
  [/\//, "openrouter"],
];

/**
 * Work out which provider serves a given model name.
 *
 * @param modelName - Model identifier as written in the config
 * @returns The provider of the first matching rule, or "anthropic" when no
 *   rule matches.
 */
export function inferProvider(modelName: string): ProviderName {
  const rule = MODEL_PREFIX_MAP.find(([pattern]) => pattern.test(modelName));
  return rule === undefined ? "anthropic" : rule[1];
}
|
|
36
|
+
|
|
37
|
+
/**
 * Ensure the API key for a provider is present in the environment.
 *
 * Looks up the provider's variable name in ENV_VARS and reads it from Bun.env.
 * An unset or empty value counts as missing.
 *
 * @param provider - Provider whose key should be checked
 * @throws Error with setup instructions when the variable is not set
 */
export function validateApiKey(provider: ProviderName): void {
  const envVar = ENV_VARS[provider];
  const keyIsPresent = Boolean(Bun.env[envVar]);
  if (keyIsPresent) {
    return;
  }

  const help =
    `Missing API key for ${provider}.\n` +
    ` Set the ${envVar} environment variable:\n` +
    ` export ${envVar}=your-key-here\n` +
    ` Or add it to your .env file.`;
  throw new Error(help);
}
|
|
52
|
+
|
|
53
|
+
/**
 * Create an AI SDK model instance for the given model name and provider.
 *
 * @param modelName - Model identifier handed to the provider factory
 * @param providerName - Which provider factory to use
 * @returns The model object produced by the selected provider
 * @throws Error if `providerName` is not one of the supported providers. The
 *   type system rules this out, but a value coming from untyped config could
 *   still reach here at runtime; failing loudly beats returning undefined.
 */
export function createModel(modelName: string, providerName: ProviderName) {
  switch (providerName) {
    case "anthropic":
      return anthropic(modelName);
    case "openai":
      return openai(modelName);
    case "google":
      return google(modelName);
    case "openrouter": {
      // The OpenRouter provider is built per call with the key from the
      // environment; the key is not checked for presence here.
      const openrouter = createOpenRouter({
        apiKey: Bun.env.OPENROUTER_API_KEY!,
      });
      return openrouter.chat(modelName);
    }
    default: {
      // Compile-time exhaustiveness check plus a runtime guard.
      const unsupported: never = providerName;
      throw new Error(`Unsupported model provider: ${String(unsupported)}`);
    }
  }
}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
 * Assemble the system prompt given to the QA automation agent.
 *
 * The prompt always contains the test case, the base URL, tool guidance and
 * the numbered instructions. The two optional context strings are appended as
 * their own sections, global context first, and skipped when empty or absent.
 *
 * @param testCase - Plain English test case, quoted in the prompt
 * @param baseUrl - Base URL, quoted in the prompt
 * @param globalContext - Optional context shared by all tests
 * @param testContext - Optional context for this test only
 * @returns The prompt as a single newline-joined string
 */
export function buildSystemPrompt(
  testCase: string,
  baseUrl: string,
  globalContext?: string,
  testContext?: string,
): string {
  const globalSection = globalContext
    ? ["", "Additional context from the user:", globalContext]
    : [];
  const testSection = testContext
    ? ["", "Test-specific context:", testContext]
    : [];

  return [
    "You are a QA automation agent. Execute the following test case and determine if it passes or fails.",
    "",
    `Test case: "${testCase}"`,
    `Base URL: "${baseUrl}"`,
    "",
    "You have access to both browser automation tools and HTTP/curl tools.",
    "Choose the appropriate tools based on the test case.",
    "",
    "For browser/UI tests:",
    "- Navigate to the base URL first",
    "- Use browser_snapshot to understand page state before acting",
    "- Use browser_click, browser_type for interactions",
    "",
    "For API tests:",
    "- Use the curl_request tool to make HTTP requests",
    "- Check status codes, headers, and response body",
    "",
    "Instructions:",
    "1. Analyze the test case and decide which tools to use.",
    "2. Execute the actions needed to verify the test case.",
    "3. Be methodical. If something doesn't work, try alternative approaches before declaring failure.",
    "4. When finished, provide your verdict as structured output with passed (boolean) and message (brief diagnostic).",
    ...globalSection,
    ...testSection,
  ].join("\n");
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import type { CachedStep } from "../cache/types.ts";
|
|
2
|
+
import type { ProviderName } from "./model-factory.ts";
|
|
3
|
+
|
|
4
|
+
/**
 * Outcome of one AI agent execution of a test case.
 */
export interface AgentExecutionResult {
  /** True when the agent judged the test case to have passed. */
  passed: boolean;

  /** Short diagnostic text explaining the outcome. */
  message: string;

  /** Tool call steps recorded during the run, kept for caching. */
  steps: CachedStep[];
}
|
|
13
|
+
|
|
14
|
+
/**
 * Settings describing one agent run.
 */
export interface AgentConfig {
  /** Identifier of the model to use (e.g., "claude-sonnet-4-6", "gpt-4o"). */
  model: string;

  /** Which LLM provider serves the model. */
  provider: ProviderName;

  /** Upper bound on the number of agent steps. */
  recursionLimit: number;

  /** The test case, written in plain English. */
  testCase: string;

  /** Base URL of the application being tested. */
  baseUrl: string;

  /** Optional per-test context that is appended to the system prompt. */
  context?: string;
}
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
import { join } from "node:path";
|
|
2
|
+
import { mkdir, rename } from "node:fs/promises";
|
|
3
|
+
import type { CacheEntry, CachedStep } from "./types.ts";
|
|
4
|
+
|
|
5
|
+
/**
 * Manages file-based cache entries for test step recordings.
 * Each entry is a JSON file named after the first 16 hex characters of a
 * SHA-256 hash of (testCase + baseUrl), stored directly under the cache
 * directory. Writes go to a `.tmp` file first and are then renamed into place
 * so a partially written entry never appears under the final name.
 */
export class CacheManager {
  private readonly cacheDir: string;

  /**
   * @param cacheDir - Directory that holds the cache files; created on first save
   */
  constructor(cacheDir: string) {
    this.cacheDir = cacheDir;
  }

  /**
   * Generate a deterministic 16-char hex hash key.
   * Uses Bun-native CryptoHasher for SHA-256 hashing.
   *
   * @param testCase - Plain English test case text
   * @param baseUrl - Base URL the test case runs against
   * @returns First 16 hex characters of the SHA-256 digest of `testCase|baseUrl`
   */
  static hashKey(testCase: string, baseUrl: string): string {
    const input = `${testCase}|${baseUrl}`;
    const hasher = new Bun.CryptoHasher("sha256");
    hasher.update(input);
    return hasher.digest("hex").slice(0, 16);
  }

  /**
   * Minimal structural check for data read back from disk: it must be a
   * non-null object whose `steps` property is an array. Other fields are not
   * inspected.
   */
  private static isUsableEntry(value: unknown): value is CacheEntry {
    return (
      typeof value === "object" &&
      value !== null &&
      Array.isArray((value as { steps?: unknown }).steps)
    );
  }

  /** Path of the JSON file that stores the entry for this test case. */
  private entryPath(testCase: string, baseUrl: string): string {
    return join(this.cacheDir, `${CacheManager.hashKey(testCase, baseUrl)}.json`);
  }

  /**
   * Save a cache entry for the given test case.
   * Creates the cache directory if it does not exist.
   * Uses atomic write (tmp file + rename) to prevent corruption.
   * Preserves createdAt from existing entry when updating.
   *
   * @param testCase - Plain English test case text
   * @param baseUrl - Base URL the test case runs against
   * @param steps - Recorded steps to store
   * @param diagnostics - Run metadata copied verbatim into the entry
   */
  async save(
    testCase: string,
    baseUrl: string,
    steps: CachedStep[],
    diagnostics: {
      model: string;
      provider: string;
      stepCount: number;
      aiMessage: string;
      durationMs: number;
    },
  ): Promise<void> {
    await mkdir(this.cacheDir, { recursive: true });

    const now = new Date().toISOString();

    // Load existing entry to preserve createdAt
    const existing = await this.load(testCase, baseUrl);

    const entry: CacheEntry = {
      version: 1,
      testCase,
      baseUrl,
      steps,
      model: diagnostics.model,
      provider: diagnostics.provider,
      stepCount: diagnostics.stepCount,
      aiMessage: diagnostics.aiMessage,
      durationMs: diagnostics.durationMs,
      createdAt: existing?.createdAt ?? now,
      updatedAt: now,
    };

    const filePath = this.entryPath(testCase, baseUrl);
    const tmpPath = `${filePath}.tmp`;

    // Atomic write: write to tmp file, then rename
    await Bun.write(tmpPath, JSON.stringify(entry, null, 2));
    await rename(tmpPath, filePath);
  }

  /**
   * Load a cache entry for the given test case.
   *
   * @returns The stored entry, or null if the file does not exist, cannot be
   *   parsed as JSON, or parses to something that is not an object with a
   *   `steps` array (treated as a cache miss instead of being handed to
   *   callers as if it were a valid entry).
   */
  async load(testCase: string, baseUrl: string): Promise<CacheEntry | null> {
    let parsed: unknown;
    try {
      parsed = await Bun.file(this.entryPath(testCase, baseUrl)).json();
    } catch {
      return null;
    }
    return CacheManager.isUsableEntry(parsed) ? parsed : null;
  }

  /**
   * Delete a cache entry for the given test case.
   * No-op if the file does not exist; any error from the delete is ignored.
   */
  async delete(testCase: string, baseUrl: string): Promise<void> {
    try {
      await Bun.file(this.entryPath(testCase, baseUrl)).delete();
    } catch {
      // No-op if file doesn't exist
    }
  }
}
|