superghost 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Luis Morales
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,172 @@
1
+ # SuperGhost
2
+
3
+ Plain English test cases with AI execution and instant cached replay for CI/CD.
4
+
5
+ Write tests in YAML. An AI agent executes them in a real browser or via API calls. Results are cached step-by-step so re-runs are instant and deterministic -- no flaky tests, no test code to maintain.
6
+
7
+ ![SuperGhost Demo](demo/demo.gif)
8
+
9
+ ## Install
10
+
11
+ ### Zero-install (recommended)
12
+
13
+ ```bash
14
+ bunx superghost --config tests.yaml
15
+ ```
16
+
17
+ ### Global install
18
+
19
+ ```bash
20
+ bun install -g superghost
21
+ superghost --config tests.yaml
22
+ ```
23
+
24
+ ### Standalone binary
25
+
26
+ Download the latest binary for your platform from [GitHub Releases](https://github.com/lacion/superghost/releases).
27
+
28
+ ```bash
29
+ chmod +x superghost-darwin-arm64
30
+ ./superghost-darwin-arm64 --config tests.yaml
31
+ ```
32
+
33
+ On first run, the standalone binary automatically installs MCP server dependencies to `~/.superghost/`.
34
+
35
+ ## Quick Start
36
+
37
+ Create a `tests.yaml` file:
38
+
39
+ ```yaml
40
+ baseUrl: https://example.com
41
+ model: claude-sonnet-4-20250514
42
+
43
+ tests:
44
+ - name: Homepage loads
45
+ case: Navigate to the homepage and verify the page title contains "Example"
46
+
47
+ - name: API health check
48
+ case: Send a GET request to /api/health and verify the response status is 200
49
+ ```
50
+
51
+ Run it:
52
+
53
+ ```bash
54
+ bunx superghost --config tests.yaml
55
+ ```
56
+
57
+ ## CLI
58
+
59
+ ```
60
+ Usage: superghost [options]
61
+
62
+ Options:
63
+ -c, --config <path> Path to YAML config file (required)
64
+ -V, --version Output the version number
65
+ -h, --help Display help
66
+ ```
67
+
68
+ ## Provider Setup
69
+
70
+ SuperGhost supports four AI providers. Set the appropriate environment variable for your chosen provider.
71
+
72
+ ### Anthropic (default)
73
+
74
+ ```bash
75
+ export ANTHROPIC_API_KEY=sk-ant-...
76
+ ```
77
+
78
+ ```yaml
79
+ model: claude-sonnet-4-20250514
80
+ ```
81
+
82
+ ### OpenAI
83
+
84
+ ```bash
85
+ export OPENAI_API_KEY=sk-...
86
+ ```
87
+
88
+ ```yaml
89
+ model: gpt-4o
90
+ modelProvider: openai
91
+ ```
92
+
93
+ ### Google Gemini
94
+
95
+ ```bash
96
+ export GOOGLE_GENERATIVE_AI_API_KEY=...
97
+ ```
98
+
99
+ ```yaml
100
+ model: gemini-2.5-flash
101
+ modelProvider: google
102
+ ```
103
+
104
+ ### OpenRouter
105
+
106
+ ```bash
107
+ export OPENROUTER_API_KEY=sk-or-...
108
+ ```
109
+
110
+ ```yaml
111
+ model: anthropic/claude-sonnet-4-20250514
112
+ modelProvider: openrouter
113
+ ```
114
+
115
+ ## Configuration
116
+
117
+ All fields in `tests.yaml`:
118
+
119
+ | Field | Type | Default | Description |
120
+ |-------|------|---------|-------------|
121
+ | `baseUrl` | `string` | (required) | Base URL for all tests |
122
+ | `model` | `string` | (required) | AI model identifier |
123
+ | `modelProvider` | `string` | `"anthropic"` | Provider: `anthropic`, `openai`, `google`, `openrouter` |
124
+ | `browser` | `string` | `"chromium"` | Browser engine: `chromium`, `firefox`, `webkit` |
125
+ | `headless` | `boolean` | `false` | Run browser in headless mode |
126
+ | `cacheDir` | `string` | `".superghost-cache"` | Directory for cached test steps |
127
+ | `context` | `string` | `undefined` | Global context passed to every test |
128
+ | `tests` | `array` | (required) | Array of test definitions |
129
+ | `tests[].name` | `string` | `undefined` | Display name for the test |
130
+ | `tests[].case` | `string` | (required) | Plain English test instruction |
131
+ | `tests[].context` | `string` | `undefined` | Per-test context for the AI agent |
132
+
133
+ ## How It Works
134
+
135
+ 1. **First run:** The AI agent reads your plain English test case and executes it step-by-step in a real browser (via Playwright MCP) or via API calls (via curl MCP). Each step is recorded to a cache file.
136
+
137
+ 2. **Subsequent runs:** Cached steps are replayed directly against the browser/API without calling the AI. This makes re-runs instant and deterministic.
138
+
139
+ 3. **Self-healing:** If a cached step fails during replay (e.g., a selector changed), SuperGhost automatically falls back to the AI agent to re-execute that test. The new steps replace the stale cache.
140
+
141
+ ## Example App (E2E)
142
+
143
+ The `e2e/` directory contains a fullstack Task Manager app that validates SuperGhost end-to-end and serves as a reference for writing test configs.
144
+
145
+ ```bash
146
+ # Start the example app
147
+ bun run e2e:app
148
+ # Open http://localhost:3777
149
+
150
+ # Run smoke tests (2 tests — requires an AI API key)
151
+ bun run e2e:smoke
152
+
153
+ # Run browser UI tests (7 tests)
154
+ bun run e2e:browser
155
+
156
+ # Run API endpoint tests (7 tests)
157
+ bun run e2e:api
158
+
159
+ # Run all 16 tests
160
+ bun run e2e:all
161
+ ```
162
+
163
+ The test runner exits gracefully when no API key is configured, making it safe for CI environments. See [`e2e/README.md`](e2e/README.md) for details.
164
+
165
+ ## Standalone Binary
166
+
167
+ When running as a standalone compiled binary (downloaded from GitHub Releases), SuperGhost cannot use `bunx` to spawn MCP server packages. Instead:
168
+
169
+ - On first run, MCP dependencies (`@playwright/mcp`, `@calibress/curl-mcp`) are automatically installed to `~/.superghost/`
170
+ - Subsequent runs skip the install step
171
+ - You must have a Playwright-compatible browser installed on your system (Chromium, Firefox, or WebKit)
172
+ - SuperGhost does **not** auto-install browser binaries -- if Playwright cannot find a browser, it will display its own error message with install instructions
package/package.json ADDED
@@ -0,0 +1,77 @@
1
+ {
2
+ "name": "superghost",
3
+ "version": "0.1.0",
4
+ "description": "Plain English test cases with AI execution and instant cached replay for CI/CD",
5
+ "type": "module",
6
+ "bin": {
7
+ "superghost": "src/cli.ts"
8
+ },
9
+ "files": [
10
+ "src/",
11
+ "README.md",
12
+ "LICENSE"
13
+ ],
14
+ "scripts": {
15
+ "test": "bun test",
16
+ "typecheck": "bunx tsc --noEmit",
17
+ "build:binary": "bun run scripts/build-binaries.ts",
18
+ "prepublishOnly": "bun test && bunx tsc --noEmit",
19
+ "e2e": "bun run e2e/run-e2e.ts",
20
+ "e2e:smoke": "bun run e2e/run-e2e.ts smoke",
21
+ "e2e:browser": "bun run e2e/run-e2e.ts browser",
22
+ "e2e:api": "bun run e2e/run-e2e.ts api",
23
+ "e2e:all": "bun run e2e/run-e2e.ts all",
24
+ "e2e:headed": "bun run e2e/run-e2e.ts smoke --headed",
25
+ "e2e:smoke:headed": "bun run e2e/run-e2e.ts smoke --headed",
26
+ "e2e:browser:headed": "bun run e2e/run-e2e.ts browser --headed",
27
+ "e2e:api:headed": "bun run e2e/run-e2e.ts api --headed",
28
+ "e2e:all:headed": "bun run e2e/run-e2e.ts all --headed",
29
+ "e2e:app": "bun run e2e/app/server.ts"
30
+ },
31
+ "keywords": [
32
+ "testing",
33
+ "ai",
34
+ "browser",
35
+ "e2e",
36
+ "playwright",
37
+ "mcp",
38
+ "cli"
39
+ ],
40
+ "license": "MIT",
41
+ "author": "Luis Morales (https://github.com/lacion)",
42
+ "engines": {
43
+ "bun": ">=1.2.0"
44
+ },
45
+ "publishConfig": {
46
+ "access": "public"
47
+ },
48
+ "repository": {
49
+ "type": "git",
50
+ "url": "git+https://github.com/lacion/superghost.git"
51
+ },
52
+ "homepage": "https://github.com/lacion/superghost#readme",
53
+ "bugs": {
54
+ "url": "https://github.com/lacion/superghost/issues"
55
+ },
56
+ "dependencies": {
57
+ "@ai-sdk/anthropic": "^3.0.58",
58
+ "@ai-sdk/google": "^3.0.37",
59
+ "@ai-sdk/mcp": "^1.0.25",
60
+ "@ai-sdk/openai": "^3.0.41",
61
+ "@modelcontextprotocol/sdk": "^1.27.1",
62
+ "@openrouter/ai-sdk-provider": "^2.2.5",
63
+ "ai": "^6.0.116",
64
+ "commander": "^14.0.3",
65
+ "nanospinner": "^1.2.2",
66
+ "picocolors": "^1.1.1",
67
+ "zod": "^4.3.6"
68
+ },
69
+ "devDependencies": {
70
+ "@types/bun": "^1.3.10",
71
+ "@types/react": "^19.0.0",
72
+ "@types/react-dom": "^19.0.0",
73
+ "react": "^19.0.0",
74
+ "react-dom": "^19.0.0",
75
+ "typescript": "^5.9.3"
76
+ }
77
+ }
@@ -0,0 +1,69 @@
1
+ import { generateText, Output, stepCountIs } from "ai";
2
+ import { z } from "zod";
3
+ import { StepRecorder } from "../cache/step-recorder.ts";
4
+ import type { AgentExecutionResult } from "./types.ts";
5
+ import { buildSystemPrompt } from "./prompt.ts";
6
+
7
/**
 * Shape of the verdict the agent has to return when it finishes a test case.
 * The structured output is a JSON object with exactly two fields:
 * `passed` (boolean) and `message` (string).
 */
const TestResultSchema = z.object({
  // Final pass/fail decision for the test case.
  passed: z.boolean().describe("Whether the test case passed"),
  // Short human-readable explanation that accompanies the decision.
  message: z.string().describe(
    "Brief diagnostic: what happened and what the page showed",
  ),
});
17
+
18
/**
 * Execute a single test case using the AI agent with MCP tools.
 *
 * Runs the Vercel AI SDK's `generateText` with:
 * - `Output.object()` so the final answer is a structured `{ passed, message }` verdict
 * - `stopWhen: stepCountIs(recursionLimit)` to bound the tool-calling loop
 * - tools wrapped by a `StepRecorder` so each tool call is captured for the cache
 *
 * When the run ends without a structured verdict (for example because the step
 * limit was reached), the test is reported as failed and the steps recorded so
 * far are still returned.
 *
 * @param config - Model instance, tool set, test case text, base URL, step
 *   limit, and optional global / per-test context strings.
 * @returns AgentExecutionResult with pass/fail status, diagnostic message, and recorded steps.
 * @throws Any error `generateText` itself rejects with; those are not swallowed here.
 */
export async function executeAgent(config: {
  model: any;
  tools: Record<string, any>;
  testCase: string;
  baseUrl: string;
  recursionLimit: number;
  globalContext?: string;
  testContext?: string;
}): Promise<AgentExecutionResult> {
  const recorder = new StepRecorder();
  const wrappedTools = recorder.wrapTools(config.tools);

  const systemPrompt = buildSystemPrompt(
    config.testCase,
    config.baseUrl,
    config.globalContext,
    config.testContext,
  );

  const result = await generateText({
    model: config.model,
    tools: wrappedTools,
    system: systemPrompt,
    prompt: `Execute the test case: "${config.testCase}"`,
    stopWhen: stepCountIs(config.recursionLimit),
    output: Output.object({ schema: TestResultSchema }),
  });

  // NOTE(review): in current AI SDK versions the `output` accessor may throw
  // (rather than return null) when no structured output was generated.
  // Reading it in isolation keeps the friendly fallback below reachable
  // either way, without masking errors raised by generateText itself.
  let output: z.infer<typeof TestResultSchema> | null;
  try {
    output = result.output ?? null;
  } catch {
    output = null;
  }

  if (output === null) {
    return {
      passed: false,
      message: `Agent did not produce a structured result — it may have exceeded the ${config.recursionLimit} step limit`,
      steps: recorder.getSteps(),
    };
  }

  return {
    passed: output.passed,
    message: output.message,
    steps: recorder.getSteps(),
  };
}
@@ -0,0 +1,78 @@
1
+ import { createMCPClient } from "@ai-sdk/mcp";
2
+ import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js";
3
+ import type { Config } from "../config/types.ts";
4
+ import { getMcpCommand } from "../dist/paths.ts";
5
+
6
/**
 * Manages the lifecycle of Playwright and curl MCP servers.
 *
 * MCP servers are shared across the test suite (not restarted per test).
 * Fresh browser context per test comes from the `--isolated` flag on
 * Playwright MCP. Both tool sets are merged and provided to the agent
 * regardless of test type.
 */
export class McpManager {
  private playwrightClient: Awaited<ReturnType<typeof createMCPClient>> | null =
    null;
  private curlClient: Awaited<ReturnType<typeof createMCPClient>> | null = null;

  /**
   * @param config - Browser engine name and headless flag used to build the
   *   Playwright MCP command line.
   */
  constructor(private readonly config: Pick<Config, "browser" | "headless">) {}

  /**
   * Spawn Playwright MCP and curl MCP servers via stdio transport.
   * Must be called before getTools().
   *
   * If either server fails to start, any client that was already created is
   * closed before the error is rethrown, so no server is left running.
   *
   * @throws Whatever `createMCPClient` rejects with.
   */
  async initialize(): Promise<void> {
    // Resolve MCP spawn commands (bunx in npm mode, path-based in standalone)
    const playwrightCmd = getMcpCommand("@playwright/mcp");
    const curlCmd = getMcpCommand("@calibress/curl-mcp");

    // Argument order: resolved command args, optional --headless,
    // then --isolated and the browser selection.
    const playwrightArgs = [
      ...playwrightCmd.args,
      ...(this.config.headless ? ["--headless"] : []),
      "--isolated",
      `--browser=${this.config.browser}`,
    ];

    try {
      this.playwrightClient = await createMCPClient({
        transport: new StdioClientTransport({
          command: playwrightCmd.command,
          args: playwrightArgs,
        }),
      });

      this.curlClient = await createMCPClient({
        transport: new StdioClientTransport({
          command: curlCmd.command,
          args: [...curlCmd.args],
        }),
      });
    } catch (error: unknown) {
      // Do not leave a half-initialized manager (and its server) behind.
      await this.close();
      throw error;
    }
  }

  /**
   * Get merged tool set from both Playwright and curl MCP servers.
   * Provides ALL tools to the agent regardless of test type. On a name clash
   * the curl tool wins because it is spread last.
   *
   * @returns Object containing the tools of both servers.
   * @throws Error if initialize() has not completed successfully.
   */
  async getTools(): Promise<Record<string, any>> {
    const playwright = this.playwrightClient;
    const curl = this.curlClient;
    if (playwright === null || curl === null) {
      throw new Error(
        "McpManager.getTools() called before initialize() completed",
      );
    }

    const playwrightTools = await playwright.tools();
    const curlTools = await curl.tools();
    return { ...playwrightTools, ...curlTools };
  }

  /**
   * Close both MCP server connections.
   * Uses Promise.allSettled to ensure both servers are cleaned up
   * even if one fails to close. Safe to call when nothing was initialized.
   */
  async close(): Promise<void> {
    await Promise.allSettled([
      this.playwrightClient?.close(),
      this.curlClient?.close(),
    ]);
    this.playwrightClient = null;
    this.curlClient = null;
  }
}
@@ -0,0 +1,71 @@
1
+ import { anthropic } from "@ai-sdk/anthropic";
2
+ import { openai } from "@ai-sdk/openai";
3
+ import { google } from "@ai-sdk/google";
4
+ import { createOpenRouter } from "@openrouter/ai-sdk-provider";
5
+
6
/** Names of the LLM providers this module can build models for. */
export type ProviderName =
  | "anthropic"
  | "openai"
  | "google"
  | "openrouter";

/** For each provider, the environment variable that holds its API key. */
export const ENV_VARS: Record<ProviderName, string> = {
  anthropic: "ANTHROPIC_API_KEY",
  openai: "OPENAI_API_KEY",
  google: "GOOGLE_GENERATIVE_AI_API_KEY",
  openrouter: "OPENROUTER_API_KEY",
};
16
+
17
// Auto-inference rules, tried in order; the first matching pattern wins.
// The first four match on the start of the model name; the last one matches
// any name that contains a "/".
const MODEL_PREFIX_MAP: Array<[RegExp, ProviderName]> = [
  [/^claude-/, "anthropic"],
  [/^gpt-/, "openai"],
  [/^o\d/, "openai"],
  [/^gemini-/, "google"],
  [/\//, "openrouter"],
];

/**
 * Work out which LLM provider a model name belongs to.
 *
 * @param modelName - Model identifier such as "gpt-4o".
 * @returns The provider of the first matching rule, or "anthropic" when no
 *   rule matches.
 */
export function inferProvider(modelName: string): ProviderName {
  const matchedRule = MODEL_PREFIX_MAP.find(([pattern]) =>
    pattern.test(modelName),
  );
  return matchedRule === undefined ? "anthropic" : matchedRule[1];
}
36
+
37
/**
 * Ensure the API key environment variable for `provider` is set.
 *
 * @param provider - Provider whose key should be present.
 * @throws Error naming the missing variable and how to set it when the
 *   variable is unset or empty.
 */
export function validateApiKey(provider: ProviderName): void {
  const envVar = ENV_VARS[provider];
  if (Bun.env[envVar]) {
    return;
  }

  const messageLines = [
    `Missing API key for ${provider}.`,
    ` Set the ${envVar} environment variable:`,
    ` export ${envVar}=your-key-here`,
    ` Or add it to your .env file.`,
  ];
  throw new Error(messageLines.join("\n"));
}
52
+
53
/**
 * Create an AI SDK model instance for the given model name and provider.
 *
 * @param modelName - Model identifier passed through to the provider factory.
 * @param providerName - Which provider factory to use.
 * @returns The model object produced by the selected provider.
 * @throws Error when `providerName` is not one of the supported providers
 *   (possible at runtime when the value comes from untyped input such as a
 *   config file), instead of silently returning `undefined`.
 */
export function createModel(modelName: string, providerName: ProviderName) {
  switch (providerName) {
    case "anthropic":
      return anthropic(modelName);
    case "openai":
      return openai(modelName);
    case "google":
      return google(modelName);
    case "openrouter": {
      // OpenRouter is the only provider here that is handed its key explicitly.
      const openrouter = createOpenRouter({
        apiKey: Bun.env.OPENROUTER_API_KEY!,
      });
      return openrouter.chat(modelName);
    }
    default: {
      // Compile-time exhaustiveness check plus a runtime guard.
      const unsupported: never = providerName;
      throw new Error(
        `Unsupported model provider "${String(unsupported)}". ` +
          `Expected one of: anthropic, openai, google, openrouter.`,
      );
    }
  }
}
@@ -0,0 +1,47 @@
1
/**
 * Assemble the system prompt given to the QA automation agent.
 *
 * The prompt always contains the test case, the base URL, guidance on which
 * tools to use, and the step-by-step instructions. A context section is
 * appended for each of `globalContext` and `testContext` that is non-empty.
 *
 * @param testCase - Plain English test case, quoted verbatim in the prompt.
 * @param baseUrl - Base URL of the application under test.
 * @param globalContext - Optional context shared by every test.
 * @param testContext - Optional context specific to this test.
 * @returns The prompt as a single newline-joined string.
 */
export function buildSystemPrompt(
  testCase: string,
  baseUrl: string,
  globalContext?: string,
  testContext?: string,
): string {
  // A context section is a blank line, a heading, then the text itself;
  // it is omitted entirely when the text is missing or empty.
  const contextSection = (heading: string, text?: string): string[] =>
    text ? ["", heading, text] : [];

  const fixedPart: string[] = [
    "You are a QA automation agent. Execute the following test case and determine if it passes or fails.",
    "",
    `Test case: "${testCase}"`,
    `Base URL: "${baseUrl}"`,
    "",
    "You have access to both browser automation tools and HTTP/curl tools.",
    "Choose the appropriate tools based on the test case.",
    "",
    "For browser/UI tests:",
    "- Navigate to the base URL first",
    "- Use browser_snapshot to understand page state before acting",
    "- Use browser_click, browser_type for interactions",
    "",
    "For API tests:",
    "- Use the curl_request tool to make HTTP requests",
    "- Check status codes, headers, and response body",
    "",
    "Instructions:",
    "1. Analyze the test case and decide which tools to use.",
    "2. Execute the actions needed to verify the test case.",
    "3. Be methodical. If something doesn't work, try alternative approaches before declaring failure.",
    "4. When finished, provide your verdict as structured output with passed (boolean) and message (brief diagnostic).",
  ];

  return [
    ...fixedPart,
    ...contextSection("Additional context from the user:", globalContext),
    ...contextSection("Test-specific context:", testContext),
  ].join("\n");
}
@@ -0,0 +1,28 @@
1
+ import type { CachedStep } from "../cache/types.ts";
2
+ import type { ProviderName } from "./model-factory.ts";
3
+
4
/**
 * Outcome of one AI agent execution of a test case.
 */
export interface AgentExecutionResult {
  /** True when the test case passed. */
  passed: boolean;

  /** Diagnostic text describing the outcome. */
  message: string;

  /** Tool call steps recorded during the run, kept for caching. */
  steps: CachedStep[];
}
13
+
14
/**
 * Settings for a single agent run.
 */
export interface AgentConfig {
  /** Identifier of the model to use (e.g., "claude-sonnet-4-6", "gpt-4o"). */
  model: string;

  /** Which LLM provider serves the model. */
  provider: ProviderName;

  /** Upper bound on the number of agent steps. */
  recursionLimit: number;

  /** The test case, described in plain English. */
  testCase: string;

  /** Base URL of the application under test. */
  baseUrl: string;

  /** Optional per-test context appended to the system prompt. */
  context?: string;
}
@@ -0,0 +1,105 @@
1
+ import { join } from "node:path";
2
+ import { mkdir, rename } from "node:fs/promises";
3
+ import type { CacheEntry, CachedStep } from "./types.ts";
4
+
5
/**
 * Manages file-based cache entries for test step recordings.
 * Each entry is a JSON file keyed by a deterministic SHA-256 hash of (testCase + baseUrl).
 * Uses atomic write-then-rename to prevent corrupted cache files.
 */
export class CacheManager {
  private readonly cacheDir: string;

  /**
   * @param cacheDir - Directory that holds the `<hash>.json` entry files.
   */
  constructor(cacheDir: string) {
    this.cacheDir = cacheDir;
  }

  /**
   * Generate a deterministic 16-char hex hash key.
   * Uses Bun-native CryptoHasher for SHA-256 hashing.
   *
   * @returns The first 16 hex characters of SHA-256 over `${testCase}|${baseUrl}`.
   */
  static hashKey(testCase: string, baseUrl: string): string {
    const input = `${testCase}|${baseUrl}`;
    const hasher = new Bun.CryptoHasher("sha256");
    hasher.update(input);
    return hasher.digest("hex").slice(0, 16);
  }

  /**
   * Save a cache entry for the given test case.
   * Creates the cache directory if it does not exist.
   * Uses atomic write (tmp file + rename) to prevent corruption.
   * Preserves createdAt from existing entry when updating.
   *
   * The temporary file name is unique per call so that concurrent saves of
   * the same key cannot write into each other's temp file, and the temp file
   * is removed again if the write or rename fails.
   *
   * @throws The underlying write/rename error when the entry cannot be stored.
   */
  async save(
    testCase: string,
    baseUrl: string,
    steps: CachedStep[],
    diagnostics: {
      model: string;
      provider: string;
      stepCount: number;
      aiMessage: string;
      durationMs: number;
    },
  ): Promise<void> {
    await mkdir(this.cacheDir, { recursive: true });

    const now = new Date().toISOString();

    // Load existing entry to preserve createdAt
    const existing = await this.load(testCase, baseUrl);

    const entry: CacheEntry = {
      version: 1,
      testCase,
      baseUrl,
      steps,
      model: diagnostics.model,
      provider: diagnostics.provider,
      stepCount: diagnostics.stepCount,
      aiMessage: diagnostics.aiMessage,
      durationMs: diagnostics.durationMs,
      createdAt: existing?.createdAt ?? now,
      updatedAt: now,
    };

    const filePath = this.entryPath(testCase, baseUrl);
    const tmpPath = `${filePath}.${crypto.randomUUID()}.tmp`;

    // Atomic write: write to tmp file, then rename
    try {
      await Bun.write(tmpPath, JSON.stringify(entry, null, 2));
      await rename(tmpPath, filePath);
    } catch (error: unknown) {
      // Best-effort cleanup so failed saves do not litter the cache directory.
      try {
        await Bun.file(tmpPath).delete();
      } catch {
        // The temp file may never have been created; nothing to clean up.
      }
      throw error;
    }
  }

  /**
   * Load a cache entry for the given test case.
   * Returns null if the file does not exist, contains invalid JSON, or parses
   * to something that is not an object with a `steps` array.
   */
  async load(testCase: string, baseUrl: string): Promise<CacheEntry | null> {
    const filePath = this.entryPath(testCase, baseUrl);

    let parsed: unknown;
    try {
      parsed = await Bun.file(filePath).json();
    } catch {
      return null;
    }

    return CacheManager.looksLikeEntry(parsed) ? parsed : null;
  }

  /**
   * Delete a cache entry for the given test case.
   * No-op if the file does not exist.
   */
  async delete(testCase: string, baseUrl: string): Promise<void> {
    const filePath = this.entryPath(testCase, baseUrl);

    try {
      await Bun.file(filePath).delete();
    } catch {
      // No-op if file doesn't exist
    }
  }

  /** Path of the JSON file that stores the entry for this test case. */
  private entryPath(testCase: string, baseUrl: string): string {
    return join(this.cacheDir, `${CacheManager.hashKey(testCase, baseUrl)}.json`);
  }

  /** Minimal structural check applied to parsed cache files before use. */
  private static looksLikeEntry(value: unknown): value is CacheEntry {
    return (
      typeof value === "object" &&
      value !== null &&
      Array.isArray((value as { steps?: unknown }).steps)
    );
  }
}