superghost 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,145 @@
1
+ import type { CacheManager } from "../cache/cache-manager.ts";
2
+ import type { StepReplayer } from "../cache/step-replayer.ts";
3
+ import type { AgentExecutionResult } from "../agent/types.ts";
4
+ import type { Config } from "../config/types.ts";
5
+ import type { TestResult } from "./types.ts";
6
+
7
/** Options handed to the agent for a single execution attempt. */
interface ExecuteAgentOptions {
  /** Model instance forwarded to the agent as-is (opaque to this module). */
  model: any;
  /** Tools made available to the agent, keyed by name. */
  tools: Record<string, any>;
  /** The test case to execute. */
  testCase: string;
  /** Base URL the test case runs against. */
  baseUrl: string;
  /** Recursion limit forwarded to the agent. */
  recursionLimit: number;
  /** Optional context shared across tests. */
  globalContext?: string;
  /** Optional context specific to this test. */
  testContext?: string;
}

/** Signature of the function that runs one test case through the AI agent. */
type ExecuteAgentFn = (
  config: ExecuteAgentOptions,
) => Promise<AgentExecutionResult>;
17
+
18
/** Subset of the run configuration that the executor reads. */
type TestExecutorConfig = Pick<
  Config,
  "maxAttempts" | "recursionLimit" | "model" | "modelProvider"
> & { context?: string };

/**
 * Executes a single test case following the cache-first-then-AI strategy:
 *   1. If cached steps exist for the (testCase, baseUrl) pair, replay them.
 *   2. On a cache miss or a failed replay, invoke the AI agent, retrying up to
 *      `config.maxAttempts` times (always at least once).
 *   3. On AI success, save the new steps to the cache. If every attempt fails
 *      and the run was triggered by a failed replay, delete the stale entry.
 *
 * An attempt in which the agent function throws counts as a failed attempt:
 * the error message is recorded and the next attempt (if any) is made.
 */
export class TestExecutor {
  private readonly cacheManager: CacheManager;
  private readonly replayer: StepReplayer;
  private readonly executeAgentFn: ExecuteAgentFn;
  private readonly model: any;
  private readonly tools: Record<string, any>;
  private readonly config: TestExecutorConfig;
  private readonly globalContext?: string;

  /**
   * @param opts - Collaborators and settings. `tools` defaults to an empty
   *   object when omitted; `model` and `globalContext` are forwarded to the
   *   agent function unchanged.
   */
  constructor(opts: {
    cacheManager: CacheManager;
    replayer: StepReplayer;
    executeAgentFn: ExecuteAgentFn;
    model?: any;
    tools?: Record<string, any>;
    config: TestExecutorConfig;
    globalContext?: string;
  }) {
    this.cacheManager = opts.cacheManager;
    this.replayer = opts.replayer;
    this.executeAgentFn = opts.executeAgentFn;
    this.model = opts.model;
    this.tools = opts.tools ?? {};
    this.config = opts.config;
    this.globalContext = opts.globalContext;
  }

  /**
   * Execute a single test case with the cache-first strategy.
   *
   * @param testCase - The test case to run; also used as `testName` in the result.
   * @param baseUrl - Base URL the test runs against (part of the cache key).
   * @param testContext - Optional per-test context forwarded to the agent.
   * @returns The outcome; `source` is "cache" only when a replay succeeded.
   */
  async execute(
    testCase: string,
    baseUrl: string,
    testContext?: string,
  ): Promise<TestResult> {
    const start = Date.now();

    const cached = await this.cacheManager.load(testCase, baseUrl);
    if (!cached) {
      // No cache entry: go directly to the AI agent.
      return this.executeWithAgent(testCase, baseUrl, start, false, testContext);
    }

    const replay = await this.replayer.replay(cached.steps);
    if (replay.success) {
      return {
        testName: testCase,
        testCase,
        status: "passed",
        source: "cache",
        durationMs: Date.now() - start,
      };
    }

    // Cached steps no longer pass: fall back to the agent in self-heal mode.
    return this.executeWithAgent(testCase, baseUrl, start, true, testContext);
  }

  /**
   * Run the agent until it passes or the attempt budget is exhausted.
   *
   * @param startTime - Timestamp (ms) from which `durationMs` is measured.
   * @param selfHeal - True when this run follows a failed cache replay; a pass
   *   is then flagged `selfHealed`, and a final failure deletes the cache entry.
   */
  private async executeWithAgent(
    testCase: string,
    baseUrl: string,
    startTime: number,
    selfHeal: boolean,
    testContext?: string,
  ): Promise<TestResult> {
    // Guarantee at least one attempt: a zero, negative or NaN maxAttempts would
    // otherwise report a failure without ever invoking the agent.
    const attemptLimit =
      this.config.maxAttempts >= 1 ? this.config.maxAttempts : 1;
    let lastError = "";

    for (let attempt = 0; attempt < attemptLimit; attempt++) {
      let result: AgentExecutionResult;
      try {
        result = await this.executeAgentFn({
          model: this.model,
          tools: this.tools,
          testCase,
          baseUrl,
          recursionLimit: this.config.recursionLimit,
          globalContext: this.globalContext,
          testContext,
        });
      } catch (err: unknown) {
        // A throwing attempt is a failed attempt, not a reason to abandon the
        // remaining retries or to skip the stale-cache cleanup below.
        lastError = err instanceof Error ? err.message : String(err);
        continue;
      }

      if (!result.passed) {
        lastError = result.message;
        continue;
      }

      // Persist the successful steps so later runs can replay them.
      await this.cacheManager.save(testCase, baseUrl, result.steps, {
        model: this.config.model,
        provider: this.config.modelProvider,
        stepCount: result.steps.length,
        aiMessage: result.message,
        durationMs: Date.now() - startTime,
      });

      return {
        testName: testCase,
        testCase,
        status: "passed",
        source: "ai",
        durationMs: Date.now() - startTime,
        ...(selfHeal ? { selfHealed: true } : {}),
      };
    }

    // All attempts exhausted.
    if (selfHeal) {
      // Remove the stale cache entry that triggered the self-heal run.
      await this.cacheManager.delete(testCase, baseUrl);
    }

    return {
      testName: testCase,
      testCase,
      status: "failed",
      source: "ai",
      durationMs: Date.now() - startTime,
      error: lastError,
    };
  }
}
@@ -0,0 +1,66 @@
1
+ import type { Config } from "../config/types.ts";
2
+ import type { Reporter } from "../output/types.ts";
3
+ import type { TestResult, RunResult } from "./types.ts";
4
+
5
/**
 * Signature of the callback that runs one test case.
 *
 * @param testCase - The test case to execute.
 * @param baseUrl - Base URL the test case runs against.
 * @param testContext - Optional context specific to this test.
 * @returns A promise resolving to the outcome of the test.
 */
export type ExecuteFn = (
  testCase: string,
  baseUrl: string,
  testContext?: string,
) => Promise<TestResult>;
11
+
12
/**
 * Orchestrates sequential execution of all configured test cases.
 * Calls reporter hooks before and after each test and once after the full run.
 * Delegates individual test execution to the provided execute function.
 *
 * If the execute function rejects for a test, that test is recorded as failed
 * (with the rejection message as `error`) and the run continues, so the
 * reporter always receives `onTestComplete` for every started test and
 * `onRunComplete` at the end.
 */
export class TestRunner {
  private readonly config: Config;
  private readonly reporter: Reporter;
  private readonly executeFn: ExecuteFn;

  /**
   * @param config - Run configuration; `tests` and the fallback `baseUrl` are read.
   * @param reporter - Receives start/complete notifications.
   * @param executeFn - Runs a single test case.
   */
  constructor(config: Config, reporter: Reporter, executeFn: ExecuteFn) {
    this.config = config;
    this.reporter = reporter;
    this.executeFn = executeFn;
  }

  /**
   * Run all test cases sequentially and return aggregate results.
   *
   * @returns Per-test results in configuration order plus summary counts.
   */
  async run(): Promise<RunResult> {
    const runStart = Date.now();
    const results: TestResult[] = [];

    for (const test of this.config.tests) {
      // Per-test base URL wins over the global one; fall back to "".
      const baseUrl = test.baseUrl ?? this.config.baseUrl ?? "";

      this.reporter.onTestStart(test.name);

      const testStart = Date.now();
      let result: TestResult;
      try {
        result = await this.executeFn(test.case, baseUrl, test.context);
      } catch (err: unknown) {
        // Convert an unexpected rejection into a failed result so one broken
        // test cannot abort the remaining tests or skip the reporter hooks.
        result = {
          testName: test.name,
          testCase: test.case,
          status: "failed",
          source: "ai",
          durationMs: Date.now() - testStart,
          error: err instanceof Error ? err.message : String(err),
        };
      }

      // Report under the configured display name, not the raw test case text.
      const displayResult: TestResult = { ...result, testName: test.name };
      results.push(displayResult);

      this.reporter.onTestComplete(displayResult);
    }

    const runResult = aggregateResults(results, Date.now() - runStart);
    this.reporter.onRunComplete(runResult);
    return runResult;
  }
}

/**
 * Aggregate individual test results into a run summary.
 *
 * @param results - Per-test results, returned unchanged in the summary.
 * @param totalDurationMs - Wall-clock duration of the whole run.
 * @returns Counts of passed and failed tests; `cached` counts only tests that
 *   passed with source "cache".
 */
function aggregateResults(
  results: TestResult[],
  totalDurationMs: number,
): RunResult {
  let passed = 0;
  let failed = 0;
  let cached = 0;

  for (const r of results) {
    if (r.status === "passed") {
      passed++;
      if (r.source === "cache") cached++;
    } else if (r.status === "failed") {
      failed++;
    }
  }

  return { results, totalDurationMs, passed, failed, cached };
}
@@ -0,0 +1,26 @@
1
/** Outcome of a completed test. */
export type TestStatus =
  | "passed"
  | "failed";
3
+
4
/** Where a test result came from: a cache replay or an AI agent run. */
export type TestSource =
  | "cache"
  | "ai";
6
+
7
/** Outcome of executing one test case. */
export interface TestResult {
  /** Name under which the test is reported. */
  testName: string;
  /** The test case that was executed. */
  testCase: string;
  /** Whether the test passed or failed. */
  status: TestStatus;
  /** Whether the result came from a cache replay or from the AI agent. */
  source: TestSource;
  /** Elapsed time for this test, in milliseconds. */
  durationMs: number;
  /** Failure message, when there is one. */
  error?: string;
  /** Whether this test self-healed from a stale cache */
  selfHealed?: boolean;
}
18
+
19
/** Summary of a complete run produced by the test runner. */
export interface RunResult {
  /** Individual results, one per executed test. */
  results: TestResult[];
  /** Duration of the whole run, in milliseconds. */
  totalDurationMs: number;
  /** Number of tests whose status is "passed". */
  passed: number;
  /** Number of tests whose status is "failed". */
  failed: number;
  /** Number of passed tests whose source is "cache". */
  cached: number;
}