ccqa 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,244 @@
1
+ import { readFile, writeFile, unlink } from "node:fs/promises";
2
+ import { join } from "node:path";
3
+ import { Command } from "commander";
4
+ import { buildTraceSystemPrompt, buildTracePrompt, generateSessionName } from "../prompts/trace.ts";
5
+ import { invokeClaudeStreaming } from "../claude/invoke.ts";
6
+ import type { SDKMessage } from "@anthropic-ai/claude-agent-sdk";
7
+ import { ensureCcqaDir, parseSpecPath, readSpecFile, saveRoute, saveTraceActions, getSetupDir } from "../store/index.ts";
8
+ import { parseTestSpec } from "../spec/parser.ts";
9
+ import type { Route, RouteStep, TraceAction, TraceCommand, AssertType, ParsedStatusLine } from "../types.ts";
10
+ import * as log from "./logger.ts";
11
+
12
/**
 * `trace <feature/spec>` CLI command.
 *
 * The positional argument is split into a feature name and a spec name by
 * `parseSpecPath`; both are then handed to `runTrace`.
 */
export const traceCommand = new Command("trace")
  .description("Run agent-browser, verify assertions, and record structured actions")
  .argument("<feature/spec>", "Spec to trace (e.g. tasks/create-and-complete)")
  .action(async (specPath: string): Promise<void> => {
    const parsed = parseSpecPath(specPath);
    await runTrace(parsed.featureName, parsed.specName);
  });
19
+
20
/**
 * Trace one spec end to end and persist the result.
 *
 * Steps:
 * 1. Read and parse the spec, then log its title, base URL, setups and step count.
 * 2. Run the spec's setup procedures (if any) under a freshly generated session name.
 * 3. Stream an agent session through `invokeClaudeStreaming` with that same
 *    session name, collecting `ROUTE_STEP|…` lines as route steps and
 *    `AB_ACTION|…` events as trace actions.
 * 4. Save the route and the trace actions, then log where they were written.
 *
 * The overall status becomes "failed" when any route step reports FAILED or
 * when the invocation itself reports an error.
 *
 * @param featureName Feature the spec belongs to.
 * @param specName Name of the spec within the feature.
 */
async function runTrace(featureName: string, specName: string): Promise<void> {
  log.header("trace", `${featureName}/${specName}`);

  await ensureCcqaDir();

  const specContent = await readSpecFile(featureName, specName);
  const spec = parseTestSpec(specContent);
  const hasSetups = (spec.setups?.length ?? 0) > 0;

  log.meta("spec", spec.title);
  log.meta("url", spec.baseUrl);
  if (hasSetups) log.meta("setups", (spec.setups ?? []).map((s) => s.name).join(", "));
  log.meta("steps", spec.steps.length);
  log.blank();

  // Generate a session name to share between setup execution and trace
  const sessionName = generateSessionName();

  // Run setups before tracing (same session)
  if (hasSetups) {
    log.info("Running setup procedures...");
    await runSetups(
      spec.setups as Array<{ name: string; params?: Record<string, string> }>,
      sessionName,
    );
    log.blank();
  }

  const systemPrompt = buildTraceSystemPrompt(spec, {
    sessionName,
    skipCookiesClear: hasSetups,
  });
  const prompt = buildTracePrompt(spec);

  log.info("Running agent-browser session...");
  log.blank();

  const routeSteps: RouteStep[] = [];
  let overallStatus: "passed" | "failed" = "passed";
  const traceActions: TraceAction[] = [];

  // The entry most recently recorded by `onAbAction` (null when that call
  // produced nothing). `onAbActionFailed` removes exactly this entry instead
  // of blindly popping the array tail, which could be an unrelated
  // snapshot/assert pushed from the text stream, or an older action when the
  // failed one was never recorded.
  // NOTE(review): assumes `onAbActionFailed` refers to the latest `onAbAction`
  // call — confirm against invokeClaudeStreaming.
  let lastHookAction: TraceAction | null = null;

  const { isError } = await invokeClaudeStreaming(
    {
      prompt,
      systemPrompt,
      allowedTools: ["Bash(*)", "Read", "Grep", "Glob"],
      env: { AGENT_BROWSER_SESSION: sessionName },
      onAbAction: (abAction: string) => {
        lastHookAction = parseAbAction(abAction);
        if (lastHookAction) traceActions.push(lastHookAction);
      },
      onAbActionFailed: () => {
        if (!lastHookAction) return;
        const index = traceActions.lastIndexOf(lastHookAction);
        if (index !== -1) traceActions.splice(index, 1);
        lastHookAction = null;
      },
    },
    (msg: SDKMessage) => {
      if (msg.type !== "assistant") return;

      for (const block of msg.message.content ?? []) {
        if (block.type !== "text" || !block.text) continue;
        const text = block.text;

        // Only the first status line of a text block is logged.
        const statusLine = parseStatusLine(text);
        if (statusLine) log.step(statusLine.type, statusLine.stepId, statusLine.detail);

        for (const line of text.split("\n")) {
          const trimmed = line.trim();
          if (trimmed.startsWith("ROUTE_STEP|")) {
            const routeStep = parseRouteStep(trimmed);
            if (routeStep) {
              routeSteps.push(routeStep);
              if (routeStep.status === "FAILED") overallStatus = "failed";
            }
          } else if (trimmed.startsWith("AB_ACTION|snapshot|") || trimmed.startsWith("AB_ACTION|assert|")) {
            // Snapshots and assertions are reported in the assistant's text,
            // not through the onAbAction hook.
            const action = parseAbAction(trimmed);
            if (action) traceActions.push(action);
          }
        }
      }
    },
  );

  if (isError) overallStatus = "failed";

  const timestamp = new Date().toISOString();
  const route: Route = { specName, timestamp, status: overallStatus, steps: routeSteps };

  const [routePath, actionsPath] = await Promise.all([
    saveRoute(featureName, specName, route),
    saveTraceActions(featureName, specName, traceActions),
  ]);

  log.blank();
  log.meta("route", routePath);
  log.meta("saved", actionsPath);
  log.meta("actions", traceActions.length);
  log.meta("status", overallStatus.toUpperCase());
  log.hint(`run 'ccqa generate ${featureName}/${specName}' to generate a test script`);
}
119
+
120
/**
 * Execute setup procedures by running each setup's test.spec.ts via vitest
 * under a fixed session name.
 *
 * For every setup, in order:
 * 1. Read `test.spec.ts` from the setup's directory.
 * 2. Substitute each `{{key}}` placeholder with the matching param value.
 * 3. Rewrite the script's `process.env.AGENT_BROWSER_SESSION = \`…\`;`
 *    assignment so it uses `sessionName`.
 * 4. Write the result to `_run.spec.ts` next to the original, run it with
 *    `bunx vitest run`, forward its output, and delete the temp file again.
 *
 * @param setups Setups to run, each with an optional placeholder → value map.
 * @param sessionName Session name the setups must run under.
 * @throws Error when a setup script cannot be read or vitest exits non-zero.
 */
async function runSetups(
  setups: Array<{ name: string; params?: Record<string, string> }>,
  sessionName: string,
): Promise<void> {
  const sessionAssignment = `process.env.AGENT_BROWSER_SESSION = ${JSON.stringify(sessionName)};`;

  for (const ref of setups) {
    log.info(` setup: ${ref.name}`);

    const setupDir = getSetupDir(ref.name);
    const scriptPath = join(setupDir, "test.spec.ts");
    let script = await readFile(scriptPath, "utf-8").catch(() => {
      throw new Error(`Setup test script not found: ${scriptPath}. Run \`ccqa generate-setup ${ref.name}\` first.`);
    });

    // Replace placeholders with params. A function replacer inserts the value
    // verbatim; a plain string replacement would expand `$&`, `$$`, `` $` ``
    // and `$'` sequences inside the value (e.g. a password containing "$").
    for (const [key, value] of Object.entries(ref.params ?? {})) {
      script = script.replaceAll(`{{${key}}}`, () => value);
    }

    // Fix the session name to share with the trace phase
    script = script.replace(
      /process\.env\.AGENT_BROWSER_SESSION\s*=\s*`.+`;/,
      () => sessionAssignment,
    );

    // Write temp file, run vitest, clean up
    const tmpPath = join(setupDir, `_run.spec.ts`);
    await writeFile(tmpPath, script, "utf-8");

    try {
      const proc = Bun.spawn(["bunx", "vitest", "run", tmpPath], {
        stdout: "pipe",
        stderr: "pipe",
      });
      const [stdout, stderr, exitCode] = await Promise.all([
        new Response(proc.stdout).text(),
        new Response(proc.stderr).text(),
        proc.exited,
      ]);
      process.stdout.write(stdout);
      if (stderr) process.stderr.write(stderr);

      if (exitCode !== 0) {
        throw new Error(`Setup '${ref.name}' failed (exit ${exitCode})`);
      }
    } finally {
      // Best-effort cleanup: a leftover temp file must not mask the real result.
      await unlink(tmpPath).catch(() => {});
    }
  }
}
172
+
173
/**
 * Find the first status line in a block of text.
 *
 * A status line has the form `TYPE|stepId|detail`, where TYPE is one of
 * STEP_START, STEP_DONE, ASSERTION_FAILED, STEP_SKIPPED or RUN_COMPLETED.
 * The detail may itself contain `|`.
 *
 * Each line is trimmed before matching, so indented lines and CRLF line
 * endings are recognised.
 *
 * @param text Text that may span several lines.
 * @returns The parsed first status line, or `null` when no line matches.
 */
export function parseStatusLine(text: string): ParsedStatusLine | null {
  const statusPattern = /^(STEP_START|STEP_DONE|ASSERTION_FAILED|STEP_SKIPPED|RUN_COMPLETED)\|([^|]*)\|(.*)$/;

  for (const rawLine of text.split("\n")) {
    const match = statusPattern.exec(rawLine.trim());
    if (!match) continue;
    return {
      type: match[1] as ParsedStatusLine["type"],
      stepId: match[2] ?? "",
      detail: match[3] ?? "",
    };
  }
  return null;
}
186
+
187
/**
 * Parse a `ROUTE_STEP|id|title|ACTION:…|OBSERVATION:…|STATUS:…` line.
 *
 * The observation is free text and may contain `|`. When it does, the status
 * is read from the last field that starts with `STATUS:` and the fields in
 * between are rejoined into the observation. Lines with exactly six fields
 * are read positionally.
 *
 * @param line A single line; the caller is expected to have trimmed it.
 * @returns The parsed step, or `null` when the line has fewer than six fields.
 *   A status other than PASSED, FAILED or SKIPPED is reported as FAILED.
 */
export function parseRouteStep(line: string): RouteStep | null {
  const parts = line.split("|");
  if (parts.length < 6) return null;

  // Default to the positional status field; look further right only when
  // extra fields exist (i.e. the observation contained a pipe).
  let statusIndex = 5;
  for (let i = parts.length - 1; i > 5; i--) {
    if ((parts[i] ?? "").startsWith("STATUS:")) {
      statusIndex = i;
      break;
    }
  }

  const title = parts[2] ?? "";
  const action = (parts[3] ?? "").replace(/^ACTION:/, "").trim();
  const observation = parts.slice(4, statusIndex).join("|").replace(/^OBSERVATION:/, "").trim();
  const statusRaw = (parts[statusIndex] ?? "").replace(/^STATUS:/, "").trim();

  const status = (["PASSED", "FAILED", "SKIPPED"] as const).find((s) => s === statusRaw) ?? "FAILED";
  return { title, action, observation, status };
}
199
+
200
/**
 * Parse an `AB_ACTION|command|…` line into a trace action.
 *
 * The line is split on `|`; the second field selects the command and the
 * remaining fields are assigned positionally, depending on the command.
 *
 * @param line Candidate line; must start with `AB_ACTION|`.
 * @returns The parsed action, or `null` when the prefix is missing or the
 *   command is not recognised.
 */
export function parseAbAction(line: string): TraceAction | null {
  if (!line.startsWith("AB_ACTION|")) return null;

  const fields = line.split("|");
  const command = fields[1] as TraceCommand | undefined;
  // Positional operands that follow the command name.
  const [, , first, second, third, fourth] = fields;

  switch (command) {
    case "cookies_clear":
      return { command };

    // One operand, stored as the value.
    case "open":
    case "press":
      return { command, value: first };

    case "scroll":
      return { command, direction: first, pixels: second };

    case "snapshot":
      return { command, observation: first };

    case "assert":
      // Empty fields are normalised to undefined.
      return {
        command,
        assertType: first as AssertType,
        selector: second || undefined,
        value: third || undefined,
        observation: fourth || undefined,
      };

    // selector + label
    case "click":
    case "dblclick":
    case "check":
    case "uncheck":
    case "hover":
      return { command, selector: first, label: second };

    case "wait":
      // `wait --text <text>` is recorded as a `text=` selector.
      if (first === "--text") {
        return { command, selector: `text=${second}`, label: third };
      }
      return { command, selector: first, label: second };

    // selector + value + label
    case "fill":
    case "type":
    case "select":
      return { command, selector: first, value: second, label: third };

    case "drag":
      return { command, selector: first, target: second, label: third };

    default:
      return null;
  }
}
@@ -0,0 +1,188 @@
1
import { fileURLToPath } from "node:url";
import type { TraceAction } from "../types.ts";
2
+
3
/** A setup procedure to embed ahead of the main test in a generated script. */
export interface SetupScript {
  /** Setup name; shown in the generated test title as `setup: <name>`. */
  name: string;
  /** Source text inserted verbatim as the body of the generated setup test. */
  body: string;
}

/**
 * Converts recorded trace actions into a vitest-compatible test.spec.ts.
 *
 * The generated script imports `spawnSync` and the `ab*` helpers from
 * `../runtime/test-helpers.ts` (resolved relative to this module), sets a
 * per-run `AGENT_BROWSER_SESSION`, emits one test per setup script, and then
 * emits the main test built from `actions`.
 *
 * NOTE(review): an earlier comment here said the agent-browser binary is
 * resolved via `import.meta.resolve`; that happens outside this function —
 * confirm in the helpers module.
 *
 * @param actions Recorded actions for the main test.
 * @param title Title of the main test.
 * @param setupScripts Optional setups, emitted as tests before the main one.
 * @returns The complete source text of the generated test file.
 */
export function actionsToScript(actions: TraceAction[], title: string, setupScripts?: SetupScript[]): string {
  // Resolve the helpers path relative to this file so it works from any cwd.
  // fileURLToPath decodes percent-escapes (e.g. spaces) and yields a proper
  // platform path, which `URL.pathname` does not.
  const helpersPath = fileURLToPath(new URL("../runtime/test-helpers.ts", import.meta.url));

  const parts: string[] = [
    `import { test } from "vitest";`,
    `import { spawnSync } from "node:child_process";`,
    `import { ab, abWait, abAssertTextVisible, abAssertVisible, abAssertNotVisible, abAssertUrl, abAssertEnabled, abAssertDisabled, abAssertChecked, abAssertUnchecked } from ${JSON.stringify(helpersPath)};`,
    "",
    `// Single session shared across all tests — reset per run via cookies clear in first test`,
    `process.env.AGENT_BROWSER_SESSION = \`ccqa-run-\${Date.now()}\`;`,
    "",
  ];

  // Setup tests (same session — cookies clear in first setup ensures clean state)
  for (const setup of setupScripts ?? []) {
    parts.push(
      // JSON.stringify keeps the title a valid string literal even when the
      // setup name contains quotes or backslashes (same as the main title).
      `test(${JSON.stringify(`setup: ${setup.name}`)}, () => {`,
      setup.body,
      "}, 3 * 60 * 1000);",
      "",
    );
  }

  // Main test (same session — setup state is preserved, no cookies clear)
  const testLines = actionsToLines(actions);
  const body = testLines.map((l) => ` ${l}`).join("\n");
  parts.push(
    `test(${JSON.stringify(title)}, () => {`,
    body,
    "}, 5 * 60 * 1000);",
    "",
  );

  return parts.join("\n");
}
53
+
54
/** Commands that interact with page elements and need the page to be loaded */
const ELEMENT_COMMANDS = new Set<string>(["click", "dblclick", "fill", "type", "check", "uncheck", "select", "hover", "drag"]);

/**
 * Convert recorded actions into the statements of a generated test body.
 *
 * - Actions that produce no line are dropped.
 * - A line identical to the one emitted just before it is dropped.
 * - A 3-second sleep is inserted before the first element command that
 *   follows an `open`. Snapshot comments in between do not cancel that sleep;
 *   any other emitted command does.
 *
 * @param actions Recorded actions, in order.
 * @returns Script lines; an entry may contain an embedded newline (a comment
 *   followed by its assertion).
 */
function actionsToLines(actions: TraceAction[]): string[] {
  const lines: string[] = [];
  let prevLine: string | null = null;
  // True from an emitted `open` until the next emitted non-snapshot command.
  let pageJustOpened = false;

  for (const action of actions) {
    const line = actionToLine(action);
    if (line === null) continue;
    if (line === prevLine) continue;

    // After 'open', give the page time to load before touching elements.
    if (pageJustOpened && ELEMENT_COMMANDS.has(action.command)) {
      lines.push(`spawnSync("sleep", ["3"], { stdio: "inherit" });`);
    }
    lines.push(line);
    prevLine = line;

    // A snapshot only emits a comment, so it must not clear the pending sleep.
    if (action.command !== "snapshot") pageJustOpened = action.command === "open";
  }
  return lines;
}

/** Returns true if a selector is a session-specific @ref that cannot be replayed. */
function isRefSelector(selector: string | undefined): boolean {
  return typeof selector === "string" && /^@/.test(selector.trim());
}

/** Collapses line breaks so text stays inside a single `//` comment in the generated script. */
function singleLine(text: string): string {
  return text.replace(/\s*[\r\n]+\s*/g, " ");
}

/**
 * Convert one recorded action into a line of generated test code.
 *
 * @returns The statement (or comment) for the action, or `null` when the
 *   action cannot be replayed: it targets an `@ref` selector, a required
 *   operand is missing, or the command is unknown.
 */
function actionToLine(action: TraceAction): string | null {
  // Skip actions that use @ref selectors — they are session-specific and not replayable
  if ("selector" in action && isRefSelector(action.selector)) return null;

  switch (action.command) {
    case "cookies_clear":
      return `ab("cookies", "clear");`;

    case "open": {
      // Strip stray surrounding quotes that can appear when agent-browser is called with quoted URL
      const url = (action.value ?? "").replace(/^["']|["']$/g, "");
      return `ab("open", ${j(url)});`;
    }

    case "snapshot":
      return action.observation ? `// ${singleLine(action.observation)}` : null;

    // Single-selector commands. Without a selector there is nothing to replay;
    // emitting one anyway would put a literal `undefined` into the script.
    case "click":
    case "dblclick":
    case "check":
    case "uncheck":
    case "hover":
      if (typeof action.selector !== "string") return null;
      return `ab(${j(action.command)}, ${j(action.selector)});`;

    // A recorded `type` is replayed as `fill`.
    case "fill":
    case "type":
      if (typeof action.selector !== "string" || typeof action.value !== "string") return null;
      return `ab("fill", ${j(action.selector)}, ${j(action.value)});`;

    case "press":
      if (typeof action.value !== "string") return null;
      return `ab("press", ${j(action.value)});`;

    case "select":
      if (typeof action.selector !== "string" || typeof action.value !== "string") return null;
      return `ab("select", ${j(action.selector)}, ${j(action.value)});`;

    case "scroll": {
      const args = [action.direction ?? "down", ...(action.pixels ? [action.pixels] : [])];
      return `ab("scroll", ${args.map(j).join(", ")});`;
    }

    case "drag":
      if (typeof action.selector !== "string" || typeof action.target !== "string") return null;
      return `ab("drag", ${j(action.selector)}, ${j(action.target)});`;

    case "wait": {
      const sel = action.selector;
      if (typeof sel !== "string") return null;
      // Numeric waits represent sleep durations (from auto-fix)
      if (/^\d+$/.test(sel)) return `spawnSync("sleep", [${j(sel)}], { stdio: "inherit" });`;
      return `abWait(${j(sel)});`;
    }

    case "assert": {
      // LLM may omit selector/value fields and put the text in observation instead.
      // Fall back to observation when the specific field is missing or empty.
      const val = action.value || action.observation;
      const sel = action.selector || action.observation;
      const comment = action.observation ? `// Assert: ${singleLine(action.observation)}` : null;
      let assertLine: string | null = null;
      switch (action.assertType) {
        case "text_visible":
          if (val) assertLine = `abAssertTextVisible(${j(val)});`;
          break;
        case "text_not_visible":
          if (val) assertLine = `abAssertNotVisible(${j("text=" + val)}, 180_000);`;
          break;
        case "element_visible":
          if (sel) assertLine = `abAssertVisible(${j(sel)});`;
          break;
        case "element_not_visible":
          if (sel) assertLine = `abAssertNotVisible(${j(sel)});`;
          break;
        case "url_contains":
          if (val) assertLine = `abAssertUrl(${j(val)});`;
          break;
        case "element_enabled":
          // is enabled is unreliable with text= and [aria-label=] selectors that may not exist in DOM
          if (sel && !sel.startsWith("text=") && !sel.startsWith("[aria-label=")) assertLine = `abAssertEnabled(${j(sel)});`;
          break;
        case "element_disabled":
          // Same restriction as element_enabled.
          if (sel && !sel.startsWith("text=") && !sel.startsWith("[aria-label=")) assertLine = `abAssertDisabled(${j(sel)});`;
          break;
        case "element_checked":
          if (sel) assertLine = `abAssertChecked(${j(sel)});`;
          break;
        case "element_unchecked":
          if (sel) assertLine = `abAssertUnchecked(${j(sel)});`;
          break;
      }
      if (comment && assertLine) return `${comment}\n ${assertLine}`;
      return assertLine ?? comment;
    }

    default:
      return null;
  }
}

/** JSON.stringify — produces a quoted string literal safe for embedding in TS source. */
const j = (s: string) => JSON.stringify(s);
@@ -0,0 +1,73 @@
1
+ import type { TraceAction } from "../types.ts";
2
+
3
/**
 * Build the prompt that asks the model where sleeps must be inserted (or
 * lengthened) to fix timing failures in a generated test script.
 *
 * The script is embedded with 1-based line numbers so the model can refer to
 * lines by number; only the first 3000 characters of the failure log are
 * included.
 *
 * @param script Source of the failing test script.
 * @param failureLog Output captured from the failing run.
 * @returns The complete prompt text.
 */
export function buildAutoFixPrompt(script: string, failureLog: string): string {
  const numberedScript = script
    .split("\n")
    .map((text, index) => `${index + 1}: ${text}`)
    .join("\n");
  const logExcerpt = failureLog.slice(0, 3000);

  return `You are analyzing a failing E2E test script. The test fails because some browser actions execute before the page has finished loading or navigating.

Your task: identify which line numbers need a sleep/wait inserted BEFORE them to fix timing issues.

## Rules
- ONLY identify lines where a sleep is needed — do NOT suggest any other changes
- Common patterns that need a sleep:
- After \`ab("open", ...)\` when the next line interacts with elements (fill, click, etc.)
- After \`ab("press", "Enter")\` or \`ab("click", ...)\` when a page navigation occurs before the next action
- After any action that triggers a redirect or page reload
- Look at the error log to identify WHICH lines failed, then determine if a sleep before that line would fix it
- If a \`spawnSync("sleep", ...)\` already exists before a failing line, suggest increasing its duration instead
- Output ONLY a JSON array of objects, no explanation, no markdown code fences

## Output format
Each object has:
- "line": the 1-based line number to insert a sleep BEFORE
- "seconds": recommended sleep duration (typically 3-5)
- "reason": very short explanation (e.g., "page navigation after form submit")

If a sleep already exists and needs to be increased:
- "line": the line number of the existing sleep
- "increase_to": the new duration in seconds
- "reason": explanation

Example output:
[{"line": 15, "seconds": 3, "reason": "page navigation after press Enter"}, {"line": 22, "increase_to": 5, "reason": "slow page load"}]

If no fixes are needed, return: []

## Test Script (with line numbers)
${numberedScript}

## Failure Log
${logExcerpt}`;
}
40
+
41
/**
 * Build the prompt that asks the model to remove noise (failed attempts,
 * retries, duplicates) from a recorded trace and return the cleaned actions
 * as a JSON array.
 *
 * Each action is listed on one numbered line as
 * `N. command field="value" … → observation`. Every operand an action
 * carries is listed — including `target`, `direction`, `pixels` and `label` —
 * so the model can return drag and scroll actions with their operands intact.
 *
 * @param actions Recorded trace actions, in order.
 * @returns The complete prompt text.
 */
export function buildCleanupPrompt(actions: TraceAction[]): string {
  // Fields rendered as name="value", in listing order.
  const quotedFields = ["assertType", "selector", "target", "value", "direction", "pixels", "label"] as const;

  const lines = actions
    .map((a, i) => {
      const parts = [`${i + 1}. ${a.command}`];
      for (const field of quotedFields) {
        const fieldValue = a[field];
        if (fieldValue) parts.push(`${field}="${fieldValue}"`);
      }
      if (a.observation) parts.push(`→ ${a.observation}`);
      return parts.join(" ");
    })
    .join("\n");

  return `You are given a list of browser actions recorded during an E2E test trace.
The trace contains noise: failed attempts, redundant retries, and duplicate operations recorded because the agent explored multiple strategies.

Your task: return a **cleaned-up JSON array** of TraceAction objects that represents the minimal, correct sequence of actions needed to reproduce the test.

Each TraceAction object has the following shape (use EXACTLY these field names):
{ "command": "...", "assertType": "...", "selector": "...", "target": "...", "value": "...", "direction": "...", "pixels": "...", "label": "...", "observation": "..." }
Only include fields that are present in the original action. The "command" field is required. For assert actions, "assertType" is also required.

Rules:
- Remove actions that were failed attempts superseded by a later successful action (e.g., if \`fill selector="text=Foo"\` was followed by \`fill selector="[placeholder='Foo']"\`, keep only the latter)
- Remove duplicate fill operations on the same field (keep only the last successful fill for each field)
- For \`click\` and \`fill\` actions: if the selector starts with \`text=\`, it is a failed attempt — remove it (text= selectors only work with the wait command, not click/fill)
- Keep all snapshot actions — they serve as comments/observations in the generated test
- Keep all assert actions — they are the test's verification points and must not be removed
- Do NOT invent new actions or change values
- Output ONLY a valid JSON array, no explanation, no markdown code fences

## Recorded Actions
${lines}`;
}