ccqa 0.1.6 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1702 @@
1
+ #!/usr/bin/env node
2
+ import { createRequire } from "node:module";
3
+ import { Command } from "commander";
4
+ import { accessSync, readFileSync } from "node:fs";
5
+ import { fileURLToPath } from "node:url";
6
+ import { access, mkdir, mkdtemp, readFile, readdir, rm, stat, unlink, writeFile } from "node:fs/promises";
7
+ import { dirname, join, resolve } from "node:path";
8
+ import { query } from "@anthropic-ai/claude-agent-sdk";
9
+ import matter from "gray-matter";
10
+ import { spawn } from "node:child_process";
11
+ import { tmpdir } from "node:os";
12
+ //#region src/prompts/trace.ts
13
/**
 * Builds an agent-browser session name from the current time.
 *
 * The ISO-8601 timestamp has its `:` and `.` characters replaced by `-`.
 *
 * @returns {string} A name of the form `ccqa-trace-<timestamp>`.
 */
function generateSessionName() {
	const isoNow = new Date().toISOString();
	const safeStamp = isoNow.replace(/[.:]/g, "-");
	return `ccqa-trace-${safeStamp}`;
}
16
/**
 * Builds the system prompt that instructs the agent how to execute a test spec
 * with agent-browser and which structured log lines to emit.
 *
 * @param {{ title: string, baseUrl: string, prerequisites?: string, steps: Array<{ id: string, title: string, instruction: string, expected: string }> }} spec
 *   Parsed test specification.
 * @param {{ sessionName?: string, skipCookiesClear?: boolean }} [options]
 *   `sessionName` defaults to a freshly generated name; when `skipCookiesClear`
 *   is true the start section omits the `cookies clear` command.
 * @returns {string} The complete system prompt.
 */
function buildTraceSystemPrompt(spec, options) {
	const session = options?.sessionName ?? generateSessionName();
	const keepCookies = options?.skipCookiesClear ?? false;

	// One markdown section per step, separated by blank lines.
	const renderStep = (step) => [
		`### ${step.id}: ${step.title}`,
		`- **Instruction**: ${step.instruction}`,
		`- **Expected**: ${step.expected}`
	].join("\n");
	const stepsSection = spec.steps.map(renderStep).join("\n\n");

	let prerequisitesSection = "";
	if (spec.prerequisites) prerequisitesSection = `## Prerequisites\n${spec.prerequisites}\n\n`;

	// Opening commands differ depending on whether a setup already ran in this session.
	let startSection;
	if (keepCookies) {
		startSection = `A setup procedure has already been executed in this session. Do NOT clear cookies — keep the existing session state.

\`\`\`bash
agent-browser --session ${session} open ${spec.baseUrl}
\`\`\`

Emit:
\`\`\`
AB_ACTION|open|${spec.baseUrl}
\`\`\``;
	} else {
		startSection = `\`\`\`bash
agent-browser --session ${session} cookies clear
agent-browser --session ${session} open ${spec.baseUrl}
\`\`\`

Emit:
\`\`\`
AB_ACTION|cookies_clear
AB_ACTION|open|${spec.baseUrl}
\`\`\``;
	}

	return `You are an expert QA engineer executing a browser E2E test. Execute each step precisely and record every browser action as a structured log line.

## Session

SESSION NAME: \`${session}\`

Always pass \`--session ${session}\` to every \`agent-browser\` command.

## Browser Commands

\`\`\`
agent-browser --session SESSION open <url>
agent-browser --session SESSION snapshot
agent-browser --session SESSION click "<selector>"
agent-browser --session SESSION fill "<selector>" "<value>"
agent-browser --session SESSION check "<selector>"
agent-browser --session SESSION uncheck "<selector>"
agent-browser --session SESSION press <Key>
agent-browser --session SESSION select "<selector>" "<value>"
agent-browser --session SESSION hover "<selector>"
agent-browser --session SESSION wait --text "<text>"
agent-browser --session SESSION cookies clear
\`\`\`

## Selector Rules

**ALLOWED — these formats only:**

| Format | Use when |
|--------|----------|
| \`[aria-label='label']\` | Element has aria-label (check snapshot output) — **FIRST CHOICE** |
| \`text=visible text\` | Unique visible text, no aria-label |
| \`[placeholder='text']\` | Input identified by placeholder |
| \`[type='password']\` | Password inputs only |
| \`a[href*='pattern']\` | Links where \`text=\` fails — use the URL pattern from the ARIA snapshot (e.g. \`a[href*='/settings']\`) |

**FORBIDDEN — these will break recorded tests or are not valid commands:**

- \`@ref\` / \`@e1\` / \`e14\` — reference IDs are session-specific and change every run; never use them
- \`[role='button']\` or \`[type='checkbox']\` alone — matches too many elements
- Bare tag selectors: \`button\`, \`td\`, \`tr\`, \`main a\`, \`table tbody tr:nth-child(N)\` — these are positional/non-deterministic and will fail on replay
- \`find ...\`, \`textbox ...\`, \`label ...\` — not valid agent-browser commands; these are **blocked** and will fail
- JavaScript execution (\`eval\`, \`js\`) — **blocked** at the hook level; cannot bypass this restriction

**Selector workflow:**
1. Run \`snapshot\` — read the ARIA tree output carefully
2. Find the element; note its exact \`aria-label\` value if present
3. If aria-label present → use \`[aria-label='...']\`; otherwise → use \`text=...\`
4. If \`text=...\` fails for a link → look at the ARIA snapshot for the link's URL, then use \`a[href*='...']\` with a distinctive URL substring (e.g. \`a[href*='/dashboard']\`, \`a[href*='filter=active']\`)
5. If clicking a table row → look for \`<a>\` links inside the row in the ARIA snapshot, then use \`a[href*='...']\` targeting that link's URL pattern
6. For checkboxes: try \`check "text=Label"\` or \`check "[aria-label='Label']"\`
7. Never guess — if a selector fails once, take a fresh snapshot before retrying

## Test Specification

Title: ${spec.title}
Base URL: ${spec.baseUrl}

${prerequisitesSection}## Steps

${stepsSection}

## Execution Workflow

For each step:
1. Emit \`STEP_START|<step-id>|<step-title>\`
2. Run \`snapshot\` and identify selectors from the ARIA tree
3. Execute the action using an ALLOWED selector
4. Emit \`AB_ACTION|...\` for every browser action (see below)
5. Run \`snapshot\` again to verify the outcome
6. Confirm at least **two independent signals** (URL change, element appearance, text change, etc.)
7. For each verified signal, emit \`AB_ACTION|assert|...\` (see Assertion Protocol below)
8. Emit \`ROUTE_STEP|...\`
9. Emit \`STEP_DONE\`, \`ASSERTION_FAILED\`, or \`STEP_SKIPPED\`

**After form submission or navigation:** take a snapshot before continuing. If an intermediate screen appears (e.g. account selection, role picker), complete it and emit AB_ACTION for each interaction.

## Guardrails

- **Stop after 3 consecutive failures on the same step** — emit \`ASSERTION_FAILED\` and report the blocker. Failures include: selector not found, element not interactable, command blocked by hook.
- **Do NOT use workarounds** — if all ALLOWED selectors fail, do NOT fall back to \`mouse move\`, coordinate-based clicks, \`Tab\`+\`Enter\` keyboard navigation, or any other indirect method. These cannot be recorded as reliable test actions. Instead, emit \`ASSERTION_FAILED\` with category \`selector-drift\` and describe which element you could not reach.
- **Do NOT use bare tag selectors** — never use \`click "button"\`, \`click "td"\`, \`click "main a"\`, or \`click "a"\` alone. These match too many elements and are non-deterministic. Always use a specific ALLOWED selector format.
- Do NOT retry a selector without taking a fresh snapshot first
- Do NOT work around blockers (login walls, missing data, captchas) — stop and report
- **Do NOT suppress errors** — never use \`2>/dev/null\`, \`|| true\`, \`; other-command\`, or any other technique that hides agent-browser failures. Each \`agent-browser\` command must run standalone so failures are properly detected and recorded.

## Source Code Reference

You have access to **Read**, **Grep**, and **Glob** tools to inspect the application source code. Use them proactively to find correct selectors — do NOT guess \`a[href*='...']\` patterns by trial and error.

**When to read source code:**
- Before clicking a link: Grep for the link text or URL pattern in the codebase to find the exact \`href\` value
- Before navigating to a new page: Glob for page/route files to understand the URL structure
- When the ARIA snapshot shows an element but \`text=\` and \`[aria-label=]\` selectors fail: Read the component to find what HTML attributes the element has

**How:**
1. Use \`Grep\` to search for UI text, component names, or URL patterns
2. Use \`Read\` to inspect the component's JSX/TSX and find \`href\`, \`aria-label\`, \`data-testid\`, or class names
3. Build a precise ALLOWED selector from the discovered attributes

**Rules:**
- Only READ source files — never modify them
- Keep source reading focused — search for specific strings, not entire directories

## Waiting for Async Operations

Prefer the \`wait\` command over polling:

\`\`\`bash
# Best: wait for expected text to appear
agent-browser --session ${session} wait --text "<completion text>"
\`\`\`

If polling is required (e.g. waiting for a spinner to disappear):

\`\`\`bash
for i in $(seq 1 18); do
sleep 10
result=$(agent-browser --session ${session} snapshot 2>&1)
# Check result for the expected change and break when found
echo "$result" | grep -q "<done indicator>" && break
done
agent-browser --session ${session} snapshot
\`\`\`

After waiting, always take a final snapshot. Emit \`AB_ACTION|wait|text=<text>|<label>\`.

## AB_ACTION Protocol

After **every** browser action, emit one line (outside any code block):

\`\`\`
AB_ACTION|cookies_clear
AB_ACTION|open|<url>
AB_ACTION|click|<selector>|<visible label>
AB_ACTION|dblclick|<selector>|<visible label>
AB_ACTION|fill|<selector>|<value>|<aria label>
AB_ACTION|check|<selector>|<visible label>
AB_ACTION|uncheck|<selector>|<visible label>
AB_ACTION|press|<Key>
AB_ACTION|select|<selector>|<value>|<aria label>
AB_ACTION|hover|<selector>|<visible label>
AB_ACTION|scroll|<direction>|<pixels>
AB_ACTION|drag|<source selector>|<target selector>|<source label>
AB_ACTION|wait|<selector or text>|<label>
AB_ACTION|snapshot|<key observation, max 100 chars>
AB_ACTION|assert|<assertType>|<selector or "">|<value or "">|<observation>
\`\`\`

The selector in AB_ACTION must be one of the ALLOWED formats above.

## Assertion Protocol

After verifying each step, emit \`AB_ACTION|assert\` lines for each signal you confirmed.

**Available assertTypes:**

| assertType | Use when | selector | value |
|------------|----------|----------|-------|
| \`text_visible\` | Stable text appears on page | (empty) | text to find |
| \`text_not_visible\` | Text should be gone | (empty) | text that should be absent |
| \`element_visible\` | Element is visible | CSS selector | (empty) |
| \`element_not_visible\` | Element is hidden/removed | CSS selector | (empty) |
| \`url_contains\` | URL contains a pattern | (empty) | URL substring |
| \`element_enabled\` | Button/input is enabled | CSS selector | (empty) |
| \`element_disabled\` | Button/input is disabled | CSS selector | (empty) |
| \`element_checked\` | Checkbox is checked | CSS selector | (empty) |
| \`element_unchecked\` | Checkbox is unchecked | CSS selector | (empty) |

**Stability rules — CRITICAL:**
- **NEVER** assert on: timestamps (dates, times), session IDs, exact numeric counts that vary between runs
- For dynamic counts (e.g. "42 results"): assert on the STABLE part only (e.g. "results"), not the number
- **PREFER** asserting on: status text, button labels, URL patterns, element enabled/disabled state

**Page context rules — CRITICAL:**
- After a page navigation (\`open\` or \`click\` that navigates), take a **fresh snapshot** BEFORE emitting any assertions
- Only assert on text/elements that are visible on the **current** page — never assert on text from the previous page
- If you navigated away from a page, its text is gone — do not emit \`text_visible\` for it

**Selector rules for assert actions — CRITICAL:**
- Use the **same ALLOWED formats** as browser actions — never invent aria-label values
- Only use \`[aria-label='...']\` if that **exact** aria-label string appears in the current ARIA snapshot output
- When unsure, prefer \`text_visible\`/\`text_not_visible\` (no selector needed) over guessing a selector
- For \`element_disabled\`/\`element_enabled\`: use a CSS class selector if no aria-label is confirmed in the snapshot

**Examples:**
\`\`\`
AB_ACTION|assert|url_contains|||/dashboard|Navigated to dashboard
AB_ACTION|assert|element_disabled|.btn-submit||Submit button disabled before form is valid
AB_ACTION|assert|element_enabled|.btn-submit||Submit button enabled after form is filled
AB_ACTION|assert|text_visible|||Loading|Operation started
AB_ACTION|assert|text_visible|||Done|Operation completed
AB_ACTION|assert|text_visible|||Success|Confirmation message appeared
\`\`\`

## Status Protocol

Emit exactly one status line per step (outside any code block):

\`\`\`
STEP_START|<step-id>|<step-title>
STEP_DONE|<step-id>|<what was verified>
ASSERTION_FAILED|<step-id>|<category: app-bug|env-issue|auth-blocked|missing-test-data|selector-drift|agent-misread>: <reason>
STEP_SKIPPED|<step-id>|<reason>
RUN_COMPLETED|passed|<summary>
RUN_COMPLETED|failed|<summary>
\`\`\`

## Route Recording

After each step (outside any code block):

\`\`\`
ROUTE_STEP|<step-id>|<step-title>|ACTION:<what you did>|OBSERVATION:<what you verified>|STATUS:<PASSED|FAILED|SKIPPED>
\`\`\`

## Start

${startSection}

Then emit \`STEP_START|step-01|...\` and begin.`;
}
263
/**
 * Builds the user prompt that starts a trace run for a test spec.
 *
 * @param {{ title: string, baseUrl: string }} spec - Parsed test specification.
 * @returns {string} One-sentence instruction naming the spec title and base URL.
 */
function buildTracePrompt(spec) {
	const { title, baseUrl } = spec;
	return "Execute the test for \"" + title + "\" at " + baseUrl + ".";
}
266
/**
 * Builds the trace system prompt for a setup procedure.
 *
 * Only the title and steps are forwarded; the base URL is fixed to
 * `about:blank` and no prompt options are passed.
 *
 * @param {{ title: string, steps: Array<object> }} spec - Parsed setup specification.
 * @returns {string} The system prompt produced by `buildTraceSystemPrompt`.
 */
function buildSetupTraceSystemPrompt(spec) {
	const { title, steps } = spec;
	const setupAsTestSpec = { title, baseUrl: "about:blank", steps };
	return buildTraceSystemPrompt(setupAsTestSpec);
}
273
/**
 * Builds the user prompt that starts a setup-procedure trace.
 *
 * @param {{ title: string }} spec - Parsed setup specification.
 * @returns {string} Instruction naming the setup procedure.
 */
function buildSetupTracePrompt(spec) {
	const quotedTitle = `"${spec.title}"`;
	return "Execute the setup procedure " + quotedTitle + ". Follow each step precisely.";
}
276
+ //#endregion
277
+ //#region src/cli/logger.ts
278
/** Icon printed in front of each step status line, keyed by status type. */
const STEP_ICONS = {
	STEP_START: "▶",
	STEP_DONE: "✓",
	ASSERTION_FAILED: "✗",
	STEP_SKIPPED: "⊘",
	RUN_COMPLETED: "■"
};

/** Prints the command banner (`ccqa <command> [target]`) surrounded by blank lines. */
function header(command, target) {
	const suffix = target ? " " + target : "";
	process.stdout.write("\nccqa " + command + suffix + "\n\n");
}

/** Prints an indented `key: value` line to stdout. */
function meta(key, value) {
	process.stdout.write(" " + key + ": " + value + "\n");
}

/** Prints an empty line to stdout. */
function blank() {
	process.stdout.write("\n");
}

/** Prints a plain message line to stdout. */
function info(message) {
	process.stdout.write(message + "\n");
}

/** Prints a step status line: icon for `type`, the step id in brackets, then the detail. */
function step(type, stepId, detail) {
	const icon = STEP_ICONS[type];
	process.stdout.write(" " + icon + " [" + stepId + "] " + detail + "\n");
}

/** Prints a shell command to stdout, truncated to its first 120 characters. */
function bash(command) {
	const shown = command.slice(0, 120);
	process.stdout.write(" $ " + shown + "\n");
}

/** Prints an `error:` line to stderr. */
function error(message) {
	process.stderr.write("error: " + message + "\n");
}

/** Prints a `warn:` line to stderr. */
function warn(message) {
	process.stderr.write("warn: " + message + "\n");
}

/** Prints a `hint:` line to stdout, preceded by a blank line. */
function hint(message) {
	process.stdout.write("\nhint: " + message + "\n");
}
312
+ //#endregion
313
+ //#region src/claude/invoke.ts
314
/**
 * Runs a Claude Agent SDK query and forwards every streamed message to `onEvent`.
 *
 * When `onAbAction` or `onAbActionFailed` is supplied, Bash tool calls are
 * intercepted by hooks:
 * - blocked agent-browser subcommands and `@ref` selectors are rejected before
 *   they run;
 * - commands recognised by `extractAbActionFromBashCommand` are reported to
 *   `onAbAction` as `AB_ACTION|...` lines;
 * - if the most recently recognised command then fails, `onAbActionFailed` is
 *   called once.
 *
 * @param {object} options
 * @param {string} options.prompt - User prompt.
 * @param {string} [options.systemPrompt] - System prompt passed to the SDK.
 * @param {string[]} [options.allowedTools] - Defaults to `["Bash(*)"]`.
 * @param {boolean} [options.disableBuiltinTools=false] - When true, passes `tools: []`.
 * @param {number} [options.maxTurns]
 * @param {Record<string, string>} [options.env] - Merged over `process.env` when given.
 * @param {(abAction: string) => void} [options.onAbAction]
 * @param {() => void} [options.onAbActionFailed]
 * @param {(msg: object) => void} onEvent - Called with every streamed message.
 * @returns {Promise<{ result: string, isError: boolean }>} `result` is the text of
 *   the final result message when its subtype is "success", otherwise "".
 */
async function invokeClaudeStreaming(options, onEvent) {
	const { prompt, systemPrompt, allowedTools, disableBuiltinTools = false, maxTurns, env, onAbAction, onAbActionFailed } = options;
	// tool_use_id of the last Bash call recognised as a recordable agent-browser action.
	let lastAbToolUseId = null;

	const preToolUseHook = async (input) => {
		if (input.hook_event_name !== "PreToolUse") return {};
		if (input.tool_name !== "Bash") return {};
		const cmd = input.tool_input?.["command"];
		if (typeof cmd !== "string") return {};
		if (isBlockedAbSubcommand(cmd)) return {
			decision: "block",
			reason: "This agent-browser subcommand is not allowed because it cannot be recorded as a structured test action. Use only the standard commands: click, check, fill, select, hover, press, wait. Take a fresh snapshot to find the correct selector."
		};
		if (hasRefSelector(cmd)) return {
			decision: "block",
			reason: "@ref selectors (like @e14) are session-specific and change every run. They cannot be used in generated tests. Use one of the allowed selector formats instead: [aria-label='...'], text=..., [placeholder='...'], or [type='password']. Take a fresh snapshot and find the element's aria-label or visible text."
		};
		const ab = extractAbActionFromBashCommand(cmd);
		if (ab) {
			// Track the id even when only onAbActionFailed was supplied; otherwise the
			// failure hook below could never match and the callback would be dead.
			lastAbToolUseId = input.tool_use_id;
			onAbAction?.(ab);
		} else {
			lastAbToolUseId = null;
		}
		return {};
	};

	const postToolUseFailureHook = async (input) => {
		if (input.hook_event_name !== "PostToolUseFailure") return {};
		if (input.tool_name !== "Bash") return {};
		if (input.tool_use_id === lastAbToolUseId && onAbActionFailed) {
			onAbActionFailed();
			// Reset so the same failure is not reported twice.
			lastAbToolUseId = null;
		}
		return {};
	};

	const sdkOptions = {
		systemPrompt,
		maxTurns,
		allowedTools: allowedTools ?? ["Bash(*)"],
		permissionMode: "bypassPermissions",
		allowDangerouslySkipPermissions: true,
		...(env ? { env: { ...process.env, ...env } } : {}),
		...(disableBuiltinTools ? { tools: [] } : {}),
		hooks: onAbAction || onAbActionFailed ? {
			PreToolUse: [{ hooks: [preToolUseHook] }],
			PostToolUseFailure: [{ hooks: [postToolUseFailureHook] }]
		} : void 0
	};

	let result = "";
	let isError = false;
	const q = await buildMessageStream(prompt, sdkOptions);
	for await (const msg of q) {
		onEvent(msg);
		if (msg.type === "assistant") {
			// Echo every Bash command the assistant is about to run.
			for (const block of msg.message.content ?? []) {
				if (block.type !== "tool_use" || block.name !== "Bash") continue;
				const cmd = block.input?.["command"];
				if (typeof cmd === "string") bash(cmd);
			}
		}
		if (msg.type === "result") {
			result = msg.subtype === "success" ? msg.result : "";
			isError = msg.is_error ?? false;
		}
	}
	return {
		result,
		isError
	};
}
381
/** agent-browser subcommands that `isBlockedAbSubcommand` rejects. */
const BLOCKED_AB_SUBCOMMANDS = new Set([
	"eval",
	"js",
	"find",
	"label",
	"textbox"
]);

/**
 * Shell-aware tokenizer: splits a command string into tokens respecting single/double quotes.
 * e.g. `click "[role='dialog'] button:last-child"` → ["click", "[role='dialog'] button:last-child"]
 *
 * Any whitespace (space, tab, newline) outside quotes separates tokens, and an
 * empty quoted argument (`""` or `''`) is kept as an empty-string token so the
 * positions of later arguments do not shift. Backslash escapes are not interpreted.
 *
 * @param {string} s - Command text to split.
 * @returns {string[]} Tokens with the surrounding quotes removed.
 */
function shellTokenize(s) {
	const tokens = [];
	let cur = "";
	// True once the current token has started, even if it is still empty (e.g. `""`).
	let inToken = false;
	let quote = null;
	for (const ch of s) {
		if (quote !== null) {
			if (ch === quote) quote = null;
			else cur += ch;
		} else if (ch === "\"" || ch === "'") {
			quote = ch;
			inToken = true;
		} else if (/\s/.test(ch)) {
			if (inToken) {
				tokens.push(cur);
				cur = "";
				inToken = false;
			}
		} else {
			cur += ch;
			inToken = true;
		}
	}
	if (inToken) tokens.push(cur);
	return tokens;
}

/**
 * Tokenizes everything after the first `agent-browser` occurrence in `cmd`.
 * Only that first invocation is inspected.
 *
 * @param {string} cmd - Full bash command.
 * @returns {string[] | null} Tokens, or null when `agent-browser` does not appear.
 */
function tokenizeAbInvocation(cmd) {
	const abIdx = cmd.indexOf("agent-browser");
	if (abIdx === -1) return null;
	return shellTokenize(cmd.slice(abIdx + "agent-browser".length));
}

/**
 * Returns the index of the subcommand token, skipping leading flags.
 * `--flag value` consumes two tokens; `--flag=value` consumes one.
 *
 * @param {string[]} parts - Tokens following `agent-browser`.
 * @returns {number} Index of the first non-flag token (may equal `parts.length`).
 */
function findAbSubcommandIndex(parts) {
	let i = 0;
	while (i < parts.length && parts[i].startsWith("-")) {
		i += parts[i].includes("=") ? 1 : 2;
	}
	return i;
}

/** Extracts the subcommand from an `agent-browser [flags] <subcommand> [args...]` command string. */
function extractAbSubcommand(cmd) {
	const parts = tokenizeAbInvocation(cmd);
	if (parts === null) return null;
	return parts[findAbSubcommandIndex(parts)] ?? null;
}

/** Returns true if the agent-browser subcommand is blocked (eval/js/find/etc). */
function isBlockedAbSubcommand(cmd) {
	const sub = extractAbSubcommand(cmd);
	return sub !== null && BLOCKED_AB_SUBCOMMANDS.has(sub);
}

/** Returns true if any argument to an agent-browser command uses a @ref selector (e.g. @e14). */
function hasRefSelector(cmd) {
	const parts = tokenizeAbInvocation(cmd);
	if (parts === null) return false;
	// Arguments start right after the subcommand token.
	const args = parts.slice(findAbSubcommandIndex(parts) + 1);
	return args.some((arg) => arg.startsWith("@"));
}

/**
 * Parse an `agent-browser --session <name> <cmd> [args...]` bash command
 * and return the corresponding AB_ACTION line, or null if not an agent-browser call.
 *
 * Tokens that start with a redirection or pipe operator (`>`, `2>`, `|`, `&`)
 * are dropped before the arguments are read. `snapshot`, `cookies` without
 * `clear`, and unrecognised subcommands yield null.
 *
 * @param {string} cmd - Full bash command.
 * @returns {string | null} The `AB_ACTION|...` line, or null.
 */
function extractAbActionFromBashCommand(cmd) {
	const subCmd = extractAbSubcommand(cmd);
	if (!subCmd) return null;
	const parts = tokenizeAbInvocation(cmd).filter((t) => !/^(2?>|[|&>])/.test(t));
	const args = parts.slice(findAbSubcommandIndex(parts) + 1);
	const arg = (n) => args[n] ?? "";
	switch (subCmd) {
		case "cookies":
			return args[0] === "clear" ? "AB_ACTION|cookies_clear" : null;
		case "open": return `AB_ACTION|open|${arg(0)}`;
		case "press": return `AB_ACTION|press|${arg(0)}`;
		case "scroll": return `AB_ACTION|scroll|${args.join("|")}`;
		case "click":
		case "dblclick":
		case "check":
		case "uncheck":
		case "hover":
		case "wait": return `AB_ACTION|${subCmd}|${arg(0)}|${arg(1)}`;
		case "fill":
		case "type":
		case "select": return `AB_ACTION|${subCmd}|${arg(0)}|${arg(1)}|${arg(2)}`;
		case "drag": return `AB_ACTION|drag|${arg(0)}|${arg(1)}|${arg(2)}`;
		case "snapshot": return null;
		default: return null;
	}
}
469
/**
 * Returns the message stream for a prompt.
 *
 * When the `CCQA_CLAUDE_MOCK_FILE` environment variable is set, messages are
 * replayed from that file instead of calling the SDK.
 *
 * @param {string} prompt - User prompt.
 * @param {object} options - SDK options forwarded to `query`.
 * @returns {Promise<AsyncIterable<object>>} The stream of messages.
 */
async function buildMessageStream(prompt, options) {
	const { CCQA_CLAUDE_MOCK_FILE: mockFile } = process.env;
	if (!mockFile) return query({ prompt, options });
	return replayMockMessages(mockFile);
}
477
/**
 * Replays recorded messages from a newline-delimited JSON file.
 *
 * Blank lines are skipped. Each remaining line is parsed only when the
 * consumer reaches it.
 *
 * @param {string} path - File containing one JSON message per line.
 * @yields {object} The parsed message for each non-empty line.
 */
async function* replayMockMessages(path) {
	const lines = (await readFile(path, "utf8")).split("\n");
	for (let idx = 0; idx < lines.length; idx += 1) {
		const text = lines[idx].trim();
		if (text.length > 0) yield JSON.parse(text);
	}
}
485
+ //#endregion
486
+ //#region src/store/index.ts
487
/** Name of the directory, relative to the working directory, that holds ccqa data. */
const CCQA_DIR = ".ccqa";

/**
 * Returns the path of the ccqa data directory.
 *
 * @param {string} [cwd=process.cwd()] - Base directory.
 * @returns {string} `<cwd>/.ccqa`.
 */
function getCcqaDir(cwd = process.cwd()) {
	const baseDir = cwd;
	return join(baseDir, CCQA_DIR);
}
491
/**
 * Splits a `<feature>/<spec>` path into its two names.
 *
 * @param {string} specPath - Path with exactly one `/` and two non-empty segments.
 * @returns {{ featureName: string, specName: string }}
 * @throws {Error} When the path does not have exactly two non-empty segments.
 */
function parseSpecPath(specPath) {
	const [featureName, specName, ...extra] = specPath.split("/");
	const wellFormed = extra.length === 0 && Boolean(featureName) && Boolean(specName);
	if (!wellFormed) {
		throw new Error(`Invalid spec path: "${specPath}". Expected format: "<feature>/<spec>"`);
	}
	return { featureName, specName };
}
499
/**
 * Returns the directory of one feature: `<ccqa dir>/features/<featureName>`.
 *
 * @param {string} featureName
 * @param {string} [cwd] - Base directory forwarded to `getCcqaDir`.
 * @returns {string}
 */
function getFeatureDir(featureName, cwd) {
	const ccqaRoot = getCcqaDir(cwd);
	return join(ccqaRoot, "features", featureName);
}
502
/**
 * Returns the directory of one spec: `<feature dir>/test-cases/<specName>`.
 *
 * @param {string} featureName
 * @param {string} specName
 * @param {string} [cwd] - Base directory forwarded to `getFeatureDir`.
 * @returns {string}
 */
function getSpecDir(featureName, specName, cwd) {
	const featureDir = getFeatureDir(featureName, cwd);
	return join(featureDir, "test-cases", specName);
}
505
/**
 * Creates `<ccqa dir>/features` (and any missing parents) if it does not exist.
 *
 * @param {string} [cwd] - Base directory forwarded to `getCcqaDir`.
 * @returns {Promise<void>}
 */
async function ensureCcqaDir(cwd) {
	const featuresDir = join(getCcqaDir(cwd), "features");
	await mkdir(featuresDir, { recursive: true });
}
508
/**
 * Reads the `test-spec.md` file of a spec.
 *
 * @param {string} featureName
 * @param {string} specName
 * @param {string} [cwd] - Base directory forwarded to `getSpecDir`.
 * @returns {Promise<string>} File contents decoded as UTF-8.
 * @throws {Error} "Spec file not found: <path>" when the file cannot be read;
 *   the underlying filesystem error is attached as `cause`.
 */
async function readSpecFile(featureName, specName, cwd) {
	const specPath = join(getSpecDir(featureName, specName, cwd), "test-spec.md");
	try {
		return await readFile(specPath, "utf-8");
	} catch (cause) {
		// Keep the original error: the read can fail for reasons other than a
		// missing file (permissions, path is a directory, ...).
		throw new Error(`Spec file not found: ${specPath}`, { cause });
	}
}
514
/**
 * Writes a route as `route.md` into the spec directory, creating it if needed.
 *
 * @param {string} featureName
 * @param {string} specName
 * @param {object} route - Route rendered with `routeToMarkdown`.
 * @param {string} [cwd] - Base directory forwarded to `getSpecDir`.
 * @returns {Promise<string>} Path of the written file.
 */
async function saveRoute(featureName, specName, route, cwd) {
	const targetDir = getSpecDir(featureName, specName, cwd);
	const routePath = join(targetDir, "route.md");
	await mkdir(targetDir, { recursive: true });
	await writeFile(routePath, routeToMarkdown(route), "utf-8");
	return routePath;
}
521
/**
 * Writes recorded actions as pretty-printed JSON to `actions.json` in the spec
 * directory, creating the directory if needed.
 *
 * @param {string} featureName
 * @param {string} specName
 * @param {unknown} actions - JSON-serialisable actions.
 * @param {string} [cwd] - Base directory forwarded to `getSpecDir`.
 * @returns {Promise<string>} Path of the written file.
 */
async function saveTraceActions(featureName, specName, actions, cwd) {
	const targetDir = getSpecDir(featureName, specName, cwd);
	const actionsPath = join(targetDir, "actions.json");
	await mkdir(targetDir, { recursive: true });
	await writeFile(actionsPath, JSON.stringify(actions, null, 2), "utf-8");
	return actionsPath;
}
528
/**
 * Returns the directory of one setup procedure: `<ccqa dir>/setups/<name>`.
 *
 * @param {string} name - Setup name.
 * @param {string} [cwd] - Base directory forwarded to `getCcqaDir`.
 * @returns {string}
 */
function getSetupDir(name, cwd) {
	const ccqaRoot = getCcqaDir(cwd);
	return join(ccqaRoot, "setups", name);
}
531
/**
 * Reads the `setup-spec.md` file of a setup procedure.
 *
 * @param {string} name - Setup name.
 * @param {string} [cwd] - Base directory forwarded to `getSetupDir`.
 * @returns {Promise<string>} File contents decoded as UTF-8.
 * @throws {Error} "Setup spec not found: <path>" when the file cannot be read;
 *   the underlying filesystem error is attached as `cause`.
 */
async function readSetupSpecFile(name, cwd) {
	const specPath = join(getSetupDir(name, cwd), "setup-spec.md");
	try {
		return await readFile(specPath, "utf-8");
	} catch (cause) {
		// Keep the original error so non-ENOENT failures remain diagnosable.
		throw new Error(`Setup spec not found: ${specPath}`, { cause });
	}
}
537
/**
 * Writes a setup's recorded actions as pretty-printed JSON to `actions.json`
 * in the setup directory, creating the directory if needed.
 *
 * @param {string} name - Setup name.
 * @param {unknown} actions - JSON-serialisable actions.
 * @param {string} [cwd] - Base directory forwarded to `getSetupDir`.
 * @returns {Promise<string>} Path of the written file.
 */
async function saveSetupActions(name, actions, cwd) {
	const setupDir = getSetupDir(name, cwd);
	const actionsPath = join(setupDir, "actions.json");
	await mkdir(setupDir, { recursive: true });
	await writeFile(actionsPath, JSON.stringify(actions, null, 2), "utf-8");
	return actionsPath;
}
544
/**
 * Loads the recorded actions of a setup procedure from its `actions.json`.
 *
 * @param {string} name - Setup name.
 * @param {string} [cwd] - Base directory forwarded to `getSetupDir`.
 * @returns {Promise<{ path: string, actions: unknown }>} File path and parsed JSON.
 * @throws {Error} When the file cannot be read (with a hint to run
 *   `ccqa trace-setup`), or when it is not valid JSON. The underlying error is
 *   attached as `cause` in both cases.
 */
async function getSetupActions(name, cwd) {
	const path = join(getSetupDir(name, cwd), "actions.json");
	let content;
	try {
		content = await readFile(path, "utf-8");
	} catch (cause) {
		throw new Error(`No setup actions found for: ${name}. Run \`ccqa trace-setup ${name}\` first.`, { cause });
	}
	try {
		return {
			path,
			actions: JSON.parse(content)
		};
	} catch (cause) {
		// A bare SyntaxError would not say which file is corrupt.
		throw new Error(`Setup actions file is not valid JSON: ${path}`, { cause });
	}
}
554
/**
 * Writes a setup's route as `route.md` into the setup directory, creating it if needed.
 *
 * @param {string} name - Setup name.
 * @param {object} route - Route rendered with `routeToMarkdown`.
 * @param {string} [cwd] - Base directory forwarded to `getSetupDir`.
 * @returns {Promise<string>} Path of the written file.
 */
async function saveSetupRoute(name, route, cwd) {
	const setupDir = getSetupDir(name, cwd);
	const routePath = join(setupDir, "route.md");
	await mkdir(setupDir, { recursive: true });
	await writeFile(routePath, routeToMarkdown(route), "utf-8");
	return routePath;
}
561
/**
 * Loads the recorded trace actions of a spec from its `actions.json`.
 *
 * @param {string} featureName
 * @param {string} specName
 * @param {string} [cwd] - Base directory forwarded to `getSpecDir`.
 * @returns {Promise<{ path: string, actions: unknown }>} File path and parsed JSON.
 * @throws {Error} When the file cannot be read (with a hint to run `ccqa trace`),
 *   or when it is not valid JSON. The underlying error is attached as `cause`
 *   in both cases.
 */
async function getTraceActions(featureName, specName, cwd) {
	const path = join(getSpecDir(featureName, specName, cwd), "actions.json");
	let content;
	try {
		content = await readFile(path, "utf-8");
	} catch (cause) {
		throw new Error(`No trace actions found for spec: ${featureName}/${specName}. Run \`ccqa trace\` first.`, { cause });
	}
	try {
		return {
			path,
			actions: JSON.parse(content)
		};
	} catch (cause) {
		// A bare SyntaxError would not say which file is corrupt.
		throw new Error(`Trace actions file is not valid JSON: ${path}`, { cause });
	}
}
571
/**
 * Writes a generated test script as `test.spec.ts` into the spec directory,
 * creating the directory if needed.
 *
 * @param {string} featureName
 * @param {string} specName
 * @param {string} content - Script source to write.
 * @param {string} [cwd] - Base directory forwarded to `getSpecDir`.
 * @returns {Promise<string>} Path of the written file.
 */
async function saveTestScript(featureName, specName, content, cwd) {
	const targetDir = getSpecDir(featureName, specName, cwd);
	const scriptPath = join(targetDir, "test.spec.ts");
	await mkdir(targetDir, { recursive: true });
	await writeFile(scriptPath, content, "utf-8");
	return scriptPath;
}
578
/**
 * Returns the path of a spec's `test.spec.ts` if `stat` succeeds on it.
 *
 * @param {string} featureName
 * @param {string} specName
 * @param {string} [cwd] - Base directory forwarded to `getSpecDir`.
 * @returns {Promise<string | null>} The script path, or null when `stat` fails.
 */
async function getTestScript(featureName, specName, cwd) {
	const scriptPath = join(getSpecDir(featureName, specName, cwd), "test.spec.ts");
	try {
		await stat(scriptPath);
		return scriptPath;
	} catch {
		return null;
	}
}
582
/**
 * Lists every spec, across all features, that has a `test.spec.ts` file.
 *
 * Directories that cannot be read are treated as empty.
 *
 * @param {string} [cwd] - Base directory forwarded to `getCcqaDir`.
 * @returns {Promise<Array<{ featureName: string, specName: string }>>}
 */
async function listAllSpecs(cwd) {
	const featuresDir = join(getCcqaDir(cwd), "features");
	const featureNames = await readdir(featuresDir).catch(() => []);
	const perFeature = await Promise.all(featureNames.map(async (featureName) => {
		const testCasesDir = join(featuresDir, featureName, "test-cases");
		const specNames = await readdir(testCasesDir).catch(() => []);
		// Probe all specs of this feature concurrently for a generated script.
		const hasScript = await Promise.all(specNames.map((specName) => {
			const scriptPath = join(testCasesDir, specName, "test.spec.ts");
			return stat(scriptPath).then(() => true, () => false);
		}));
		return specNames
			.filter((_, idx) => hasScript[idx])
			.map((specName) => ({ featureName, specName }));
	}));
	return perFeature.flat();
}
596
/**
 * Lists the entries of a feature's `test-cases` directory.
 *
 * @param {string} featureName
 * @param {string} [cwd] - Base directory forwarded to `getFeatureDir`.
 * @returns {Promise<string[]>} Entry names, or an empty array when the directory cannot be read.
 */
async function listSpecsForFeature(featureName, cwd) {
	const testCasesDir = join(getFeatureDir(featureName, cwd), "test-cases");
	try {
		return await readdir(testCasesDir);
	} catch {
		return [];
	}
}
599
/**
 * Renders a route as markdown with a YAML front-matter header.
 *
 * Front-matter values are emitted as double-quoted scalars with `"`, `\` and
 * control characters escaped, so values containing those characters still
 * produce valid YAML. Plain values render exactly as `key: "value"`.
 *
 * @param {{ specName: string, timestamp: string, status: string, steps: Array<{ title: string, action: string, observation: string, status: string, reason?: string }> }} route
 * @returns {string} Markdown text: the header, then one `##` section per step.
 */
function routeToMarkdown(route) {
	// JSON string syntax is accepted by YAML as a double-quoted scalar.
	const quoted = (value) => JSON.stringify(String(value));
	const lines = [
		"---",
		`specName: ${quoted(route.specName)}`,
		`timestamp: ${quoted(route.timestamp)}`,
		`status: ${quoted(route.status)}`,
		"---",
		""
	];
	for (const step of route.steps) {
		lines.push(
			`## ${step.title}`,
			`- **action**: ${step.action}`,
			`- **observation**: ${step.observation}`,
			`- **status**: ${step.status}`
		);
		if (step.reason) lines.push(`- **reason**: ${step.reason}`);
		lines.push("");
	}
	return lines.join("\n");
}
618
+ //#endregion
619
+ //#region src/spec/parser.ts
620
/**
 * Parses a test spec markdown document (front matter plus body).
 *
 * @param {string} content - Raw `test-spec.md` text.
 * @returns {{ title: string, baseUrl: string, prerequisites: string | undefined, setups: Array<object> | undefined, steps: Array<object> }}
 *   `title` defaults to "Untitled" and `baseUrl` to "http://localhost:3000"
 *   when absent from the front matter.
 */
function parseTestSpec(content) {
	const parsed = matter(content);
	const frontMatter = parsed.data;
	const body = parsed.content;
	const steps = parseSteps(body);
	const prerequisites = parsePrerequisites(body);
	const title = String(frontMatter["title"] ?? "Untitled");
	const baseUrl = String(frontMatter["baseUrl"] ?? "http://localhost:3000");
	return {
		title,
		baseUrl,
		prerequisites: prerequisites || void 0,
		setups: parseSetupRefs(frontMatter["setups"]),
		steps
	};
}
632
/**
 * Parses a setup spec markdown document (front matter plus body).
 *
 * @param {string} content - Raw `setup-spec.md` text.
 * @returns {{ title: string, placeholders: object | undefined, steps: Array<object> }}
 *   `title` defaults to "Untitled"; `placeholders` is undefined when none are declared.
 */
function parseSetupSpec(content) {
	const parsed = matter(content);
	const frontMatter = parsed.data;
	const steps = parseSteps(parsed.content);
	const placeholders = parsePlaceholders(frontMatter["placeholders"]);
	const hasPlaceholders = Object.keys(placeholders).length > 0;
	return {
		title: String(frontMatter["title"] ?? "Untitled"),
		placeholders: hasPlaceholders ? placeholders : void 0,
		steps
	};
}
642
/**
 * Normalises the `placeholders` front-matter value.
 *
 * Only entries whose value is an object with a `dummy` property are kept.
 *
 * @param {unknown} raw - Front-matter value.
 * @returns {Record<string, { dummy: string, description: string | undefined }>}
 *   Empty object when `raw` is not an object.
 */
function parsePlaceholders(raw) {
	if (!raw || typeof raw !== "object") return {};
	const isPlaceholder = (val) => Boolean(val) && typeof val === "object" && "dummy" in val;
	const kept = Object.entries(raw)
		.filter(([, val]) => isPlaceholder(val))
		.map(([key, val]) => [key, {
			dummy: String(val["dummy"]),
			description: val["description"] ? String(val["description"]) : void 0
		}]);
	return Object.fromEntries(kept);
}
654
/**
 * Normalises the `setups` front-matter value into setup references.
 *
 * Items that are not objects with a `name` property are ignored. Parameter
 * values are converted to strings.
 *
 * @param {unknown} raw - Front-matter value.
 * @returns {Array<{ name: string, params: Record<string, string> | undefined }> | undefined}
 *   Undefined when `raw` is not an array or no valid item remains.
 */
function parseSetupRefs(raw) {
	if (!Array.isArray(raw)) return void 0;
	const stringifyParams = (params) => {
		if (!params || typeof params !== "object") return void 0;
		const pairs = Object.entries(params).map(([k, v]) => [k, String(v)]);
		return Object.fromEntries(pairs);
	};
	const refs = raw
		.filter((item) => typeof item === "object" && item !== null && "name" in item)
		.map((item) => ({
			name: String(item["name"]),
			params: stringifyParams(item["params"])
		}));
	return refs.length > 0 ? refs : void 0;
}
666
/**
 * Extracts the text of the `## Prerequisites` section from a spec body.
 *
 * The section runs until the next `##` or the end of the body.
 *
 * @param {string} body - Markdown body without front matter.
 * @returns {string | null} Trimmed section text, or null when the section is missing or empty.
 */
function parsePrerequisites(body) {
	const section = /##\s+Prerequisites\s+([\s\S]*?)(?=##|$)/.exec(body)?.[1];
	return section ? section.trim() : null;
}
671
/**
 * Extracts the steps from a spec body.
 *
 * The body is split at every `### Step <n>:` heading. A step is kept only when
 * its block has a title line, an `**Instruction**:` line and an `**Expected**:`
 * line. Ids are derived from the block's position (`step-01`, `step-02`, ...),
 * so a dropped block leaves a gap in the numbering.
 *
 * @param {string} body - Markdown body without front matter.
 * @returns {Array<{ id: string, title: string, instruction: string, expected: string }>}
 */
function parseSteps(body) {
	const steps = [];
	body.split(/###\s+Step\s+\d+:/).forEach((block, position) => {
		// Position 0 is whatever precedes the first step heading.
		if (position === 0 || !block) return;
		const title = block.match(/^(.+)/);
		const instruction = block.match(/\*\*Instruction\*\*:\s*(.+)/);
		const expected = block.match(/\*\*Expected\*\*:\s*(.+)/);
		if (!(title && instruction && expected)) return;
		steps.push({
			id: `step-${String(position).padStart(2, "0")}`,
			title: title[1]?.trim() ?? "",
			instruction: instruction[1]?.trim() ?? "",
			expected: expected[1]?.trim() ?? ""
		});
	});
	return steps;
}
690
+ //#endregion
691
+ //#region src/runtime/bundled-config.ts
692
/** Config locations, relative to this module, probed in order. */
const CANDIDATES = [
	"../runtime/vitest.config.mjs",
	"./vitest.config.mjs",
	"./vitest.config.ts"
];

/**
 * Returns the path of the bundled vitest config.
 *
 * The first candidate that `accessSync` accepts wins; when none is accessible
 * the path of `./vitest.config.ts` next to this module is returned anyway.
 *
 * @returns {string} Absolute filesystem path.
 */
function bundledVitestConfigPath() {
	const toPath = (rel) => fileURLToPath(new URL(rel, import.meta.url));
	const isAccessible = (candidate) => {
		try {
			accessSync(candidate);
			return true;
		} catch {
			return false;
		}
	};
	const found = CANDIDATES.map(toPath).find(isAccessible);
	return found ?? toPath("./vitest.config.ts");
}
707
+ //#endregion
708
+ //#region src/runtime/spawn-vitest.ts
709
/** CommonJS-style `require` anchored at this module, used to locate vitest. */
const require = createRequire(import.meta.url);

/**
 * Resolves the absolute path of the vitest executable script from the `bin`
 * field of vitest's package.json.
 *
 * @returns {string} Absolute path of the vitest bin file.
 * @throws {Error} When vitest's package.json declares no usable bin entry.
 */
function resolveVitestBin() {
	const pkgPath = require.resolve("vitest/package.json");
	const { bin } = require(pkgPath);
	// `bin` is either a single path or a map of command name to path.
	const binRel = typeof bin === "string" ? bin : bin?.vitest;
	if (!binRel) {
		throw new Error(`vitest package.json has no bin entry (resolved at ${pkgPath})`);
	}
	return resolve(dirname(pkgPath), binRel);
}
717
/**
 * Runs vitest and collects its complete output.
 *
 * @param {string[]} args - Arguments passed to vitest.
 * @param {{ cwd?: string, env?: object }} [opts] - Spawn options.
 * @returns {Promise<{ exitCode: number, stdout: string, stderr: string }>}
 *   Resolves once both streams have ended and the process has exited.
 */
async function spawnVitestCaptured(args, opts = {}) {
	const child = spawnVitestChild(args, opts, "pipe");
	// Drain both pipes while waiting for the exit.
	const pending = [drain(child.stdout), drain(child.stderr), waitExit(child)];
	const [stdout, stderr, exitCode] = await Promise.all(pending);
	return { exitCode, stdout, stderr };
}
730
/**
 * Starts vitest and hands back its live output streams.
 *
 * @param {string[]} args - Arguments passed to vitest.
 * @param {{ cwd?: string, env?: object }} [opts] - Spawn options.
 * @returns {{ child: object, stdout: object, stderr: object, exited: Promise<number> }}
 *   The child process, its piped stdout/stderr, and a promise for its exit code.
 */
function spawnVitestStreaming(args, opts = {}) {
	const child = spawnVitestChild(args, opts, "pipe");
	const { stdout, stderr } = child;
	const exited = waitExit(child);
	return { child, stdout, stderr, exited };
}
739
/**
 * Spawn the vitest binary under the current Node executable.
 *
 * @param {string[]} args Arguments appended after the vitest script path.
 * @param {{cwd?: string, env?: object}} opts `cwd` is passed through; `env` defaults to `process.env`.
 * @param {string} stdio Mode applied to both stdout and stderr; stdin is always ignored.
 * @returns {object} The spawned child process.
 */
function spawnVitestChild(args, opts, stdio) {
  const argv = [resolveVitestBin(), ...args];
  const spawnOptions = {
    cwd: opts.cwd,
    env: opts.env ?? process.env,
    stdio: ["ignore", stdio, stdio]
  };
  return spawn(process.execPath, argv, spawnOptions);
}
751
/**
 * Read a stream to its end and return everything it produced as one string.
 *
 * @param {object} stream Readable stream; switched to utf8 decoding before reading.
 * @returns {Promise<string>} Concatenation of all chunks in arrival order.
 */
async function drain(stream) {
  stream.setEncoding("utf8");
  const pieces = [];
  for await (const piece of stream) pieces.push(piece);
  return pieces.join("");
}
757
/**
 * Wait for a child process to finish.
 *
 * @param {object} child Emitter that fires "exit" (code, signal) or "error".
 * @returns {Promise<number>} The numeric exit code. When the child was terminated by a
 *   signal (exit code is null and a signal name is given) this resolves to 1 so that
 *   callers comparing against 0 do not mistake a killed process for a success.
 *   Rejects with the emitted error if the child emits "error".
 */
function waitExit(child) {
  return new Promise((resolvePromise, rejectPromise) => {
    child.once("exit", (code, signal) => {
      if (code != null) {
        resolvePromise(code);
        return;
      }
      // A null code with a signal means the process was killed, which is a failure.
      resolvePromise(signal ? 1 : 0);
    });
    child.once("error", rejectPromise);
  });
}
763
+ //#endregion
764
+ //#region src/cli/trace.ts
765
/** CLI subcommand `trace <feature/spec>`: parses the spec path and delegates to `runTrace`. */
const traceCommand = new Command("trace")
  .argument("<feature/spec>", "Spec to trace (e.g. tasks/create-and-complete)")
  .description("Run agent-browser, verify assertions, and record structured actions")
  .action(async (specPath) => {
    const parsed = parseSpecPath(specPath);
    await runTrace(parsed.featureName, parsed.specName);
  });
769
/**
 * Trace one spec: run its setups (if any), drive the agent session, collect the
 * route steps and browser actions it reports, then persist both.
 *
 * @param {string} featureName Feature the spec belongs to.
 * @param {string} specName Spec name within the feature.
 * @returns {Promise<void>}
 */
async function runTrace(featureName, specName) {
  header("trace", `${featureName}/${specName}`);
  await ensureCcqaDir();
  const specSource = await readSpecFile(featureName, specName);
  const spec = parseTestSpec(specSource);
  const hasSetups = (spec.setups?.length ?? 0) > 0;
  meta("spec", spec.title);
  meta("url", spec.baseUrl);
  if (hasSetups) meta("setups", spec.setups.map((s) => s.name).join(", "));
  meta("steps", spec.steps.length);
  blank();

  const sessionName = generateSessionName();
  if (hasSetups) {
    info("Running setup procedures...");
    await runSetups(spec.setups, sessionName);
    blank();
  }
  const systemPrompt = buildTraceSystemPrompt(spec, {
    sessionName,
    skipCookiesClear: hasSetups
  });
  const prompt = buildTracePrompt(spec);
  info("Running agent-browser session...");
  blank();

  const routeSteps = [];
  const traceActions = [];
  let overallStatus = "passed";

  // Parse one AB_ACTION line and keep it when it is a recognised action.
  const recordAction = (raw) => {
    const parsed = parseAbAction(raw);
    if (parsed) traceActions.push(parsed);
  };
  // Handle a single line of assistant text: route steps, plus snapshot/assert actions.
  const consumeLine = (rawLine) => {
    const trimmed = rawLine.trim();
    if (trimmed.startsWith("ROUTE_STEP|")) {
      const routeStep = parseRouteStep(trimmed);
      if (!routeStep) return;
      routeSteps.push(routeStep);
      if (routeStep.status === "FAILED") overallStatus = "failed";
      return;
    }
    if (trimmed.startsWith("AB_ACTION|snapshot|") || trimmed.startsWith("AB_ACTION|assert|")) recordAction(trimmed);
  };
  const onMessage = (msg) => {
    if (msg.type !== "assistant") return;
    for (const block of msg.message.content ?? []) {
      if (block.type !== "text" || !block.text) continue;
      const text = block.text;
      const statusLine = parseStatusLine(text);
      if (statusLine) step(statusLine.type, statusLine.stepId, statusLine.detail);
      for (const line of text.split("\n")) consumeLine(line);
    }
  };

  const { isError } = await invokeClaudeStreaming({
    prompt,
    systemPrompt,
    allowedTools: ["Bash(*)", "Read", "Grep", "Glob"],
    env: { AGENT_BROWSER_SESSION: sessionName },
    onAbAction: (abAction) => {
      recordAction(abAction);
    },
    onAbActionFailed: () => {
      // Drops the most recently recorded action when a failure is reported.
      traceActions.pop();
    }
  }, onMessage);
  if (isError) overallStatus = "failed";

  const route = {
    specName,
    timestamp: (/* @__PURE__ */ new Date()).toISOString(),
    status: overallStatus,
    steps: routeSteps
  };
  const [routePath, actionsPath] = await Promise.all([
    saveRoute(featureName, specName, route),
    saveTraceActions(featureName, specName, traceActions)
  ]);
  blank();
  meta("route", routePath);
  meta("saved", actionsPath);
  meta("actions", traceActions.length);
  meta("status", overallStatus.toUpperCase());
  hint(`run 'ccqa generate ${featureName}/${specName}' to generate a test script`);
}
849
/**
 * Execute setup procedures by running their test.spec.ts via vitest with a fixed session name.
 *
 * For each setup, the script is read, its `{{placeholder}}` tokens are replaced with the
 * setup's params, its AGENT_BROWSER_SESSION assignment is rewritten to `sessionName`, and the
 * result is written to a temporary `_run.spec.ts` next to the original, run, and removed again.
 *
 * Substitutions use replacer functions so that `$` sequences inside parameter values
 * (e.g. a password containing "$&" or "$$") are inserted literally instead of being
 * interpreted as replacement patterns.
 *
 * @param {Array<{name: string, params?: object}>} setups Setups to run, in order.
 * @param {string} sessionName Browser session name every setup must share.
 * @returns {Promise<void>}
 * @throws {Error} When a setup script is missing or its vitest run exits non-zero.
 */
async function runSetups(setups, sessionName) {
  for (const ref of setups) {
    info(` setup: ${ref.name}`);
    const scriptPath = join(getSetupDir(ref.name), "test.spec.ts");
    let script = await readFile(scriptPath, "utf-8").catch(() => {
      throw new Error(`Setup test script not found: ${scriptPath}. Run \`ccqa generate-setup ${ref.name}\` first.`);
    });
    // NOTE(review): values are inserted verbatim into TS source; a value containing a quote or
    // backslash may still break the surrounding string literal — confirm how templates are emitted.
    for (const [key, value] of Object.entries(ref.params ?? {})) script = script.replaceAll(`{{${key}}}`, () => String(value));
    const sessionAssignment = `process.env.AGENT_BROWSER_SESSION = ${JSON.stringify(sessionName)};`;
    script = script.replace(/process\.env\.AGENT_BROWSER_SESSION\s*=\s*`.+`;/, () => sessionAssignment);
    const tmpPath = join(getSetupDir(ref.name), `_run.spec.ts`);
    await writeFile(tmpPath, script, "utf-8");
    try {
      const { exitCode, stdout, stderr } = await spawnVitestCaptured([
        "run",
        "--config",
        bundledVitestConfigPath(),
        tmpPath
      ]);
      process.stdout.write(stdout);
      if (stderr) process.stderr.write(stderr);
      if (exitCode !== 0) throw new Error(`Setup '${ref.name}' failed (exit ${exitCode})`);
    } finally {
      // Best-effort cleanup: the temporary runner may already be gone.
      await unlink(tmpPath).catch(() => {});
    }
  }
}
879
/**
 * Find the first status line in a block of text.
 *
 * A status line has the form `TYPE|stepId|detail`, where TYPE is one of STEP_START,
 * STEP_DONE, ASSERTION_FAILED, STEP_SKIPPED or RUN_COMPLETED and must start the line.
 *
 * @param {string} text Possibly multi-line text.
 * @returns {{type: string, stepId: string, detail: string} | null} The first match, or null.
 */
function parseStatusLine(text) {
  const pattern = /^(STEP_START|STEP_DONE|ASSERTION_FAILED|STEP_SKIPPED|RUN_COMPLETED)\|([^|]*)\|(.*)$/;
  for (const candidate of text.split("\n")) {
    const hit = pattern.exec(candidate);
    if (hit === null) continue;
    const [, type, stepId = "", detail = ""] = hit;
    return { type, stepId, detail };
  }
  return null;
}
890
/**
 * Parse a pipe-delimited `ROUTE_STEP|...` line.
 *
 * Fields are read by position: index 2 is the title, then ACTION:, OBSERVATION: and STATUS:
 * fields whose prefixes are stripped. Any status other than PASSED/FAILED/SKIPPED becomes FAILED.
 *
 * @param {string} line The line to parse.
 * @returns {{title: string, action: string, observation: string, status: string} | null}
 *   Null when the line has fewer than six fields.
 */
function parseRouteStep(line) {
  const fields = line.split("|");
  if (fields.length < 6) return null;
  const stripTag = (field, tag) => {
    const raw = field ?? "";
    return (raw.startsWith(tag) ? raw.slice(tag.length) : raw).trim();
  };
  const reported = stripTag(fields[5], "STATUS:");
  const known = ["PASSED", "FAILED", "SKIPPED"];
  return {
    title: fields[2] ?? "",
    action: stripTag(fields[3], "ACTION:"),
    observation: stripTag(fields[4], "OBSERVATION:"),
    status: known.includes(reported) ? reported : "FAILED"
  };
}
908
/**
 * Turn an `AB_ACTION|command|...` line into a structured trace action.
 *
 * The fields after the command are positional and their meaning depends on the command
 * (see each branch). Empty optional fields of an `assert` action become undefined.
 *
 * @param {string} line The raw line.
 * @returns {object | null} The action, or null when the prefix or command is not recognised.
 */
function parseAbAction(line) {
  if (!line.startsWith("AB_ACTION|")) return null;
  const [, command, first, second, third, fourth] = line.split("|");

  if (command === "cookies_clear") return { command };
  if (command === "open" || command === "press") return { command, value: first };
  if (command === "scroll") return { command, direction: first, pixels: second };
  if (command === "snapshot") return { command, observation: first };
  if (command === "assert") {
    return {
      command,
      assertType: first,
      selector: second || void 0,
      value: third || void 0,
      observation: fourth || void 0
    };
  }
  if (["click", "dblclick", "check", "uncheck", "hover"].includes(command)) {
    return { command, selector: first, label: second };
  }
  if (command === "wait") {
    // `wait|--text|<text>|<label>` waits for text; otherwise the first field is the selector.
    if (first === "--text") return { command, selector: `text=${second}`, label: third };
    return { command, selector: first, label: second };
  }
  if (["fill", "type", "select"].includes(command)) {
    return { command, selector: first, value: second, label: third };
  }
  if (command === "drag") return { command, selector: first, target: second, label: third };
  return null;
}
972
+ //#endregion
973
+ //#region src/codegen/actions-to-script.ts
974
/**
 * Render recorded actions as the source of a vitest test file.
 *
 * The file starts with a fixed preamble (imports and a per-run session name), followed by one
 * `test(...)` per setup script (3 minute timeout) and one `test(...)` for the spec itself
 * (5 minute timeout).
 *
 * Both the spec title and the setup names are embedded via JSON.stringify so that quotes or
 * backslashes in them cannot break the generated source.
 *
 * @param {object[]} actions Trace actions for the main test body.
 * @param {string} title Title of the main test.
 * @param {Array<{name: string, body: string}>} [setupScripts] Setup bodies emitted before the main test.
 * @returns {string} Complete test file source.
 */
function actionsToScript(actions, title, setupScripts) {
  const parts = [
    `import { test } from "vitest";`,
    `import { spawnSync } from "node:child_process";`,
    `import { ab, abWait, abAssertTextVisible, abAssertVisible, abAssertNotVisible, abAssertUrl, abAssertEnabled, abAssertDisabled, abAssertChecked, abAssertUnchecked } from "ccqa/test-helpers";`,
    "",
    `// Single session shared across all tests — reset per run via cookies clear in first test`,
    `process.env.AGENT_BROWSER_SESSION = \`ccqa-run-\${Date.now()}\`;`,
    ""
  ];
  for (const setup of setupScripts ?? []) {
    const setupTitle = JSON.stringify(`setup: ${setup.name}`);
    parts.push(`test(${setupTitle}, () => {`, setup.body, "}, 3 * 60 * 1000);", "");
  }
  const body = actionsToLines(actions).map((l) => ` ${l}`).join("\n");
  parts.push(`test(${JSON.stringify(title)}, () => {`, body, "}, 5 * 60 * 1000);", "");
  return parts.join("\n");
}
989
/** Commands that interact with page elements and need the page to be loaded */
const ELEMENT_COMMANDS = new Set([
  "click",
  "dblclick",
  "fill",
  "type",
  "check",
  "uncheck",
  "select",
  "hover",
  "drag"
]);
/**
 * Convert trace actions into source lines for the generated test body.
 *
 * - Actions that render to null are dropped.
 * - A line identical to the one emitted just before it is dropped.
 * - A 3 second sleep is inserted before an element command whose preceding executable
 *   command was `open`. Lines made up only of `//` comments (e.g. snapshot observations)
 *   do not execute anything, so they no longer cancel that sleep when they sit between
 *   the `open` and the element command.
 *
 * @param {object[]} actions Trace actions in recorded order.
 * @returns {string[]} Source lines (an entry may itself contain a newline).
 */
function actionsToLines(actions) {
  const isCommentOnly = (text) => text.split("\n").every((part) => part.trim().startsWith("//"));
  const lines = [];
  let prevLine = null;
  let prevCommand = null;
  for (const action of actions) {
    const line = actionToLine(action);
    if (line === null) continue;
    if (line === prevLine) continue;
    if (prevCommand === "open" && ELEMENT_COMMANDS.has(action.command)) lines.push(`spawnSync("sleep", ["3"], { stdio: "inherit" });`);
    lines.push(line);
    prevLine = line;
    // Comments are transparent: keep remembering the last command that actually runs.
    if (!isCommentOnly(line)) prevCommand = action.command;
  }
  return lines;
}
1016
/**
 * Tell whether a selector is a session-specific `@ref`, which cannot be replayed.
 *
 * @param {unknown} selector Candidate selector.
 * @returns {boolean} True only for strings whose first non-whitespace character is "@".
 */
function isRefSelector(selector) {
  if (typeof selector !== "string") return false;
  return selector.trim().startsWith("@");
}
1020
/**
 * Render one trace action as a line of generated test source.
 *
 * @param {object} action Trace action (`command` plus command-specific fields).
 * @returns {string | null} Source text (an assert may yield a comment line followed by the
 *   assertion), or null when the action should not be emitted: `@ref` selectors, snapshots
 *   without an observation, asserts that produce neither a check nor a comment, and
 *   unrecognised commands.
 */
function actionToLine(action) {
  if ("selector" in action && isRefSelector(action.selector)) return null;
  const { command } = action;

  // Render an `assert` action: optional "// Assert: ..." comment plus the helper call.
  const renderAssert = () => {
    const val = action.value ?? action.observation;
    const sel = action.selector ?? action.observation;
    const note = action.observation ? `// Assert: ${action.observation}` : null;
    // Enabled/disabled checks are skipped for text= and [aria-label= selectors.
    const stateCheckable = () => sel && !sel.startsWith("text=") && !sel.startsWith("[aria-label=");
    const renderers = new Map([
      ["text_visible", () => (val ? `abAssertTextVisible(${j(val)});` : null)],
      ["text_not_visible", () => (val ? `abAssertNotVisible(${j("text=" + val)}, 180_000);` : null)],
      ["element_visible", () => (sel ? `abAssertVisible(${j(sel)});` : null)],
      ["element_not_visible", () => (sel ? `abAssertNotVisible(${j(sel)});` : null)],
      ["url_contains", () => (val ? `abAssertUrl(${j(val)});` : null)],
      ["element_enabled", () => (stateCheckable() ? `abAssertEnabled(${j(sel)});` : null)],
      ["element_disabled", () => (stateCheckable() ? `abAssertDisabled(${j(sel)});` : null)],
      ["element_checked", () => (sel ? `abAssertChecked(${j(sel)});` : null)],
      ["element_unchecked", () => (sel ? `abAssertUnchecked(${j(sel)});` : null)]
    ]);
    const check = renderers.get(action.assertType)?.() ?? null;
    if (note && check) return `${note}\n ${check}`;
    return check ?? note;
  };

  if (["click", "dblclick", "check", "uncheck", "hover"].includes(command)) {
    return `ab("${command}", ${j(action.selector)});`;
  }
  // `type` is emitted as a `fill` call, same as `fill` itself.
  if (command === "fill" || command === "type") return `ab("fill", ${j(action.selector)}, ${j(action.value)});`;
  if (command === "select") return `ab("select", ${j(action.selector)}, ${j(action.value)});`;
  if (command === "drag") return `ab("drag", ${j(action.selector)}, ${j(action.target)});`;
  if (command === "press") return `ab("press", ${j(action.value)});`;
  if (command === "cookies_clear") return `ab("cookies", "clear");`;
  if (command === "open") {
    // Drop one quote character wrapped around the URL at either end.
    const url = (action.value ?? "").replace(/^["']|["']$/g, "");
    return `ab("open", ${j(url)});`;
  }
  if (command === "snapshot") return action.observation ? `// ${action.observation}` : null;
  if (command === "scroll") {
    const scrollArgs = [action.direction ?? "down"];
    if (action.pixels) scrollArgs.push(action.pixels);
    return `ab("scroll", ${scrollArgs.map(j).join(", ")});`;
  }
  if (command === "wait") {
    const target = action.selector;
    // An all-digit "selector" is emitted as a plain sleep instead of an element wait.
    if (/^\d+$/.test(target)) return `spawnSync("sleep", [${j(target)}], { stdio: "inherit" });`;
    return `abWait(${j(target)});`;
  }
  if (command === "assert") return renderAssert();
  return null;
}
1082
/**
 * Quote a value as a JSON literal so it can be embedded in generated TS source.
 * Only the first argument is used, which makes it safe to pass directly to `Array#map`.
 */
const j = function toJsonLiteral(value) {
  return JSON.stringify(value);
};
1084
+ //#endregion
1085
+ //#region src/prompts/codegen.ts
1086
/**
 * Build the prompt that asks the model where sleeps must be inserted (or lengthened)
 * to fix timing failures in a generated test script.
 *
 * @param {string} script Test script source; shown to the model with 1-based line numbers.
 * @param {string} failureLog Test run output; only its first 3000 characters are included.
 * @returns {string} The full prompt text.
 */
function buildAutoFixPrompt(script, failureLog) {
  const numberedScript = script.split("\n").map((l, i) => `${i + 1}: ${l}`).join("\n");
  const logExcerpt = failureLog.slice(0, 3e3);
  return `You are analyzing a failing E2E test script. The test fails because some browser actions execute before the page has finished loading or navigating.

Your task: identify which line numbers need a sleep/wait inserted BEFORE them to fix timing issues.

## Rules
- ONLY identify lines where a sleep is needed — do NOT suggest any other changes
- Common patterns that need a sleep:
- After \`ab("open", ...)\` when the next line interacts with elements (fill, click, etc.)
- After \`ab("press", "Enter")\` or \`ab("click", ...)\` when a page navigation occurs before the next action
- After any action that triggers a redirect or page reload
- Look at the error log to identify WHICH lines failed, then determine if a sleep before that line would fix it
- If a \`spawnSync("sleep", ...)\` already exists before a failing line, suggest increasing its duration instead
- Output ONLY a JSON array of objects, no explanation, no markdown code fences

## Output format
Each object has:
- "line": the 1-based line number to insert a sleep BEFORE
- "seconds": recommended sleep duration (typically 3-5)
- "reason": very short explanation (e.g., "page navigation after form submit")

If a sleep already exists and needs to be increased:
- "line": the line number of the existing sleep
- "increase_to": the new duration in seconds
- "reason": explanation

Example output:
[{"line": 15, "seconds": 3, "reason": "page navigation after press Enter"}, {"line": 22, "increase_to": 5, "reason": "slow page load"}]

If no fixes are needed, return: []

## Test Script (with line numbers)
${numberedScript}

## Failure Log
${logExcerpt}`;
}
1123
/**
 * Build the prompt that asks the model to strip noise (failed attempts, retries,
 * duplicates) from a recorded action list and answer with a cleaned JSON array.
 *
 * @param {object[]} actions Recorded trace actions; each is listed as a numbered line showing
 *   its command plus any assertType, selector, value and observation it has.
 * @returns {string} The full prompt text.
 */
function buildCleanupPrompt(actions) {
  const describe = (a, i) => {
    const parts = [`${i + 1}. ${a.command}`];
    if (a.assertType) parts.push(`assertType="${a.assertType}"`);
    if (a.selector) parts.push(`selector="${a.selector}"`);
    if (a.value) parts.push(`value="${a.value}"`);
    if (a.observation) parts.push(`→ ${a.observation}`);
    return parts.join(" ");
  };
  const listing = actions.map(describe).join("\n");
  return `You are given a list of browser actions recorded during an E2E test trace.
The trace contains noise: failed attempts, redundant retries, and duplicate operations recorded because the agent explored multiple strategies.

Your task: return a **cleaned-up JSON array** of TraceAction objects that represents the minimal, correct sequence of actions needed to reproduce the test.

Each TraceAction object has the following shape (use EXACTLY these field names):
{ "command": "...", "assertType": "...", "selector": "...", "value": "...", "label": "...", "observation": "..." }
Only include fields that are present in the original action. The "command" field is required. For assert actions, "assertType" is also required.

Rules:
- Remove actions that were failed attempts superseded by a later successful action (e.g., if \`fill selector="text=Foo"\` was followed by \`fill selector="[placeholder='Foo']"\`, keep only the latter)
- Remove duplicate fill operations on the same field (keep only the last successful fill for each field)
- For \`click\` and \`fill\` actions: if the selector starts with \`text=\`, it is a failed attempt — remove it (text= selectors only work with the wait command, not click/fill)
- Keep all snapshot actions — they serve as comments/observations in the generated test
- Keep all assert actions — they are the test's verification points and must not be removed
- Do NOT invent new actions or change values
- Output ONLY a valid JSON array, no explanation, no markdown code fences

## Recorded Actions
${listing}`;
}
1152
+ //#endregion
1153
+ //#region src/cli/generate.ts
1154
/**
 * CLI subcommand `generate <feature/spec>`: parses the spec path and the
 * `--max-retries` option (base-10 integer, default "3") and delegates to `runGenerate`.
 */
const generateCommand = new Command("generate")
  .argument("<feature/spec>", "Spec to generate test for (e.g. tasks/create-and-complete)")
  .description("Generate agent-browser test script from recorded trace actions")
  .option("--max-retries <n>", "Maximum number of auto-fix retries", "3")
  .action(async (specPath, opts) => {
    const parsed = parseSpecPath(specPath);
    const retryLimit = parseInt(opts.maxRetries, 10);
    await runGenerate(parsed.featureName, parsed.specName, retryLimit);
  });
1158
/**
 * Generate a test script from a spec's recorded trace, run it, and try to auto-fix
 * timing failures up to `maxRetries` times.
 *
 * Exits the process with code 1 when the script still fails after the retries (or when
 * no fix could be derived from the failure output).
 *
 * @param {string} featureName Feature the spec belongs to.
 * @param {string} specName Spec name within the feature.
 * @param {number} maxRetries Maximum number of auto-fix attempts.
 * @returns {Promise<void>}
 */
async function runGenerate(featureName, specName, maxRetries) {
  header("generate", `${featureName}/${specName}`);
  await ensureCcqaDir();
  const { path: actionsPath, actions } = await getTraceActions(featureName, specName);
  meta("trace", actionsPath);
  meta("actions", actions.length);
  const spec = parseTestSpec(await readSpecFile(featureName, specName));
  const setupScripts = await loadSetupScripts(spec.setups);
  if (setupScripts.length > 0) meta("setups", setupScripts.map((s) => s.name).join(", "));
  blank();

  const cleanedActions = await cleanupActions$1(actions);
  if (cleanedActions.length !== actions.length) meta("cleaned", cleanedActions.length);
  const source = actionsToScript(cleanedActions, spec.title, setupScripts.length > 0 ? setupScripts : void 0);
  const scriptPath = await saveTestScript(featureName, specName, source);
  meta("saved", scriptPath);
  blank();

  let run = await runVitest$1(scriptPath);
  let attempt = 0;
  while (run.exitCode !== 0 && attempt < maxRetries) {
    attempt += 1;
    info(`auto-fix attempt ${attempt}/${maxRetries}...`);
    blank();
    const fixed = await autoFixWithLLM$1(run.currentScript, run.output);
    if (!fixed) {
      warn("could not determine fix from failure log");
      break;
    }
    await writeFile(scriptPath, fixed, "utf-8");
    meta("saved", scriptPath);
    blank();
    run = await runVitest$1(scriptPath);
  }
  if (run.exitCode === 0) {
    hint(`run 'ccqa run ${featureName}/${specName}' to execute the test`);
    return;
  }
  warn("auto-fix exhausted — test still failing");
  process.exit(1);
}
1198
/**
 * Load setup test scripts, extract test body, and replace {{placeholders}} with params values.
 *
 * @param {Array<{name: string, params?: object}> | undefined} setups Setup references from a spec.
 * @returns {Promise<Array<{name: string, body: string}>>} One entry per setup, in order;
 *   empty when there are no setups.
 * @throws {Error} When a setup's test.spec.ts cannot be read.
 */
async function loadSetupScripts(setups) {
  if (!setups?.length) return [];
  const loaded = [];
  for (const ref of setups) {
    const scriptPath = join(getSetupDir(ref.name), "test.spec.ts");
    const source = await readFile(scriptPath, "utf-8").catch(() => {
      throw new Error(`Setup test script not found: ${scriptPath}. Run \`ccqa generate-setup ${ref.name}\` first.`);
    });
    const body = replacePlaceholders(extractTestBody(source), ref.params ?? {});
    loaded.push({ name: ref.name, body });
  }
  return loaded;
}
1216
/**
 * Extract the test body (lines inside the first test() block) from a setup test script.
 *
 * The body starts on the line after the first line beginning with `test(` and stops before
 * the first following line that begins with `}` followed by whitespace, `,`, `)` or `;`.
 *
 * @param {string} script Full script source.
 * @returns {string} Body lines joined with newlines; "" when no `test(` line exists.
 */
function extractTestBody(script) {
  const lines = script.split("\n");
  const opener = lines.findIndex((l) => /^\s*test\(/.test(l));
  if (opener === -1) return "";
  const afterOpener = lines.slice(opener + 1);
  const closer = afterOpener.findIndex((l) => /^\s*\}[\s,);]/.test(l));
  const body = closer === -1 ? afterOpener : afterOpener.slice(0, closer);
  return body.join("\n");
}
1230
/**
 * Replace every `{{key}}` token in `body` with the matching value from `params`.
 *
 * Values are inserted literally: a replacer function is used so that `$` sequences in a
 * value (such as "$&", "$$" or "$1") are not interpreted as replacement patterns.
 *
 * @param {string} body Text containing `{{key}}` tokens.
 * @param {object} params Map of placeholder name to replacement value (stringified on insert).
 * @returns {string} The text with all known placeholders substituted.
 */
function replacePlaceholders(body, params) {
  let result = body;
  for (const [key, value] of Object.entries(params)) result = result.replaceAll(`{{${key}}}`, () => String(value));
  return result;
}
1235
/**
 * Ask the model for sleep fixes for a failing script and apply them.
 *
 * @param {string} script Current test script source.
 * @param {string} failureLog Output of the failing run.
 * @returns {Promise<string | null>} The patched script, or null when the call reports an
 *   error, returns nothing, returns no fixes, or anything throws along the way.
 */
async function autoFixWithLLM$1(script, failureLog) {
  try {
    const request = {
      prompt: buildAutoFixPrompt(script, failureLog),
      disableBuiltinTools: true,
      maxTurns: 1
    };
    const { result, isError } = await invokeClaudeStreaming(request, () => {});
    if (isError || !result) return null;
    // Tolerate an answer wrapped in a markdown code fence.
    const fenced = /^```(?:json)?\n?([\s\S]*?)\n?```$/;
    const fixes = JSON.parse(result.trim().replace(fenced, "$1").trim());
    const usable = Array.isArray(fixes) && fixes.length > 0;
    return usable ? applySleepFixes$1(script, fixes) : null;
  } catch {
    return null;
  }
}
1251
/**
 * Apply model-suggested sleep fixes to a test script.
 *
 * Two kinds of fix are supported:
 * - `{ line, increase_to }` rewrites the duration of an existing `spawnSync("sleep", ["N"]` on that line;
 * - `{ line, seconds }` inserts a new sleep statement before that (1-based) line.
 * Increases are applied first; inserts are applied from the bottom up so that the original
 * line numbers stay valid while splicing.
 *
 * The fixes come from model output and end up inside generated source, so each one is
 * validated: entries that are not objects, whose `line` is not an integer within range, or
 * whose duration is not a positive finite number are skipped. Durations are rounded up to
 * whole seconds.
 *
 * @param {string} script Test script source.
 * @param {object[]} fixes Fix descriptors as described above.
 * @returns {string} The patched script.
 */
function applySleepFixes$1(script, fixes) {
  const lines = script.split("\n");
  const isNumeric = (raw) => typeof raw === "number" || (typeof raw === "string" && raw.trim() !== "");
  // Whole positive seconds, or null when the value is unusable.
  const toSeconds = (raw) => {
    if (!isNumeric(raw)) return null;
    const n = Number(raw);
    return Number.isFinite(n) && n > 0 ? Math.ceil(n) : null;
  };
  // Zero-based index for a 1-based line number, or null when it is not an integer.
  const toIndex = (raw) => {
    if (!isNumeric(raw)) return null;
    const n = Number(raw);
    return Number.isInteger(n) ? n - 1 : null;
  };
  const usable = fixes.filter((fix) => fix !== null && typeof fix === "object");

  for (const fix of usable) {
    if (!("increase_to" in fix)) continue;
    const idx = toIndex(fix.line);
    const seconds = toSeconds(fix.increase_to);
    if (idx === null || seconds === null || idx < 0 || idx >= lines.length) continue;
    lines[idx] = lines[idx].replace(/spawnSync\("sleep",\s*\["\d+"\]/, `spawnSync("sleep", ["${seconds}"]`);
  }

  const inserts = usable
    .filter((fix) => "seconds" in fix && !("increase_to" in fix))
    .map((fix) => ({ idx: toIndex(fix.line), seconds: toSeconds(fix.seconds) }))
    .filter(({ idx, seconds }) => idx !== null && seconds !== null && idx >= 0 && idx <= lines.length)
    .sort((a, b) => b.idx - a.idx);
  for (const { idx, seconds } of inserts) {
    lines.splice(idx, 0, ` spawnSync("sleep", ["${seconds}"], { stdio: "inherit" });`);
  }
  return lines.join("\n");
}
1264
/**
 * Run one test script through vitest with the bundled config and echo its output.
 *
 * @param {string} scriptPath Path of the test script to run.
 * @returns {Promise<{exitCode: number, output: string, currentScript: string}>}
 *   Exit code, stdout followed by stderr, and the script's source as read after the run.
 */
async function runVitest$1(scriptPath) {
  const vitestArgs = ["run", "--config", bundledVitestConfigPath(), scriptPath];
  const captured = await spawnVitestCaptured(vitestArgs);
  const currentScript = await readFile(scriptPath, "utf8");
  process.stdout.write(captured.stdout);
  if (captured.stderr) process.stderr.write(captured.stderr);
  return {
    exitCode: captured.exitCode,
    output: captured.stdout + captured.stderr,
    currentScript
  };
}
1280
/**
 * Ask the model to remove noise from recorded actions.
 *
 * This is best-effort: the original `actions` are returned unchanged whenever the call
 * reports an error, returns nothing, returns unparsable JSON, or returns anything other
 * than a non-empty array of objects that each carry a non-empty string `command`.
 * The shape check keeps malformed model output (strings, numbers, nulls, nested arrays)
 * from reaching code generation, which reads properties of every action.
 *
 * @param {object[]} actions Recorded trace actions.
 * @returns {Promise<object[]>} The cleaned actions, or `actions` itself as a fallback.
 */
async function cleanupActions$1(actions) {
  const isTraceAction = (candidate) =>
    candidate !== null &&
    typeof candidate === "object" &&
    !Array.isArray(candidate) &&
    typeof candidate.command === "string" &&
    candidate.command.length > 0;
  try {
    const { result, isError } = await invokeClaudeStreaming({
      prompt: buildCleanupPrompt(actions),
      disableBuiltinTools: true,
      maxTurns: 1
    }, () => {});
    if (isError || !result) return actions;
    // Tolerate an answer wrapped in a markdown code fence.
    const json = result.trim().replace(/^```(?:json)?\n?([\s\S]*?)\n?```$/, "$1").trim();
    const parsed = JSON.parse(json);
    if (Array.isArray(parsed) && parsed.length > 0 && parsed.every(isTraceAction)) return parsed;
  } catch {
    // Deliberate fallback: cleanup is optional, so any failure keeps the recorded actions.
  }
  return actions;
}
1294
+ //#endregion
1295
+ //#region src/cli/run.ts
1296
/** Project-level vitest config, resolved against the working directory at module load. */
const USER_VITEST_CONFIG = resolve(".ccqa/vitest.config.ts");
/**
 * Pick the vitest config to use for `run`.
 *
 * @returns {Promise<string>} The project's `.ccqa/vitest.config.ts` when it is accessible,
 *   otherwise the bundled config path.
 */
async function resolveVitestConfig() {
  return access(USER_VITEST_CONFIG).then(
    () => USER_VITEST_CONFIG,
    () => bundledVitestConfigPath()
  );
}
1305
/** CLI subcommand `run [target]`: forwards the optional target to `runTests`. */
const runCommand = new Command("run")
  .argument("[target]", "Spec to run: '<feature>/<spec>', '<feature>', or omit for all")
  .description("Run generated agent-browser test scripts")
  .action(async (target) => {
    await runTests(target);
  });
1308
/**
 * Run the generated test scripts selected by `target`, one spec at a time, streaming
 * vitest output and printing a summary at the end.
 *
 * Each run writes a JSON report into a temporary directory that is removed afterwards.
 * The process exits with 1 when no specs are found, otherwise with the last non-zero
 * vitest exit code seen (0 when every run succeeded).
 *
 * @param {string} [target] `<feature>/<spec>`, `<feature>`, or undefined for all specs.
 * @returns {Promise<void>}
 */
async function runTests(target) {
  header("run", target);
  const specs = await resolveSpecs(target);
  if (specs.length === 0) {
    error("no test scripts found");
    hint("run 'ccqa generate <feature>/<spec>' first to generate tests");
    process.exit(1);
  }
  const tmpDir = await mkdtemp(join(tmpdir(), "ccqa-run-"));
  const summaries = [];
  let overallExitCode = 0;
  const vitestConfig = await resolveVitestConfig();
  try {
    for (const [index, { featureName, specName }] of specs.entries()) {
      const scriptFile = await getTestScript(featureName, specName);
      if (!scriptFile) {
        warn(`${featureName}/${specName}: no test.spec.ts found`);
        continue;
      }
      info(`▶ ${featureName}/${specName}`);
      meta("test", scriptFile);
      blank();
      const reportFile = join(tmpDir, `report-${index}.json`);
      const vitestArgs = [
        "run",
        "--config",
        vitestConfig,
        scriptFile,
        "--reporter=json",
        `--outputFile.json=${reportFile}`
      ];
      const proc = spawnVitestStreaming(vitestArgs);
      // Relay both pipes (minus reporter noise) before looking at the exit code.
      await Promise.all([
        streamFiltered(proc.stdout, process.stdout),
        streamFiltered(proc.stderr, process.stderr)
      ]);
      const exitCode = await proc.exited;
      if (exitCode !== 0) overallExitCode = exitCode;
      const report = await readReport(reportFile);
      summaries.push({ featureName, specName, scriptFile, report, exitCode });
      blank();
    }
    printSummary(summaries);
  } finally {
    await rm(tmpDir, { recursive: true, force: true });
  }
  process.exit(overallExitCode);
}
1362
/**
 * Load a JSON report from disk.
 *
 * @param {string} path Location of the report file.
 * @returns {Promise<any | null>} The parsed JSON, or null when the file cannot be read or parsed.
 */
async function readReport(path) {
  return readFile(path, "utf8").then((raw) => JSON.parse(raw)).catch(() => null);
}
1370
/** Colour is used only when stdout is a TTY and NO_COLOR is not set. */
const useColor = process.stdout.isTTY && process.env.NO_COLOR == null;
/** ANSI escape sequences by name; every entry is "" when colour is disabled. */
const C = Object.fromEntries(
  Object.entries({
    reset: "\x1B[0m",
    bold: "\x1B[1m",
    dim: "\x1B[2m",
    green: "\x1B[32m",
    red: "\x1B[31m",
    yellow: "\x1B[33m",
    cyan: "\x1B[36m",
    gray: "\x1B[90m"
  }).map(([name, sequence]) => [name, useColor ? sequence : ""])
);
1381
/**
 * Print the end-of-run summary to stdout: one line per spec, one line per assertion
 * (with the first line of each failure message), then spec and test totals.
 *
 * Specs without a report are listed with "(no report)" and judged by exit code only;
 * they do not contribute to the test totals.
 *
 * @param {Array<{featureName: string, specName: string, report: object | null, exitCode: number}>} summaries
 * @returns {void}
 */
function printSummary(summaries) {
  const out = (text) => process.stdout.write(text);
  const mark = (ok) => (ok ? `${C.green}✔${C.reset}` : `${C.red}✖${C.reset}`);
  const totals = { tests: 0, passed: 0, failed: 0, skipped: 0 };

  out(`\n${C.cyan}${C.bold}──────── ccqa summary ────────${C.reset}\n\n`);
  for (const s of summaries) {
    const heading = `${C.bold}${s.featureName}/${s.specName}${C.reset}`;
    const { report } = s;
    if (!report) {
      out(`${mark(s.exitCode === 0)} ${heading} ${C.dim}(no report)${C.reset}\n`);
      continue;
    }
    totals.tests += report.numTotalTests;
    totals.passed += report.numPassedTests;
    totals.failed += report.numFailedTests;
    totals.skipped += report.numPendingTests;
    const ok = report.success;
    const countColor = ok ? C.green : C.red;
    out(`${mark(ok)} ${heading} ${countColor}${report.numPassedTests}/${report.numTotalTests}${C.reset} ${C.dim}passed${C.reset}\n`);
    for (const file of report.testResults) {
      for (const a of file.assertionResults) {
        const aIcon = assertionIcon(a.status);
        const dur = a.duration != null ? ` ${C.gray}${formatDuration(a.duration)}${C.reset}` : "";
        out(` ${aIcon} ${a.fullName}${dur}\n`);
        if (a.status !== "failed" || !a.failureMessages?.length) continue;
        for (const msg of a.failureMessages) {
          const firstLine = msg.split("\n")[0] ?? msg;
          out(` ${C.red}${firstLine}${C.reset}\n`);
        }
      }
    }
  }

  const specsPassed = summaries.filter((s) => s.exitCode === 0).length;
  const specsFailed = summaries.filter((s) => s.exitCode !== 0).length;
  out("\n");
  out(` ${C.bold}Specs${C.reset} ${summaries.length} (${C.green}${specsPassed} passed${C.reset}, ${specsFailed > 0 ? C.red : C.dim}${specsFailed} failed${C.reset})\n`);
  out(` ${C.bold}Tests${C.reset} ${totals.tests} (${C.green}${totals.passed} passed${C.reset}, ${totals.failed > 0 ? C.red : C.dim}${totals.failed} failed${C.reset}, ${C.yellow}${totals.skipped} skipped${C.reset})\n`);
  out("\n");
}
1419
/**
 * Pick the coloured status glyph shown next to an assertion in the summary.
 *
 * @param {string} status Assertion status from the JSON report.
 * @returns {string} ✔ for "passed", ✖ for "failed", ◌ for "skipped"/"pending"/"todo",
 *   and a gray "?" for any other status so that the summary never prints "undefined".
 */
function assertionIcon(status) {
  switch (status) {
    case "passed": return `${C.green}✔${C.reset}`;
    case "failed": return `${C.red}✖${C.reset}`;
    case "skipped":
    case "pending":
    case "todo": return `${C.yellow}◌${C.reset}`;
    default: return `${C.gray}?${C.reset}`;
  }
}
1428
/**
 * Format a duration for display.
 *
 * @param {number} ms Duration in milliseconds.
 * @returns {string} Rounded milliseconds ("12ms") below one second, otherwise seconds
 *   with two decimals ("1.50s").
 */
function formatDuration(ms) {
  const underOneSecond = ms < 1e3;
  return underOneSecond ? `${Math.round(ms)}ms` : `${(ms / 1e3).toFixed(2)}s`;
}
1432
/** Lines matching any of these patterns are dropped from relayed output. */
const NOISE_LINE_PATTERNS = [/^JSON report written to /];
/**
 * Relay a stream to a sink line by line, dropping noise lines.
 *
 * Complete lines are written with their trailing newline as soon as they are available;
 * a final unterminated line is written as-is once the source ends.
 *
 * @param {object} source Readable stream; switched to utf8 decoding before reading.
 * @param {{write: Function}} sink Destination receiving the kept text.
 * @returns {Promise<void>}
 */
async function streamFiltered(source, sink) {
  source.setEncoding("utf8");
  const isNoise = (text) => NOISE_LINE_PATTERNS.some((p) => p.test(text));
  let pending = "";
  for await (const chunk of source) {
    const segments = (pending + chunk).split("\n");
    // The last segment is the (possibly empty) unterminated remainder.
    pending = segments.pop();
    for (const line of segments) {
      if (!isNoise(line)) sink.write(line + "\n");
    }
  }
  if (pending.length > 0 && !isNoise(pending)) sink.write(pending);
}
1448
/**
 * Expand a `run` target into the list of specs to execute.
 *
 * @param {string} [target] Nothing (all specs), `<feature>/<spec>` (that one spec),
 *   or `<feature>` (every spec of that feature).
 * @returns {Promise<Array<{featureName: string, specName: string}>>}
 */
async function resolveSpecs(target) {
  if (!target) return listAllSpecs();
  if (!target.includes("/")) {
    const specNames = await listSpecsForFeature(target);
    return specNames.map((specName) => ({ featureName: target, specName }));
  }
  const { featureName, specName } = parseSpecPath(target);
  return [{ featureName, specName }];
}
1462
+ //#endregion
1463
+ //#region src/cli/trace-setup.ts
1464
/** CLI subcommand `trace-setup <name>`: delegates to `runTraceSetup`. */
const traceSetupCommand = new Command("trace-setup")
  .argument("<name>", "Setup name to trace (e.g. login)")
  .description("Trace a setup procedure using dummy placeholder values")
  .action(async (name) => {
    await runTraceSetup(name);
  });
1467
/**
 * Trace a setup procedure: substitute dummy values for its placeholders, drive the agent
 * session, collect the route steps and browser actions it reports, then persist both.
 *
 * @param {string} name Setup name.
 * @returns {Promise<void>}
 */
async function runTraceSetup(name) {
  header("trace-setup", name);
  await ensureCcqaDir();
  const spec = parseSetupSpec(await readSetupSpecFile(name));
  const resolvedSpec = replacePlaceholdersWithDummies(spec);
  meta("setup", spec.title);
  meta("steps", spec.steps.length);
  if (spec.placeholders) meta("placeholders", Object.keys(spec.placeholders).join(", "));
  blank();
  const systemPrompt = buildSetupTraceSystemPrompt(resolvedSpec);
  const prompt = buildSetupTracePrompt(resolvedSpec);
  info("Running agent-browser session...");
  blank();

  const routeSteps = [];
  const traceActions = [];
  let overallStatus = "passed";

  // Parse one AB_ACTION line and keep it when it is a recognised action.
  const recordAction = (raw) => {
    const parsed = parseAbAction(raw);
    if (parsed) traceActions.push(parsed);
  };
  // Handle a single line of assistant text: route steps, plus snapshot/assert actions.
  const consumeLine = (rawLine) => {
    const trimmed = rawLine.trim();
    if (trimmed.startsWith("ROUTE_STEP|")) {
      const routeStep = parseRouteStep(trimmed);
      if (!routeStep) return;
      routeSteps.push(routeStep);
      if (routeStep.status === "FAILED") overallStatus = "failed";
      return;
    }
    if (trimmed.startsWith("AB_ACTION|snapshot|") || trimmed.startsWith("AB_ACTION|assert|")) recordAction(trimmed);
  };
  const onMessage = (msg) => {
    if (msg.type !== "assistant") return;
    for (const block of msg.message.content ?? []) {
      if (block.type !== "text" || !block.text) continue;
      const text = block.text;
      const statusLine = parseStatusLine(text);
      if (statusLine) step(statusLine.type, statusLine.stepId, statusLine.detail);
      for (const line of text.split("\n")) consumeLine(line);
    }
  };

  const { isError } = await invokeClaudeStreaming({
    prompt,
    systemPrompt,
    allowedTools: ["Bash(*)", "Read", "Grep", "Glob"],
    onAbAction: (abAction) => {
      recordAction(abAction);
    },
    onAbActionFailed: () => {
      // Drops the most recently recorded action when a failure is reported.
      traceActions.pop();
    }
  }, onMessage);
  if (isError) overallStatus = "failed";

  const route = {
    specName: name,
    timestamp: (/* @__PURE__ */ new Date()).toISOString(),
    status: overallStatus,
    steps: routeSteps
  };
  const [routePath, actionsPath] = await Promise.all([
    saveSetupRoute(name, route),
    saveSetupActions(name, traceActions)
  ]);
  blank();
  meta("route", routePath);
  meta("saved", actionsPath);
  meta("actions", traceActions.length);
  meta("status", overallStatus.toUpperCase());
  hint(`run 'ccqa generate-setup ${name}' to generate and validate the setup`);
}
1536
/**
 * Return a copy of `spec` in which every `{{key}}` placeholder inside each
 * step's `instruction` and `expected` text is replaced by that placeholder's
 * `dummy` value. The input spec is not mutated; when the spec declares no
 * placeholders it is returned as-is.
 *
 * @param {{ placeholders?: Record<string, { dummy: string }>, steps: Array<{ instruction: string, expected: string }> }} spec
 * @returns {object} The spec with dummy values substituted into its steps.
 */
function replacePlaceholdersWithDummies(spec) {
  if (!spec.placeholders) return spec;
  const dummyEntries = Object.entries(spec.placeholders);

  const fillDummies = (text) =>
    dummyEntries.reduce(
      // A replacer function is used instead of a replacement string so that
      // "$" sequences in a dummy value (e.g. "pa$$word", "$&") are inserted
      // verbatim rather than interpreted as replacement patterns.
      (filled, [key, def]) => filled.replaceAll(`{{${key}}}`, () => def.dummy),
      text,
    );

  return {
    ...spec,
    steps: spec.steps.map((step) => ({
      ...step,
      instruction: fillDummies(step.instruction),
      expected: fillDummies(step.expected),
    })),
  };
}
1553
+ //#endregion
1554
+ //#region src/cli/generate-setup.ts
1555
/**
 * `generate-setup <name>` subcommand.
 *
 * Options:
 *   --max-retries <n>  maximum number of auto-fix retries (default "3")
 *   --from-dummy       resume from an existing test.dummy.spec.ts
 *
 * The retry count is parsed as a base-10 integer and passed to
 * `runGenerateSetup` together with the setup name and the resume flag.
 */
const generateSetupCommand = new Command("generate-setup")
  .argument("<name>", "Setup name to generate (e.g. login)")
  .description("Clean up, validate, and templatize setup actions")
  .option("--max-retries <n>", "Maximum number of auto-fix retries", "3")
  .option("--from-dummy", "Resume from existing test.dummy.spec.ts (after manual fix)")
  .action(async (name, opts) => {
    const maxRetries = Number.parseInt(opts.maxRetries, 10);
    // A non-numeric value parses to NaN, which would make the retry loop's
    // `attempt <= maxRetries` check always false and silently disable retries.
    if (Number.isNaN(maxRetries)) {
      warn(`--max-retries must be an integer, got '${opts.maxRetries}'`);
      process.exit(1);
    }
    await runGenerateSetup(name, maxRetries, opts.fromDummy ?? false);
  });
1558
/**
 * Produce the final `test.spec.ts` for the setup called `name`.
 *
 * Unless resuming, the recorded setup actions are cleaned up and written to
 * `test.dummy.spec.ts`. That script is then run through `runVitest`; while it
 * fails, up to `maxRetries` auto-fix attempts rewrite it and run it again.
 * On success the script is written to `test.spec.ts` with dummy values turned
 * back into placeholders, and the dummy script is removed (best effort).
 * The process exits with code 1 when resuming without a dummy script or when
 * the script still fails after the fix attempts.
 *
 * @param {string} name - Setup name.
 * @param {number} maxRetries - Upper bound on auto-fix attempts.
 * @param {boolean} fromDummy - Reuse the existing dummy script instead of regenerating it.
 * @returns {Promise<void>}
 */
async function runGenerateSetup(name, maxRetries, fromDummy) {
  header("generate-setup", name);
  await ensureCcqaDir();
  const spec = parseSetupSpec(await readSetupSpecFile(name));
  const dummyPath = join(getSetupDir(name), "test.dummy.spec.ts");
  const finalPath = join(getSetupDir(name), "test.spec.ts");

  if (!fromDummy) {
    const { actions } = await getSetupActions(name);
    meta("setup", spec.title);
    meta("actions", actions.length);
    blank();
    const cleanedActions = await cleanupActions(actions);
    if (cleanedActions.length !== actions.length) meta("cleaned", cleanedActions.length);
    await writeFile(dummyPath, actionsToScript(cleanedActions, spec.title), "utf-8");
    meta("saved", dummyPath);
  } else {
    let dummyExists = true;
    try {
      await stat(dummyPath);
    } catch {
      dummyExists = false;
    }
    if (!dummyExists) {
      warn(`test.dummy.spec.ts not found. Run without --from-dummy first.`);
      process.exit(1);
    }
    info("Resuming from existing test.dummy.spec.ts");
  }
  blank();

  // First run, then keep fixing and re-running while the test fails and
  // attempts remain.
  let run = await runVitest(dummyPath);
  for (let attempt = 1; run.exitCode !== 0 && attempt <= maxRetries; attempt++) {
    info(`auto-fix attempt ${attempt}/${maxRetries}...`);
    blank();
    const fixed = await autoFixWithLLM(run.currentScript, run.output);
    if (!fixed) {
      warn("could not determine fix from failure log");
      break;
    }
    await writeFile(dummyPath, fixed, "utf-8");
    meta("saved", dummyPath);
    blank();
    run = await runVitest(dummyPath);
  }

  if (run.exitCode !== 0) {
    warn("auto-fix exhausted — setup test still failing");
    hint(`edit ${dummyPath} manually, then run: ccqa generate-setup ${name} --from-dummy`);
    process.exit(1);
  }

  const finalScript = reversePlaceholdersInScript(run.currentScript, spec.placeholders);
  await writeFile(finalPath, finalScript, "utf-8");
  // Removing the dummy script is best effort; a failure here is ignored.
  await unlink(dummyPath).catch(() => {});
  blank();
  meta("saved", finalPath);
  hint(`setup '${name}' is ready — reference it in test-spec.md with setups: [{name: ${name}, params: {...}}]`);
}
1609
/**
 * Replace dummy values with {{placeholder}} directly in the test script text.
 * Longer dummy values are replaced first to avoid partial matches.
 *
 * Placeholders whose dummy value is the empty string are skipped: replacing
 * "" would insert the placeholder between every character of the script.
 *
 * @param {string} script - Test script source containing dummy values.
 * @param {Record<string, { dummy: string }> | undefined} placeholders
 * @returns {string} The script with each dummy value replaced by `{{key}}`.
 */
function reversePlaceholdersInScript(script, placeholders) {
  if (!placeholders) return script;
  const entries = Object.entries(placeholders)
    .filter(([, def]) => def.dummy !== "")
    .sort((a, b) => b[1].dummy.length - a[1].dummy.length);
  let result = script;
  for (const [key, def] of entries) {
    // Replacer function: keeps any "$" in the key literal instead of letting
    // replaceAll treat it as a special replacement pattern.
    result = result.replaceAll(def.dummy, () => `{{${key}}}`);
  }
  return result;
}
1620
/**
 * Request a list of fixes for a failing script and apply them.
 *
 * Calls `invokeClaudeStreaming` (built-in tools disabled, one turn) with a
 * prompt built from the script and its failure log, strips an optional
 * ``` / ```json fence from the reply, parses it as JSON and, when it is a
 * non-empty array, passes it to `applySleepFixes`.
 *
 * @param {string} script - Current test script source.
 * @param {string} failureLog - Output of the failing run.
 * @returns {Promise<string | null>} The patched script, or null when the call
 *   reports an error, yields no result, yields no fixes, or anything throws.
 */
async function autoFixWithLLM(script, failureLog) {
  try {
    const response = await invokeClaudeStreaming(
      {
        prompt: buildAutoFixPrompt(script, failureLog),
        disableBuiltinTools: true,
        maxTurns: 1,
      },
      () => {},
    );
    if (response.isError || !response.result) return null;

    const unfenced = response.result
      .trim()
      .replace(/^```(?:json)?\n?([\s\S]*?)\n?```$/, "$1")
      .trim();
    const fixes = JSON.parse(unfenced);
    const hasFixes = Array.isArray(fixes) && fixes.length > 0;
    return hasFixes ? applySleepFixes(script, fixes) : null;
  } catch {
    return null;
  }
}
1636
/**
 * Apply sleep-related fixes to a test script and return the patched source.
 *
 * Two kinds of fix are supported, both addressed by 1-based `line`:
 *   - `{ line, increase_to }` rewrites the duration of an existing
 *     `spawnSync("sleep", ["N"]` call on that line;
 *   - `{ line, seconds }` inserts a new sleep statement before that line.
 * Increases are applied first; insertions then run from the bottom of the
 * file upwards so earlier insertions do not shift later target lines.
 *
 * Fix values are interpolated into script source, so they are validated
 * first: entries that are not objects, whose `line` is not an integer, or
 * whose duration is not a plain non-negative number are skipped.
 *
 * @param {string} script - Test script source.
 * @param {Array<{ line: number, increase_to?: number|string, seconds?: number|string }>} fixes
 * @returns {string} The patched script.
 */
function applySleepFixes(script, fixes) {
  // Normalize a duration to a digits-only string, or null when it is unsafe
  // to embed in generated source (quotes, "$" patterns, NaN, negatives, ...).
  const toSeconds = (value) => {
    let text = "";
    if (typeof value === "number") text = String(value);
    else if (typeof value === "string") text = value.trim();
    return /^\d+(?:\.\d+)?$/.test(text) ? text : null;
  };
  // Convert a 1-based line number to a 0-based index; -1 marks an invalid one.
  const toIndex = (fix) => {
    const idx = Number(fix.line) - 1;
    return Number.isInteger(idx) ? idx : -1;
  };

  const lines = script.split("\n");
  const usable = fixes.filter((fix) => fix !== null && typeof fix === "object");

  for (const fix of usable) {
    if (!("increase_to" in fix)) continue;
    const seconds = toSeconds(fix.increase_to);
    const idx = toIndex(fix);
    if (seconds === null || idx < 0 || idx >= lines.length) continue;
    lines[idx] = lines[idx].replace(
      /spawnSync\("sleep",\s*\["\d+(?:\.\d+)?"\]/,
      `spawnSync("sleep", ["${seconds}"]`,
    );
  }

  const inserts = usable
    .filter((fix) => "seconds" in fix && !("increase_to" in fix))
    .map((fix) => ({ idx: toIndex(fix), seconds: toSeconds(fix.seconds) }))
    .filter(({ idx, seconds }) => seconds !== null && idx >= 0)
    .sort((a, b) => b.idx - a.idx);
  for (const { idx, seconds } of inserts) {
    if (idx <= lines.length) {
      lines.splice(idx, 0, ` spawnSync("sleep", ["${seconds}"], { stdio: "inherit" });`);
    }
  }
  return lines.join("\n");
}
1649
/**
 * Run one test script through `spawnVitestCaptured` using the bundled vitest
 * config, echo the captured stdout/stderr to this process, and report the
 * outcome together with the script's current contents.
 *
 * @param {string} scriptPath - Path of the test script to run.
 * @returns {Promise<{ exitCode: number, output: string, currentScript: string }>}
 *   `output` is the captured stdout followed by the captured stderr;
 *   `currentScript` is the script file read after the run.
 */
async function runVitest(scriptPath) {
  const vitestArgs = ["run", "--config", bundledVitestConfigPath(), scriptPath];
  const captured = await spawnVitestCaptured(vitestArgs);
  const currentScript = await readFile(scriptPath, "utf8");

  process.stdout.write(captured.stdout);
  if (captured.stderr) process.stderr.write(captured.stderr);

  return {
    exitCode: captured.exitCode,
    output: captured.stdout + captured.stderr,
    currentScript,
  };
}
1665
/**
 * Request a cleaned-up version of the recorded actions.
 *
 * Calls `invokeClaudeStreaming` (built-in tools disabled, one turn) with the
 * cleanup prompt, strips an optional ``` / ```json fence from the reply and
 * parses it as JSON. This is best effort: the original `actions` are returned
 * whenever the call reports an error, yields no result, yields something
 * other than a non-empty array, or anything throws.
 *
 * @param {Array<object>} actions - Recorded actions.
 * @returns {Promise<Array<object>>} The cleaned actions, or `actions` unchanged.
 */
async function cleanupActions(actions) {
  let cleaned = actions;
  try {
    const response = await invokeClaudeStreaming(
      {
        prompt: buildCleanupPrompt(actions),
        disableBuiltinTools: true,
        maxTurns: 1,
      },
      () => {},
    );
    if (!response.isError && response.result) {
      const unfenced = response.result
        .trim()
        .replace(/^```(?:json)?\n?([\s\S]*?)\n?```$/, "$1")
        .trim();
      const candidate = JSON.parse(unfenced);
      if (Array.isArray(candidate) && candidate.length > 0) cleaned = candidate;
    }
  } catch {}
  return cleaned;
}
1679
+ //#endregion
1680
+ //#region src/cli/index.ts
1681
// Read the CLI version from the package.json located by resolvePackageJson().
const packageJsonPath = resolvePackageJson();
const version = JSON.parse(readFileSync(packageJsonPath, "utf8")).version;
1683
/**
 * Pick the package.json path to read the version from.
 *
 * Two candidates are resolved relative to this module: `../package.json` and
 * `../../package.json`. The first is returned when it can be read; otherwise
 * the second is returned without being checked.
 *
 * @returns {string} Filesystem path of the chosen package.json.
 */
function resolvePackageJson() {
  const [distCandidate, srcCandidate] = ["../package.json", "../../package.json"].map(
    (relative) => fileURLToPath(new URL(relative, import.meta.url)),
  );
  let distReadable = true;
  try {
    readFileSync(distCandidate);
  } catch {
    distReadable = false;
  }
  return distReadable ? distCandidate : srcCandidate;
}
1693
// Root CLI program: registers every subcommand, then parses process arguments.
const program = new Command();
program
  .name("ccqa")
  .description("E2E test CLI using Claude Code + agent-browser")
  .version(version);
for (const subcommand of [
  traceCommand,
  generateCommand,
  runCommand,
  traceSetupCommand,
  generateSetupCommand,
]) {
  program.addCommand(subcommand);
}
// The subcommands use async action handlers, so parseAsync() is awaited: the
// program only settles once the handler finishes, and a rejection surfaces
// here instead of as a detached unhandled promise rejection.
await program.parseAsync();
1701
+ //#endregion
1702
+ export {};