ccqa 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/bin/ccqa.mjs CHANGED
@@ -1,14 +1,14 @@
1
1
  #!/usr/bin/env node
2
- import { n as spawnAB } from "../spawn-ab-BxjEhA5e.mjs";
2
+ import { n as spawnAB, t as sleepSync } from "../spawn-ab-DjRh1-4T.mjs";
3
3
  import { createRequire } from "node:module";
4
4
  import { Command } from "commander";
5
5
  import { accessSync, existsSync, readFileSync, statSync } from "node:fs";
6
6
  import { fileURLToPath } from "node:url";
7
7
  import { access, mkdir, mkdtemp, readFile, readdir, rm, stat, writeFile } from "node:fs/promises";
8
8
  import { query } from "@anthropic-ai/claude-agent-sdk";
9
+ import { ZodError, z } from "zod";
9
10
  import { delimiter, dirname, join, relative, resolve } from "node:path";
10
11
  import { parse, stringify } from "yaml";
11
- import { ZodError, z } from "zod";
12
12
  import { execFile, spawn } from "node:child_process";
13
13
  import { createInterface } from "node:readline";
14
14
  import { homedir, tmpdir } from "node:os";
@@ -57,13 +57,20 @@ agent-browser --session SESSION press <Key>
57
57
  agent-browser --session SESSION select "<selector>" "<value>"
58
58
  agent-browser --session SESSION hover "<selector>"
59
59
  agent-browser --session SESSION wait --text "<text>" [--timeout <ms>]
60
- agent-browser --session SESSION wait "<selector>" [--timeout <ms>] [--state visible|hidden]
60
+ agent-browser --session SESSION wait --load networkidle
61
+ agent-browser --session SESSION get count "<selector>" # element-existence check (returns a number, fast)
61
62
  agent-browser --session SESSION cookies clear
63
+ agent-browser --session SESSION find <locator> <value> <action> [<input>] [--name "<n>"] [--exact]
64
+ # See "Selector Rules" for the full \`find\` subset.
65
+ # IMPORTANT: do NOT use \`wait "<css-selector>"\`. agent-browser ignores --timeout on a
66
+ # CSS-selector wait and blocks for ~150s when the selector never matches, killing the run.
67
+ # Wait for readiness with \`wait --text\`, \`wait --load networkidle\`, or just use \`find\`
68
+ # (which waits internally). To check an element exists, use \`get count "<selector>"\`.
62
69
  \`\`\`
63
70
 
64
71
  ## Selector Rules
65
72
 
66
- **ALLOWED — these formats only:**
73
+ **ALLOWED selector formats — use ONE of these everywhere a selector appears (click, fill, wait, assert, ...):**
67
74
 
68
75
  | Format | Use when |
69
76
  |--------|----------|
@@ -71,24 +78,63 @@ agent-browser --session SESSION cookies clear
71
78
  | \`text=visible text\` | Unique visible text, no aria-label |
72
79
  | \`[placeholder='text']\` | Input identified by placeholder |
73
80
  | \`[type='password']\` | Password inputs only |
74
- | \`a[href*='pattern']\` | Links where \`text=\` fails — use the URL pattern from the ARIA snapshot (e.g. \`a[href*='/settings']\`) |
81
+ | \`a[href*='pattern']\` | Links where \`text=\` fails — use the URL pattern from the ARIA snapshot |
82
+ | \`[data-testid='...']\`, \`[data-qa='...']\` | Specific attribute selectors when an aria-label is absent |
83
+
84
+ **FORBIDDEN — these break recorded tests and are rejected by the hook layer:**
85
+
86
+ - \`@ref\` / \`@e1\` / \`e14\` — reference IDs are session-specific and change every run.
87
+ - **Bare tag selectors**: \`button\`, \`a\`, \`div\`, \`td\`, \`tr\`, \`main a\`, \`table tbody tr:nth-child(N)\`. These match every element of that tag and are non-deterministic on replay. **This includes the inner selector inside \`find first/last/nth\`** — see the \`find\` rules below.
88
+ - \`[role='button']\` or \`[type='checkbox']\` alone — matches too many elements.
89
+ - JavaScript execution (\`eval\`, \`js\`) — blocked by the hook layer.
90
+
91
+ ### \`find\` subset (fallback when no ALLOWED CSS uniquely targets the element)
92
+
93
+ When repeated aria-labels / visible text make ALLOWED selectors ambiguous (e.g. a chat client where every message row has the same "1 reply" button), use one of these — they record as structured actions and replay deterministically:
94
+
95
+ \`\`\`
96
+ find role <role> <action> [--name "<n>"] [--exact]
97
+ find text|label|placeholder|alt|title "<text>" <action> [--exact]
98
+ find testid "<id>" <action>
99
+ find first|last "<ALLOWED-css>" <action>
100
+ find nth <index> "<ALLOWED-css>" <action>
101
+ \`\`\`
102
+
103
+ \`<action>\` is one of \`click | dblclick | fill | type | hover | focus | check | uncheck\`. For \`fill\`/\`type\`, the input value follows the action: \`find label "Email" fill "user@example.com"\`.
75
104
 
76
- **FORBIDDEN these will break recorded tests or are not valid commands:**
105
+ **Rules for \`find\`:**
77
106
 
78
- - \`@ref\` / \`@e1\` / \`e14\` reference IDs are session-specific and change every run; never use them
79
- - \`[role='button']\` or \`[type='checkbox']\` alone — matches too many elements
80
- - Bare tag selectors: \`button\`, \`td\`, \`tr\`, \`main a\`, \`table tbody tr:nth-child(N)\` these are positional/non-deterministic and will fail on replay
81
- - \`find ...\`, \`textbox ...\`, \`label ...\` not valid agent-browser commands; these are **blocked** and will fail
82
- - JavaScript execution (\`eval\`, \`js\`) **blocked** at the hook level; cannot bypass this restriction
107
+ 1. Try ALLOWED selectors first. Only reach for \`find\` when they demonstrably cannot uniquely target the element.
108
+ 2. **The inner selector for \`first/last/nth\` MUST be one of the ALLOWED formats above.** Never pass a bare tag — "the last button" is meaningless on replay.
109
+ 3. \`find last\` is reliable only when the layout guarantees "the target is the bottom-most match" (e.g. the most-recently-sent chat message). Be explicit in the AB_ACTION label.
110
+ 4. Argument order is \`<value> <action> [flags]\` — flags go after the action. Putting \`--name\` / \`--exact\` before the action makes agent-browser fail with "Unknown subaction".
111
+ 5. \`--name "<n>"\` is **role-only**. Never pass it to \`find text\`, \`find label\`, etc.
112
+ 6. \`find\` includes its own wait; do not chain a \`wait\` before it.
83
113
 
84
- **Selector workflow:**
85
- 1. Run \`snapshot\` — read the ARIA tree output carefully
86
- 2. Find the element; note its exact \`aria-label\` value if present
87
- 3. If aria-label present use \`[aria-label='...']\`; otherwise use \`text=...\`
88
- 4. If \`text=...\` fails for a link → look at the ARIA snapshot for the link's URL, then use \`a[href*='...']\` with a distinctive URL substring (e.g. \`a[href*='/dashboard']\`, \`a[href*='filter=active']\`)
89
- 5. If clicking a table row → look for \`<a>\` links inside the row in the ARIA snapshot, then use \`a[href*='...']\` targeting that link's URL pattern
90
- 6. For checkboxes: try \`check "text=Label"\` or \`check "[aria-label='Label']"\`
91
- 7. Never guess — if a selector fails once, take a fresh snapshot before retrying
114
+ **Examples:**
115
+
116
+ - GOOD: \`find last "[data-testid='reply-link']" click\` — specific attribute + layout-guaranteed last match
117
+ - GOOD: \`find role button click --name "Submit"\` — role + accessible name (flags after action)
118
+ - BAD: \`find role button --name "Submit" click\` — wrong order (flags before the action)
119
+ - BAD: \`find last "button" click\` — bare tag as the inner selector
120
+
121
+ ### Selector workflow
122
+
123
+ 1. Run \`snapshot\` and read the ARIA tree.
124
+ 2. Identify the element; note its exact \`aria-label\` if present.
125
+ 3. If aria-label present → use \`[aria-label='...']\`. Otherwise → use \`text=...\`.
126
+ 4. For links where \`text=\` fails, find the link's URL in the snapshot and use \`a[href*='...']\` with a distinctive substring.
127
+ 5. For checkboxes: try \`check "text=Label"\` or \`check "[aria-label='Label']"\`.
128
+ 6. If repeated labels make every ALLOWED selector ambiguous → use the \`find\` subset above.
129
+ 7. Never guess. If a selector fails, take a fresh snapshot before retrying.
130
+
131
+ ### Special input types
132
+
133
+ **contenteditable / RichText editors**: \`fill "[contenteditable='true']" "<text>"\` works on contenteditable elements (chat composers, WYSIWYG bodies) — agent-browser sets the text directly. Use a single \`fill\`; do NOT just \`click\` the field and rely on \`keyboard inserttext\` (that keystroke command is not recorded as a structured action, so the text never makes it into the generated test and the field ends up empty on replay).
134
+
135
+ **combobox / select with a required marker (\`*\`)**: required form fields often include the marker in their accessible name. If \`find role combobox click --name "<label>"\` misses, prefer \`find label "<label>" click\` or \`click "[aria-label='<label> *']"\`.
136
+
137
+ **Verifying cleanup / deletion**: assert the *absence* of the deleted thing, not the surrounding listing screen's text. Use \`wait --fn "!document.body.innerText.includes('<unique-label>')"\` (text disappearance) — never \`wait "<css-selector>" --state hidden\` (blocks the daemon) and never \`wait --text "<navbar label>"\` (passes regardless of the deletion).
92
138
 
93
139
  ## Test Specification
94
140
 
@@ -103,52 +149,42 @@ ${stepsText}
103
149
  ## Execution Workflow
104
150
 
105
151
  For each step:
106
- 1. Emit \`STEP_START|<step-id>|<short description of what this step does>\`
107
- 2. Run \`snapshot\` and identify selectors from the ARIA tree
108
- 3. Execute the action using an ALLOWED selector
109
- 4. Emit \`AB_ACTION|...\` for every browser action (see below)
110
- 5. Run \`snapshot\` again to verify the outcome
111
- 6. Confirm at least **two independent signals** (URL change, element appearance, text change, etc.)
112
- 7. For each verified signal, emit \`AB_ACTION|assert|...\` (see Assertion Protocol below)
113
- 8. Emit \`ROUTE_STEP|...\`
114
- 9. Emit \`STEP_DONE\`, \`ASSERTION_FAILED\`, or \`STEP_SKIPPED\`
115
-
116
- **After form submission or navigation:** take a snapshot before continuing. If an intermediate screen appears (e.g. account selection, role picker), complete it and emit AB_ACTION for each interaction.
152
+ 1. Emit \`STEP_START|<step-id>|<short description>\`.
153
+ 2. Run \`snapshot\` and identify selectors from the ARIA tree.
154
+ 3. Execute the action using an ALLOWED selector (see Selector Rules).
155
+ 4. Emit \`AB_ACTION|...\` for every browser action (see AB_ACTION Protocol).
156
+ 5. Run \`snapshot\` again to verify the outcome.
157
+ 6. Confirm at least **two independent signals** (URL change, element appearance, text change, ...).
158
+ 7. For each verified signal, emit \`AB_ACTION|assert|...\` (see Assertion Protocol).
159
+ 8. Emit \`ROUTE_STEP|...\`.
160
+ 9. Emit \`STEP_DONE\`, \`ASSERTION_FAILED\`, or \`STEP_SKIPPED\`.
161
+
162
+ **After form submission or navigation:** take a fresh snapshot before continuing. If an intermediate screen appears (account selection, role picker, ...), complete it and emit AB_ACTION for each interaction.
117
163
 
118
164
  ## Guardrails
119
165
 
120
- - **Stop after 3 consecutive failures on the same step** — emit \`ASSERTION_FAILED\` and report the blocker. Failures include: selector not found, element not interactable, command blocked by hook.
121
- - **Do NOT use workarounds** if all ALLOWED selectors fail, do NOT fall back to \`mouse move\`, coordinate-based clicks, \`Tab\`+\`Enter\` keyboard navigation, or any other indirect method. These cannot be recorded as reliable test actions. Instead, emit \`ASSERTION_FAILED\` with category \`selector-drift\` and describe which element you could not reach.
122
- - **Do NOT use bare tag selectors** — never use \`click "button"\`, \`click "td"\`, \`click "main a"\`, or \`click "a"\` alone. These match too many elements and are non-deterministic. Always use a specific ALLOWED selector format.
123
- - Do NOT retry a selector without taking a fresh snapshot first
124
- - Do NOT work around blockers (login walls, missing data, captchas) stop and report
125
- - **Do NOT suppress errors** never use \`2>/dev/null\`, \`|| true\`, \`; other-command\`, or any other technique that hides agent-browser failures. Each \`agent-browser\` command must run standalone so failures are properly detected and recorded.
126
- - **If \`agent-browser\` is not found, stop immediately.** Do not run \`which\`, \`find\`, \`npm ls\`, \`npm install\`, \`npx\`, \`brew\`, or any other discovery / installation command. Do not try alternate paths. The ccqa host already validates the binary before launching you, so if you see \`command not found\` it is a host-environment problem you cannot fix from inside the test run. Emit one line and terminate: \`ASSERTION_FAILED|step-XX|agent-browser binary not available in PATH\`.
166
+ - **Stop after 3 consecutive failures on the same step** — emit \`ASSERTION_FAILED\` and report the blocker.
167
+ - **No workarounds.** If all ALLOWED selectors fail, emit \`ASSERTION_FAILED|...|selector-drift: ...\`. Do NOT fall back to coordinate clicks, mouse moves, or \`Tab\`+\`Enter\` keyboard navigation — they cannot be recorded as reliable test actions.
168
+ - Do NOT retry a selector without taking a fresh snapshot first.
169
+ - Do NOT work around blockers (login walls, missing data, captchas) — stop and report.
170
+ - **Do NOT suppress errors.** Never use \`2>/dev/null\`, \`|| true\`, \`; true\`, or any technique that hides agent-browser failures. Each \`agent-browser\` invocation must be its own standalone Bash call. Chaining multiple agent-browser commands with \`&&\` / \`;\` / \`|\` is rejected by the hook layer.
171
+ - **If \`agent-browser\` is not found, stop immediately.** Do not run \`which\`, \`find\`, \`npm ls\`, \`npm install\`, \`npx\`, \`brew\`, or any other discovery / installation command. Emit one line and terminate: \`ASSERTION_FAILED|step-XX|agent-browser binary not available in PATH\`.
127
172
 
128
173
  ## Source Code Reference
129
174
 
130
- You have access to **Read**, **Grep**, and **Glob** tools to inspect the application source code. Use them proactively to find correct selectors — do NOT guess \`a[href*='...']\` patterns by trial and error.
175
+ You have \`Read\`, \`Grep\`, and \`Glob\` to inspect the application source code. Use them proactively to find correct selectors — do NOT guess \`a[href*='...']\` patterns.
131
176
 
132
- **When to read source code:**
133
- - Before clicking a link: Grep for the link text or URL pattern in the codebase to find the exact \`href\` value
134
- - Before navigating to a new page: Glob for page/route files to understand the URL structure
135
- - When the ARIA snapshot shows an element but \`text=\` and \`[aria-label=]\` selectors fail: Read the component to find what HTML attributes the element has
177
+ **When**: before clicking a link (find the \`href\`); before navigating to a new page (understand routing); when an ARIA element exists but no ALLOWED selector matches (find the actual HTML attributes).
136
178
 
137
- **How:**
138
- 1. Use \`Grep\` to search for UI text, component names, or URL patterns
139
- 2. Use \`Read\` to inspect the component's JSX/TSX and find \`href\`, \`aria-label\`, \`data-testid\`, or class names
140
- 3. Build a precise ALLOWED selector from the discovered attributes
179
+ **How**: \`Grep\` for UI text / component names / URL patterns → \`Read\` the JSX/TSX to find \`href\`, \`aria-label\`, \`data-testid\`, or class names → build a precise ALLOWED selector.
141
180
 
142
- **Rules:**
143
- - Only READ source files — never modify them
144
- - Keep source reading focused — search for specific strings, not entire directories
181
+ **Rules**: only READ source files, never modify them. Keep searches focused.
145
182
 
146
183
  ## Waiting for Async Operations
147
184
 
148
- Prefer the \`wait\` command over polling:
185
+ Prefer \`wait\` over polling:
149
186
 
150
187
  \`\`\`bash
151
- # Best: wait for expected text to appear
152
188
  agent-browser --session ${sessionName} wait --text "<completion text>"
153
189
  \`\`\`
154
190
 
@@ -158,7 +194,6 @@ If polling is required (e.g. waiting for a spinner to disappear):
158
194
  for i in $(seq 1 18); do
159
195
  sleep 10
160
196
  result=$(agent-browser --session ${sessionName} snapshot 2>&1)
161
- # Check result for the expected change and break when found
162
197
  echo "$result" | grep -q "<done indicator>" && break
163
198
  done
164
199
  agent-browser --session ${sessionName} snapshot
@@ -186,18 +221,28 @@ AB_ACTION|drag|<source selector>|<target selector>|<source label>
186
221
  AB_ACTION|wait|<selector or text>|<label>
187
222
  AB_ACTION|snapshot|<key observation, max 100 chars>
188
223
  AB_ACTION|assert|<assertType>|<selector or "">|<value or "">|<observation>
224
+
225
+ # find_* (semantic locator fallback). <extra> = role's --name OR nth's index OR "".
226
+ # <exact> = literal "exact" if --exact was passed, "" otherwise. Keep empty pipe slots.
227
+ AB_ACTION|find_click|<locator>|<value>|<extra>|<exact>|<label>
228
+ AB_ACTION|find_dblclick|<locator>|<value>|<extra>|<exact>|<label>
229
+ AB_ACTION|find_hover|<locator>|<value>|<extra>|<exact>|<label>
230
+ AB_ACTION|find_focus|<locator>|<value>|<extra>|<exact>|<label>
231
+ AB_ACTION|find_check|<locator>|<value>|<extra>|<exact>|<label>
232
+ AB_ACTION|find_uncheck|<locator>|<value>|<extra>|<exact>|<label>
233
+ AB_ACTION|find_fill|<locator>|<value>|<extra>|<exact>|<input>|<label>
234
+ AB_ACTION|find_type|<locator>|<value>|<extra>|<exact>|<input>|<label>
189
235
  \`\`\`
190
236
 
191
- The selector in AB_ACTION must be one of the ALLOWED formats above.
237
+ Selectors in AB_ACTION must follow Selector Rules. \`find_*\` lines use the locator + value pair instead of a separate selector. Do NOT include literal \`|\` inside any field — replace with a space if necessary.
192
238
 
193
- **CRITICAL — record only successful actions.** The AB_ACTION stream is the
194
- canonical replay sequence: every line in it must be reproducible on a fresh
195
- browser session. Therefore:
239
+ **CRITICAL — record only successful actions.** The AB_ACTION stream is the canonical replay sequence: every line must be reproducible on a fresh browser session.
196
240
 
197
- - If you tried a selector and \`agent-browser\` returned a non-zero exit (selector not found, element not interactable, timeout): **do NOT emit \`AB_ACTION|...\`** for that attempt. Take a fresh snapshot, switch selector, and only emit the AB_ACTION for the call that finally succeeded.
198
- - If you explored multiple selectors for the same logical action (e.g. tried \`[aria-label='Email']\`, it failed, then \`[placeholder='Email']\` worked): emit AB_ACTION for the **working selector only**. The failed attempt must not appear in the trace.
199
- - The same rule applies to \`AB_ACTION|assert|...\` lines: only emit them for assertions you actually verified on the current page in the current snapshot. Never declare an assertion against a selector you have not just confirmed visible — even if you intended to use it earlier.
200
- - If a step ultimately fails after retries: emit \`ASSERTION_FAILED\` and STOP. Do NOT leave half-recorded actions for the failed step in the AB_ACTION stream.
241
+ - On a non-zero exit from agent-browser (selector not found, element not interactable, timeout), **do NOT emit AB_ACTION** for that attempt. Switch selector and only emit the AB_ACTION for the call that finally succeeded.
242
+ - If you tried several selectors / \`find_*\` locators for the same logical action, emit AB_ACTION for the **last working one only**. Multiple failed attempts in a row will all fail at replay validation and silently delete the step from the generated test.
243
+ - \`AB_ACTION|assert|...\` follows the same rule: only emit assertions you actually verified on the current page in the current snapshot.
244
+ - **Environment-failure recovery is not part of the test.** If a session times out, a network blip drops you to login, or the app crashes and you re-login / re-navigate / re-fill to recover, do NOT emit AB_ACTION for the recovery operations.
245
+ - If a step ultimately fails after retries: emit \`ASSERTION_FAILED\` and STOP. Do not leave half-recorded actions in the stream.
201
246
 
202
247
  ## Assertion Protocol
203
248
 
@@ -212,62 +257,74 @@ After verifying each step, emit \`AB_ACTION|assert\` lines for each signal you c
212
257
  | \`element_visible\` | Element is visible | CSS selector | (empty) |
213
258
  | \`element_not_visible\` | Element is hidden/removed | CSS selector | (empty) |
214
259
  | \`url_contains\` | URL contains a pattern | (empty) | URL substring |
215
- | \`element_enabled\` | Button/input is enabled | CSS selector | (empty) |
216
- | \`element_disabled\` | Button/input is disabled | CSS selector | (empty) |
260
+ | \`element_enabled\` | Button/input is enabled | CSS selector (state-independent) | (empty) |
261
+ | \`element_disabled\` | Button/input is disabled | CSS selector (state-independent) | (empty) |
217
262
  | \`element_checked\` | Checkbox is checked | CSS selector | (empty) |
218
263
  | \`element_unchecked\` | Checkbox is unchecked | CSS selector | (empty) |
219
264
 
220
- **Stability rules — CRITICAL:**
221
- - **NEVER** assert on: timestamps (dates, times), session IDs, exact numeric counts that vary between runs
222
- - For dynamic counts (e.g. "42 results"): assert on the STABLE part only (e.g. "results"), not the number
223
- - **PREFER** asserting on: status text, button labels, URL patterns, element enabled/disabled state
265
+ **Stability rules — CRITICAL. NEVER assert on values that change run-to-run:**
266
+
267
+ - Timestamps, session IDs, exact numeric counts that vary between runs.
268
+ - **Absolute dates / clock times**: \`12:34:56\`, \`2026-05-20\`, \`2026年5月20日\`, \`5月20日\`. These are scrubbed by post-trace literal-scrub anyway — avoid them at the source.
269
+ - **Relative-time labels** — true only in the moment of the trace, stale by replay:
270
+ - English: \`just now\`, \`5 minutes ago\`, \`2 hours ago\`, \`yesterday\`, \`last week\`.
271
+ - Japanese: \`たった今\`, \`3分前\`, \`1時間前\`, \`昨日\`.
272
+ - Dynamic counts like "42 results" → assert on the stable suffix ("results") only.
273
+ - **PREFER**: status text, button labels, URL patterns, element enabled/disabled state.
224
274
 
225
- **Page context rules — CRITICAL:**
226
- - After a page navigation (\`open\` or \`click\` that navigates), take a **fresh snapshot** BEFORE emitting any assertions
227
- - Only assert on text/elements that are visible on the **current** page — never assert on text from the previous page
228
- - If you navigated away from a page, its text is gone — do not emit \`text_visible\` for it
275
+ **No tautological state asserts — CRITICAL for \`element_enabled\` / \`element_disabled\`:**
229
276
 
230
- **Selector rules for assert actions CRITICAL:**
231
- - Use the **same ALLOWED formats** as browser actions — never invent aria-label values
232
- - Only use \`[aria-label='...']\` if that **exact** aria-label string appears in the current ARIA snapshot output
233
- - When unsure, prefer \`text_visible\`/\`text_not_visible\` (no selector needed) over guessing a selector — but still pre-verify with \`wait --text\` per the MUST-VERIFY rule below; \`alt\`-attribute "text" will not match.
234
- - For \`element_disabled\`/\`element_enabled\`: use a CSS class selector if no aria-label is confirmed in the snapshot
277
+ The selector must identify *which* element by something **other than the state you are asserting**. Selecting the element *by* its state and then asserting that state is a tautology that always passes and verifies nothing.
278
+
279
+ - BAD: \`element_disabled | button[disabled] |\` — picks an already-disabled button, then "confirms" it is disabled. Passes even if the button the spec cares about is missing or enabled.
280
+ - BAD: \`element_enabled | button:enabled |\`, \`[aria-disabled='true']\`, \`input:disabled\` — same trap.
281
+ - Name the element by a stable, state-independent selector and assert the state on it: e.g. the "送信" button is \`find role button --name "送信"\`; to assert it is disabled, give \`element_disabled\` a selector that targets *that* button (a stable \`id\` / \`data-testid\` / unique class), **not** \`[disabled]\`.
282
+ - If you cannot target the specific element without a state pseudo-class/attribute, **do not emit the enabled/disabled assert** — assert a user-visible consequence instead (e.g. the action it gates does not happen, a "権限がありません" message is shown), or rely on \`text_visible\` for the label plus \`text_not_visible\` for what an enabled control would have produced.
283
+
284
+ **Page-context and selector rules:**
285
+
286
+ - After a navigation, take a **fresh snapshot** before emitting any assertion. Don't assert on text from the previous page.
287
+ - Assertion selectors follow the same Selector Rules as actions — never invent aria-label values; use the exact strings from the current snapshot.
288
+ - When unsure, prefer \`text_visible\`/\`text_not_visible\` (no selector needed) — but pre-verify with \`wait --text\` per the MUST-VERIFY rule below.
235
289
 
236
290
  **MUST-VERIFY rule — STRICT (applies to every assert except \`url_contains\`):**
237
291
 
238
- The \`snapshot\` output is the **accessibility tree**: a semantic view. \`agent-browser\` queries the **real DOM**. They DO NOT always match. Two known traps:
292
+ The \`snapshot\` output is the **accessibility tree**, but \`agent-browser\` queries the **real DOM**. They don't always agree. Three known traps:
239
293
 
240
- 1. *Selector trap*: a snapshot row like \`textbox "Email address"\` is reachable via \`[placeholder='...']\` but **NOT** via \`[aria-label='...']\` if no \`aria-label\` attribute is actually set the browser inferred the label from \`<label for=>\` / surrounding text / \`placeholder\`.
241
- 2. *Text trap*: a snapshot row like \`link "Dashboard"\` may come from \`<a><img alt="Dashboard"></a>\` — the visible "text" is an \`alt\` attribute, not a text node. \`text_visible\` (which scans visible text nodes via \`wait --text\`) will NOT find it.
294
+ 1. *Selector trap*: a snapshot row like \`textbox "Email address"\` may be reachable via \`[placeholder='...']\` but **not** via \`[aria-label='...']\` if no aria-label attribute is actually set (the browser inferred the label from \`<label for=>\` / placeholder).
295
+ 2. *Text trap*: a snapshot row like \`link "Dashboard"\` may come from \`<a><img alt="Dashboard"></a>\` — the visible "text" is an \`alt\` attribute, not a text node. \`text_visible\` (which scans visible text nodes) will NOT find it.
296
+ 3. *Input-value trap*: after you \`fill\` an \`<input>\` / \`<textarea>\` / \`[contenteditable]\`, the text you typed lives in the element's **value**, not as a visible text node. **Do NOT assert the typed value with \`text_visible\`** — it will never match. The spec's "the field reflects X" expectation is implicitly confirmed when the form submits successfully and the value shows up on the *result* page (a list row, a detail page). Assert there, not on the input itself.
242
297
 
243
- Before emitting an \`AB_ACTION|assert|...\` line, **verify the assertion form actually resolves on the live page**:
298
+ Before emitting \`AB_ACTION|assert|...\`, **verify the assertion form actually resolves on the live page**:
244
299
 
245
300
  \`\`\`bash
246
301
  # element_visible / element_enabled / element_disabled / element_checked / element_unchecked
247
- agent-browser --session SESSION wait "<selector>" --timeout 3000
248
-
302
+ # Use get count (fast, returns a number). Do NOT use \`wait "<selector>"\` — it blocks the daemon.
303
+ agent-browser --session SESSION get count "<selector>" # >=1 means present
249
304
  # element_not_visible
250
- agent-browser --session SESSION wait "<selector>" --state hidden --timeout 3000
251
-
305
+ agent-browser --session SESSION get count "<selector>" # 0 means absent
252
306
  # text_visible
253
307
  agent-browser --session SESSION wait --text "<text>" --timeout 3000
254
-
255
308
  # text_not_visible
256
- agent-browser --session SESSION wait --text "<text>" --state hidden --timeout 3000
309
+ agent-browser --session SESSION wait --fn "!document.body.innerText.includes('<text>')" --timeout 3000
257
310
  \`\`\`
258
311
 
259
- Apply the "record only successful actions" rule from the AB_ACTION section above. **Additionally**, when *no* form verifies — e.g. you tried \`[aria-label='X']\`, \`[placeholder='X']\`, and \`text=X\` and they all timed out, or the "text" turned out to be an \`alt\` / aria-label — **DROP the assertion entirely**. Fewer, real assertions beat invented ones that fail at replay. Prefer swapping a failed \`text_visible\` for an \`element_visible\` against the link/button selector when the visible label came from \`alt\` / aria-label.
312
+ When *no* form verifies — e.g. \`[aria-label='X']\`, \`[placeholder='X']\`, and \`text=X\` all timed out, or the visible text turned out to be an \`alt\` — **drop the assertion entirely**. Fewer real assertions beat invented ones that fail at replay. \`url_contains\` is exempt (it checks the URL string, not the DOM).
260
313
 
261
- \`url_contains\` is exempt it checks the current URL string, not the DOM/accessibility tree.
314
+ **Field positions — get these RIGHT.** The line is
315
+ \`AB_ACTION|assert|<assertType>|<selector>|<value>|<observation>\`. The value
316
+ (the asserted text for \`text_visible\`/\`text_not_visible\`/\`url_contains\`) goes
317
+ in the **value** slot, NOT the observation slot. A common mistake is writing
318
+ \`text_visible|||Done|...\` (three pipes → empty selector AND empty value, "Done"
319
+ lands in observation): that records an assert with no value and it fails at
320
+ replay. Use exactly two pipes after the assertType for text asserts.
262
321
 
263
- **Examples:**
264
322
  \`\`\`
265
- AB_ACTION|assert|url_contains|||/dashboard|Navigated to dashboard
266
- AB_ACTION|assert|element_disabled|.btn-submit||Submit button disabled before form is valid
267
- AB_ACTION|assert|element_enabled|.btn-submit||Submit button enabled after form is filled
268
- AB_ACTION|assert|text_visible|||Loading|Operation started
269
- AB_ACTION|assert|text_visible|||Done|Operation completed
270
- AB_ACTION|assert|text_visible|||Success|Confirmation message appeared
323
+ AB_ACTION|assert|url_contains||/dashboard|Navigated to dashboard
324
+ AB_ACTION|assert|element_disabled|.btn-submit||Submit disabled before form is valid
325
+ AB_ACTION|assert|element_enabled|.btn-submit||Submit enabled after form is filled
326
+ AB_ACTION|assert|text_visible||Loading|Operation started
327
+ AB_ACTION|assert|text_visible||Done|Operation completed
271
328
  \`\`\`
272
329
 
273
330
  ## Status Protocol
@@ -275,7 +332,7 @@ AB_ACTION|assert|text_visible|||Success|Confirmation message appeared
275
332
  Emit exactly one status line per step (outside any code block):
276
333
 
277
334
  \`\`\`
278
- STEP_START|<step-id>|<short description of what this step does>
335
+ STEP_START|<step-id>|<short description>
279
336
  STEP_DONE|<step-id>|<what was verified>
280
337
  ASSERTION_FAILED|<step-id>|<category: app-bug|env-issue|auth-blocked|missing-test-data|selector-drift|agent-misread>: <reason>
281
338
  STEP_SKIPPED|<step-id>|<reason>
@@ -391,6 +448,35 @@ function run(message) {
391
448
  write("run", message);
392
449
  }
393
450
  /**
451
+ * Render a single-line progress indicator for a step-by-step loop.
452
+ *
453
+ * On a TTY the line is rewritten in place via `\r` so the terminal stays
454
+ * uncluttered. In a non-TTY environment (CI, piped runs) we fall back to
455
+ * a regular `[info]` line every PROGRESS_NONTTY_STRIDE steps to avoid
456
+ * spamming the log with one line per action.
457
+ *
458
+ * Callers MUST call `progressEnd()` when the loop finishes (or aborts) so
459
+ * the carriage-return line gets a final newline; otherwise the next log
460
+ * line lands on the same physical row.
461
+ */
462
+ const PROGRESS_NONTTY_STRIDE = 5;
463
+ let lastProgressNonTtyEmit = -1;
464
+ function progress(current, total, label) {
465
+ const text = `[info] ${current + 1}/${total} ${label}`;
466
+ if (process.stdout.isTTY) {
467
+ process.stdout.write(`\r${text}\x1b[K`);
468
+ return;
469
+ }
470
+ if (current === 0 || current - lastProgressNonTtyEmit >= PROGRESS_NONTTY_STRIDE) {
471
+ process.stdout.write(`${text}\n`);
472
+ lastProgressNonTtyEmit = current;
473
+ }
474
+ }
475
+ function progressEnd() {
476
+ if (process.stdout.isTTY) process.stdout.write(`\r\x1b[K`);
477
+ lastProgressNonTtyEmit = -1;
478
+ }
479
+ /**
394
480
  * Time a long-running step under the given scope, emitting `started` and
395
481
  * `finished in N.Ns` markers. Scope must be a tag the user wants to grep
396
482
  * for — typically "run" for vitest and "fix" for diagnose-loop steps.
@@ -408,6 +494,163 @@ async function timedPhase(label, fn, scope = "fix") {
408
494
  }
409
495
  }
410
496
  //#endregion
497
+ //#region src/spec/yaml-schema.ts
498
+ /**
499
+ * An action step: one user-facing browser interaction. `instruction` and
500
+ * `expected` are the natural-language description handed to Claude during
501
+ * `ccqa trace`. URLs live inside `instruction`, either verbatim or via
502
+ * `${ENV_VAR}` references (resolved at runtime).
503
+ */
504
+ const ActionStepSchema = z.object({
505
+ instruction: z.string().min(1),
506
+ expected: z.string().min(1)
507
+ }).strict();
508
+ /**
509
+ * An include step: invokes a reusable block (`.ccqa/blocks/<name>/spec.yaml`).
510
+ * `params` values are plain strings; env refs (`${VAR}`) inside them are
511
+ * resolved at expand time the same way step instructions are.
512
+ */
513
+ const IncludeStepSchema = z.object({
514
+ include: z.string().min(1),
515
+ params: z.record(z.string(), z.string()).optional()
516
+ }).strict();
517
+ /**
518
+ * A spec step is either an action step or an include step. The two are
519
+ * discriminated by the presence of the `include` key — see `isIncludeStep`.
520
+ */
521
+ const StepSchema = z.union([ActionStepSchema, IncludeStepSchema]);
522
+ /** Top-level spec schema. `.strict()` rejects any unknown key. */
523
+ const TestSpecSchema = z.object({
524
+ title: z.string().min(1),
525
+ relatedPaths: z.array(z.string().min(1)).optional(),
526
+ steps: z.array(StepSchema).min(1)
527
+ }).strict();
528
+ /**
529
+ * A block param declaration. `required` defaults to true; only explicit
530
+ * `required: false` makes it optional. `secret: true` flags the value as
531
+ * sensitive — codegen renders such values as `process.env.<NAME> ?? ""`
532
+ * template literals so the secret never ends up baked into test.spec.ts.
533
+ * `dummy` is a placeholder value surfaced by the draft / drift prompts
534
+ * (which see the block in isolation, before any include site exists);
535
+ * `description` is the param's semantic role, also consumed by those
536
+ * prompts and by spec authors browsing the block.
537
+ */
538
+ const BlockParamSchema = z.object({
539
+ name: z.string().min(1),
540
+ required: z.boolean().optional(),
541
+ secret: z.boolean().optional(),
542
+ dummy: z.string().optional(),
543
+ description: z.string().optional()
544
+ }).strict();
545
+ /**
546
+ * Block schema. Block steps are restricted to ActionStep — nested blocks are
547
+ * forbidden. Including a block from inside another block fails parsing here
548
+ * (the store layer maps the cryptic "Unrecognized key: 'include'" error into
549
+ * a targeted nested-block message).
550
+ */
551
+ const BlockSpecSchema = z.object({
552
+ title: z.string().min(1),
553
+ params: z.array(BlockParamSchema).optional(),
554
+ steps: z.array(ActionStepSchema).min(1)
555
+ }).strict();
556
+ /** Runtime predicate for the StepSchema union. */
557
+ function isIncludeStep(step) {
558
+ return "include" in step;
559
+ }
560
+ /** Returns true if a block param is required (default: true). */
561
+ function isParamRequired(param) {
562
+ return param.required !== false;
563
+ }
564
+ //#endregion
565
+ //#region src/types.ts
566
+ const RouteStepSchema = z.object({
567
+ title: z.string(),
568
+ action: z.string(),
569
+ observation: z.string(),
570
+ status: z.enum([
571
+ "PASSED",
572
+ "FAILED",
573
+ "SKIPPED"
574
+ ]),
575
+ reason: z.string().optional()
576
+ });
577
+ z.object({
578
+ specName: z.string(),
579
+ timestamp: z.string(),
580
+ status: z.enum(["passed", "failed"]),
581
+ steps: z.array(RouteStepSchema)
582
+ });
583
+ /**
584
+ * Semantic locator strategies exposed by `agent-browser find`. Used by the
585
+ * `find_*` commands when a target cannot be uniquely picked out by the
586
+ * ALLOWED CSS forms (e.g. repeated `aria-label='1 reply'` rows where only
587
+ * "the last one" is meaningful).
588
+ *
589
+ * `first` / `last` / `nth` are positional helpers and their `findValue`
590
+ * carries an inner CSS selector; `nth` additionally needs `findIndex`. The
591
+ * remaining locators read `findValue` as the human-visible text/id.
592
+ * `role` may pair with `findName` to filter by accessible name.
593
+ */
594
+ const FIND_LOCATORS = [
595
+ "role",
596
+ "text",
597
+ "label",
598
+ "placeholder",
599
+ "alt",
600
+ "title",
601
+ "testid",
602
+ "first",
603
+ "last",
604
+ "nth"
605
+ ];
606
+ /**
607
+ * Actions reachable via `agent-browser find <locator> ... <action>`. Kept
608
+ * here next to the locator list so all `find_*` knowledge lives in one
609
+ * place — `cli/trace.ts`, `claude/invoke.ts`, and `runtime/replay-validate.ts`
610
+ * import these instead of redefining their own sets.
611
+ */
612
+ const FIND_ACTIONS = [
613
+ "click",
614
+ "dblclick",
615
+ "fill",
616
+ "type",
617
+ "hover",
618
+ "focus",
619
+ "check",
620
+ "uncheck"
621
+ ];
622
+ const DraftIssueSchema = z.object({
623
+ severity: z.enum([
624
+ "OK",
625
+ "WARN",
626
+ "ERROR"
627
+ ]),
628
+ category: z.enum([
629
+ "assertable",
630
+ "blocks",
631
+ "granularity",
632
+ "unimplemented"
633
+ ]),
634
+ stepId: z.string().nullable(),
635
+ message: z.string(),
636
+ detail: z.string().optional()
637
+ });
638
+ const DraftReportSchema = z.object({
639
+ issues: z.array(DraftIssueSchema),
640
+ patch: z.string()
641
+ });
642
+ const DRAFT_CATEGORY_LABEL = {
643
+ assertable: "Assertability",
644
+ blocks: "Block references",
645
+ granularity: "Step granularity",
646
+ unimplemented: "Unimplemented checks"
647
+ };
648
+ const DraftNamingSchema = z.object({
649
+ featureName: z.string().min(1),
650
+ specName: z.string().min(1),
651
+ reason: z.string().optional()
652
+ });
653
+ //#endregion
411
654
  //#region src/claude/invoke.ts
412
655
  function resolveModel(explicit) {
413
656
  if (explicit) return explicit;
@@ -444,12 +687,25 @@ async function invokeClaudeStreaming(options, onEvent) {
444
687
  if (typeof cmd !== "string") return {};
445
688
  if (isBlockedAbSubcommand(cmd)) return {
446
689
  decision: "block",
447
- reason: "This agent-browser subcommand is not allowed because it cannot be recorded as a structured test action. Use only the standard commands: click, check, fill, select, hover, press, wait. Take a fresh snapshot to find the correct selector."
690
+ reason: "This agent-browser subcommand is not allowed because it cannot be recorded as a structured test action. Use only the standard commands: click, check, fill, select, hover, press, wait, find (with role/text/label/placeholder/alt/title/testid/first/last/nth). Take a fresh snapshot to find the correct selector."
448
691
  };
449
692
  if (hasRefSelector(cmd)) return {
450
693
  decision: "block",
451
694
  reason: "@ref selectors (like @e14) are session-specific and change every run. They cannot be used in generated tests. Use one of the allowed selector formats instead: [aria-label='...'], text=..., [placeholder='...'], or [type='password']. Take a fresh snapshot and find the element's aria-label or visible text."
452
695
  };
696
+ const bareTag = findPositionalBareTag(cmd);
697
+ if (bareTag !== null) return {
698
+ decision: "block",
699
+ reason: `\`find ${bareTag.locator}\` with a bare tag selector (\`${bareTag.selector}\`) is rejected: it matches every <${bareTag.selector}> on the page and is non-deterministic on replay. Pass a specific attribute selector instead, e.g. \`find ${bareTag.locator} "[aria-label='...']" ${bareTag.action}\` or \`find ${bareTag.locator} "[data-qa='...']" ${bareTag.action}\`. Take a fresh snapshot to find the right attribute.`
700
+ };
701
+ if (hasMultipleAbInvocations(cmd)) return {
702
+ decision: "block",
703
+ reason: "Run each `agent-browser` call as its own Bash command. Chaining multiple invocations with &&, ;, |, or || prevents ccqa from recording them as discrete steps and lets failed attempts leak into the trace. Issue one Bash tool call per agent-browser command."
704
+ };
705
+ if (hasErrorSuppression(cmd)) return {
706
+ decision: "block",
707
+ reason: "Do not suppress errors on `agent-browser` commands. Remove `|| true`, `|| :`, `2>/dev/null`, `; true`, and similar redirects so ccqa can detect failures and roll back unsuccessful attempts. Run the command standalone and let it surface its exit code."
708
+ };
453
709
  const ab = extractAbActionFromBashCommand(cmd);
454
710
  if (ab && onAbAction) {
455
711
  lastAbToolUseId = input.tool_use_id;
@@ -496,7 +752,6 @@ async function invokeClaudeStreaming(options, onEvent) {
496
752
  const BLOCKED_AB_SUBCOMMANDS = new Set([
497
753
  "eval",
498
754
  "js",
499
- "find",
500
755
  "label",
501
756
  "textbox"
502
757
  ]);
@@ -557,6 +812,33 @@ function isBashToolResponseError(tool_response) {
557
812
  if (r["killed"] === true) return true;
558
813
  return false;
559
814
  }
815
+ /**
816
+ * Detect `agent-browser ... find first|last|nth <bare-tag> <action>`. A bare
817
+ * tag inside a *positional* finder matches every element of that tag on the
818
+ * page, so "the last button" picks a different element whenever the page
819
+ * shape shifts — recorded tests built on top are flaky by construction. The
820
+ * check is narrow on purpose: `find role button --name X` is fine because
821
+ * role + accessible name stays stable.
822
+ */
823
+ function findPositionalBareTag(cmd) {
824
+ if (extractAbSubcommand(cmd) !== "find") return null;
825
+ const abIdx = cmd.indexOf("agent-browser");
826
+ const parts = shellTokenize(cmd.slice(abIdx + 13).trim());
827
+ let i = 0;
828
+ while (i < parts.length && parts[i].startsWith("-")) i += 2;
829
+ const locator = parts[i + 1];
830
+ if (locator !== "first" && locator !== "last" && locator !== "nth") return null;
831
+ const innerIdx = locator === "nth" ? i + 3 : i + 2;
832
+ const inner = parts[innerIdx];
833
+ const action = parts[innerIdx + 1] ?? "";
834
+ if (!inner) return null;
835
+ if (!/^[a-zA-Z][a-zA-Z0-9]*$/.test(inner)) return null;
836
+ return {
837
+ locator,
838
+ selector: inner,
839
+ action
840
+ };
841
+ }
560
842
  /** Returns true if any argument to an agent-browser command uses a @ref selector (e.g. @e14). */
561
843
  function hasRefSelector(cmd) {
562
844
  const abIdx = cmd.indexOf("agent-browser");
@@ -569,6 +851,69 @@ function hasRefSelector(cmd) {
569
851
  return false;
570
852
  }
571
853
  /**
854
+ * Returns true when `cmd` contains more than one `agent-browser` invocation
855
+ * chained together via shell operators (`&&`, `||`, `;`, `|`). The
856
+ * PreToolUse hook only records ONE AB_ACTION per Bash call, so chained
857
+ * invocations would silently drop every intermediate failure — turning
858
+ * "I tried four selectors before one worked" into a clean-looking trace
859
+ * with five orphaned actions that later fail at replay.
860
+ *
861
+ * The check tokenizes the command and counts `agent-browser` occurrences
862
+ * that appear at the start of a shell command (i.e. immediately after a
863
+ * statement separator or at index 0). String literals are honoured so
864
+ * `agent-browser fill 'agent-browser'` doesn't false-fire.
865
+ */
866
+ function hasMultipleAbInvocations(cmd) {
867
+ const boundaries = [0];
868
+ let quote = null;
869
+ for (let i = 0; i < cmd.length; i++) {
870
+ const ch = cmd[i];
871
+ if (quote) {
872
+ if (ch === quote) quote = null;
873
+ continue;
874
+ }
875
+ if (ch === "\"" || ch === "'" || ch === "`") {
876
+ quote = ch;
877
+ continue;
878
+ }
879
+ if (ch === ";" || ch === "|" || ch === "&") {
880
+ while (i + 1 < cmd.length && (cmd[i + 1] === "|" || cmd[i + 1] === "&" || cmd[i + 1] === ";")) i++;
881
+ boundaries.push(i + 1);
882
+ }
883
+ }
884
+ let count = 0;
885
+ for (const start of boundaries) {
886
+ let j = start;
887
+ while (j < cmd.length && (cmd[j] === " " || cmd[j] === " " || cmd[j] === "\n")) j++;
888
+ if (cmd.slice(j, j + 13) !== "agent-browser") continue;
889
+ const after = cmd[j + 13];
890
+ if (after !== void 0 && /[A-Za-z0-9_\-]/.test(after)) continue;
891
+ count++;
892
+ if (count > 1) return true;
893
+ }
894
+ return false;
895
+ }
896
+ /**
897
+ * Returns true when an `agent-browser` command in `cmd` has its exit
898
+ * status hidden by a shell decorator that would prevent ccqa from rolling
899
+ * back a failed attempt:
900
+ *
901
+ * - trailing `|| true` / `|| :` / `; true` (force exit 0)
902
+ * - `2>/dev/null` and friends (drop stderr, sometimes paired with `|| true`)
903
+ *
904
+ * The agent-browser command itself returns exit 1 on selector miss, so
905
+ * once one of these is present the PostToolUse hook sees `is_error=false`
906
+ * and the bad attempt sneaks into actions.json.
907
+ */
908
+ function hasErrorSuppression(cmd) {
909
+ if (cmd.indexOf("agent-browser") === -1) return false;
910
+ if (/\|\|\s*(true|:|\s*$|#)/.test(cmd)) return true;
911
+ if (/;\s*(true|:)\b/.test(cmd)) return true;
912
+ if (/2\s*>\s*\/dev\/null/.test(cmd)) return true;
913
+ if (/&\s*>\s*\/dev\/null/.test(cmd)) return true;
914
+ return false;
915
+ }
916
+ /**
572
917
  * Parse an `agent-browser --session <name> <cmd> [args...]` bash command
573
918
  * and return the corresponding AB_ACTION line, or null if not an agent-browser call.
574
919
  */
@@ -598,9 +943,56 @@ function extractAbActionFromBashCommand(cmd) {
598
943
  case "select": return `AB_ACTION|${subCmd}|${args[0] ?? ""}|${args[1] ?? ""}|${args[2] ?? ""}`;
599
944
  case "drag": return `AB_ACTION|drag|${args[0] ?? ""}|${args[1] ?? ""}|${args[2] ?? ""}`;
600
945
  case "snapshot": return null;
946
+ case "find": return extractFindAbAction(args);
601
947
  default: return null;
602
948
  }
603
949
  }
950
+ const FIND_ACTION_SET = new Set(FIND_ACTIONS);
951
+ const FIND_LOCATOR_SET = new Set(FIND_LOCATORS);
952
+ /**
953
+ * Parse the positional tokens of `agent-browser find <locator> <value> [...]
954
+ * <action> [fillValue]` and produce a canonical
955
+ * `AB_ACTION|find_<action>|<locator>|<value>|<extra>|<exact>|...|<label>`
956
+ * line. The wire format keeps a fixed positional layout across locators so
957
+ * downstream `parseAbAction` in `cli/trace.ts` can split on `|` alone:
958
+ *
959
+ * <extra> is `--name` value for role, integer index for nth, "" otherwise.
960
+ * <exact> is the literal "exact" if --exact was passed, "" otherwise.
961
+ *
962
+ * Returns null for malformed invocations — the caller treats null as "not a
963
+ * structured action" and the Bash command still runs unobserved.
964
+ */
965
+ function extractFindAbAction(args) {
966
+ const locator = args[0];
967
+ if (!locator || !FIND_LOCATOR_SET.has(locator)) return null;
968
+ let i = 1;
969
+ let value = args[i] ?? "";
970
+ i++;
971
+ let extra = "";
972
+ if (locator === "nth") {
973
+ extra = value;
974
+ value = args[i] ?? "";
975
+ i++;
976
+ }
977
+ let action = "";
978
+ let name = "";
979
+ let exact = "";
980
+ let fillValue = "";
981
+ for (; i < args.length; i++) {
982
+ const tok = args[i];
983
+ if (tok === "--name") {
984
+ name = args[i + 1] ?? "";
985
+ i++;
986
+ } else if (tok === "--exact") exact = "exact";
987
+ else if (FIND_ACTION_SET.has(tok)) action = tok;
988
+ else if (action) fillValue = tok;
989
+ }
990
+ if (!action) return null;
991
+ if (locator === "role") extra = name;
992
+ const command = `find_${action}`;
993
+ if (action === "fill" || action === "type") return `AB_ACTION|${command}|${locator}|${value}|${extra}|${exact}|${fillValue}|`;
994
+ return `AB_ACTION|${command}|${locator}|${value}|${extra}|${exact}|`;
995
+ }
604
996
  async function buildMessageStream(prompt, options) {
605
997
  const mockFile = process.env["CCQA_CLAUDE_MOCK_FILE"];
606
998
  if (mockFile) return replayMockMessages(mockFile);
@@ -634,6 +1026,20 @@ function substituteVars(value, lookup) {
634
1026
  });
635
1027
  }
636
1028
  /**
1029
+ * Iterate every `${NAME}` / `$NAME` reference name (case-insensitive form)
1030
+ * appearing in `value`. Used by callers that want to enumerate refs without
1031
+ * also substituting, e.g. the env-scrub map builder. The reference name
1032
+ * grammar is the canonical one shared with `substituteVars`.
1033
+ */
1034
+ function* iterEnvRefNames(value) {
1035
+ ANY_VAR_RE.lastIndex = 0;
1036
+ let m;
1037
+ while ((m = ANY_VAR_RE.exec(value)) !== null) {
1038
+ const name = m[1] ?? m[2];
1039
+ if (name) yield name;
1040
+ }
1041
+ }
1042
+ /**
637
1043
  * Resolve every `$VAR` / `${VAR}` reference against the current process env.
638
1044
  *
639
1045
  * Missing variables expand to the empty string, mirroring `sh` behaviour.
@@ -692,74 +1098,6 @@ function refsToJsExpression(value, nameToExpr) {
692
1098
  })}\``;
693
1099
  }
694
1100
  //#endregion
695
- //#region src/spec/yaml-schema.ts
696
- /**
697
- * An action step: one user-facing browser interaction. `instruction` and
698
- * `expected` are the natural-language description handed to Claude during
699
- * `ccqa trace`. URLs live inside `instruction`, either verbatim or via
700
- * `${ENV_VAR}` references (resolved at runtime).
701
- */
702
- const ActionStepSchema = z.object({
703
- instruction: z.string().min(1),
704
- expected: z.string().min(1)
705
- }).strict();
706
- /**
707
- * An include step: invokes a reusable block (`.ccqa/blocks/<name>/spec.yaml`).
708
- * `params` values are plain strings; env refs (`${VAR}`) inside them are
709
- * resolved at expand time the same way step instructions are.
710
- */
711
- const IncludeStepSchema = z.object({
712
- include: z.string().min(1),
713
- params: z.record(z.string(), z.string()).optional()
714
- }).strict();
715
- /**
716
- * A spec step is either an action step or an include step. The two are
717
- * discriminated by the presence of the `include` key — see `isIncludeStep`.
718
- */
719
- const StepSchema = z.union([ActionStepSchema, IncludeStepSchema]);
720
- /** Top-level spec schema. `.strict()` rejects any unknown key. */
721
- const TestSpecSchema = z.object({
722
- title: z.string().min(1),
723
- relatedPaths: z.array(z.string().min(1)).optional(),
724
- steps: z.array(StepSchema).min(1)
725
- }).strict();
726
- /**
727
- * A block param declaration. `required` defaults to true; only explicit
728
- * `required: false` makes it optional. `secret: true` flags the value as
729
- * sensitive — codegen renders such values as `process.env.<NAME> ?? ""`
730
- * template literals so the secret never ends up baked into test.spec.ts.
731
- * `dummy` is a placeholder value surfaced by the draft / drift prompts
732
- * (which see the block in isolation, before any include site exists);
733
- * `description` is the param's semantic role, also consumed by those
734
- * prompts and by spec authors browsing the block.
735
- */
736
- const BlockParamSchema = z.object({
737
- name: z.string().min(1),
738
- required: z.boolean().optional(),
739
- secret: z.boolean().optional(),
740
- dummy: z.string().optional(),
741
- description: z.string().optional()
742
- }).strict();
743
- /**
744
- * Block schema. Block steps are restricted to ActionStep — nested blocks are
745
- * forbidden. Including a block from inside another block fails parsing here
746
- * (the store layer maps the cryptic "Unrecognized key: 'include'" error into
747
- * a targeted nested-block message).
748
- */
749
- const BlockSpecSchema = z.object({
750
- title: z.string().min(1),
751
- params: z.array(BlockParamSchema).optional(),
752
- steps: z.array(ActionStepSchema).min(1)
753
- }).strict();
754
- /** Runtime predicate for the StepSchema union. */
755
- function isIncludeStep(step) {
756
- return "include" in step;
757
- }
758
- /** Returns true if a block param is required (default: true). */
759
- function isParamRequired(param) {
760
- return param.required !== false;
761
- }
762
- //#endregion
763
1101
  //#region src/spec/parser.ts
764
1102
  /** Parse a spec.yaml. Schema rejections are rewritten with actionable messages. */
765
1103
  function parseTestSpec(content, source = "spec.yaml") {
@@ -994,6 +1332,32 @@ async function loadAvailableBlocks(cwd) {
994
1332
  }))
995
1333
  }));
996
1334
  }
1335
+ const TRACE_USER_PROMPT_PATH = ".ccqa/prompts/trace.user.md";
1336
+ const TRACE_USER_PROMPT_MAX_BYTES = 32768;
1337
+ /**
1338
+ * Load project-specific guidance to append to the trace system prompt.
1339
+ *
1340
+ * Returns the file's contents (trimmed) when `.ccqa/prompts/trace.user.md`
1341
+ * exists and is non-empty. Missing file, empty file, or read error all
1342
+ * resolve to `null` so callers can treat the override as strictly optional.
1343
+ *
1344
+ * The file is meant for organisation-specific rules that don't belong in
1345
+ * the OSS-default prompt — naming conventions, staging URL hints, repeated
1346
+ * UI quirks that recur across specs. Anything that genuinely belongs in
1347
+ * one spec should go in that spec's instruction, not here.
1348
+ *
1349
+ * Size-capped at 32 KiB to keep accidental commits of huge files from
1350
+ * blowing up the system prompt; the cap is observable to callers as a
1351
+ * truncated warning suffix.
1352
+ */
1353
+ async function loadTraceUserPrompt(cwd) {
1354
+ const content = await readFile(join(cwd ?? process.cwd(), TRACE_USER_PROMPT_PATH), "utf-8").catch(() => null);
1355
+ if (content === null) return null;
1356
+ const trimmed = content.trim();
1357
+ if (trimmed.length === 0) return null;
1358
+ if (trimmed.length > TRACE_USER_PROMPT_MAX_BYTES) return trimmed.slice(0, TRACE_USER_PROMPT_MAX_BYTES) + `\n\n[ccqa] (trace.user.md truncated at ${TRACE_USER_PROMPT_MAX_BYTES} bytes)`;
1359
+ return trimmed;
1360
+ }
997
1361
  /**
998
1362
  * Probe for orphaned files left over from earlier ccqa versions inside
999
1363
  * `.ccqa/blocks/<name>/`. Both pre-v0.4 `test.spec.ts` (function-export
@@ -1246,6 +1610,33 @@ function formatAgentBrowserUnavailableMessage() {
1246
1610
  }
1247
1611
  //#endregion
1248
1612
  //#region src/runtime/replay-validate.ts
1613
+ function isPollCheck(x) {
1614
+ return x !== null && !Array.isArray(x) && x.kind === "poll-present";
1615
+ }
1616
+ const SELECTOR_POLL_INTERVAL_MS = 500;
1617
+ /** Poll `get count <selector>` until it matches (>=1) or the timeout elapses. */
1618
+ function runPollCheck(check, sessionName) {
1619
+ const deadline = Date.now() + check.timeoutMs;
1620
+ for (;;) {
1621
+ const r = spawnAB([
1622
+ "--session",
1623
+ sessionName,
1624
+ "get",
1625
+ "count",
1626
+ check.selector
1627
+ ]);
1628
+ const count = r.status === 0 ? Number.parseInt(r.stdout.trim(), 10) : NaN;
1629
+ if (!Number.isNaN(count) && count > 0) return {
1630
+ ok: true,
1631
+ reason: ""
1632
+ };
1633
+ if (Date.now() >= deadline) return {
1634
+ ok: false,
1635
+ reason: `selector not present within ${check.timeoutMs}ms (get count returned ${Number.isNaN(count) ? "error" : count})`
1636
+ };
1637
+ sleepSync(SELECTOR_POLL_INTERVAL_MS);
1638
+ }
1639
+ }
1249
1640
  const SHORT_TIMEOUT_MS = 5e3;
1250
1641
  const ASSERT_TIMEOUT_MS = 1e4;
1251
1642
  /**
@@ -1330,6 +1721,7 @@ function actionToAbArgs(action, sessionName) {
1330
1721
  const raw = sub(action.selector);
1331
1722
  if (!raw) return null;
1332
1723
  if (/^\d+$/.test(raw)) return null;
1724
+ if (raw.startsWith("--")) return null;
1333
1725
  if (raw.startsWith("text=")) return [
1334
1726
  ...base,
1335
1727
  "wait",
@@ -1338,18 +1730,47 @@ function actionToAbArgs(action, sessionName) {
1338
1730
  "--timeout",
1339
1731
  String(SHORT_TIMEOUT_MS)
1340
1732
  ];
1341
- return [
1342
- ...base,
1343
- "wait",
1344
- raw,
1345
- "--timeout",
1346
- String(SHORT_TIMEOUT_MS)
1347
- ];
1733
+ return {
1734
+ kind: "poll-present",
1735
+ selector: raw,
1736
+ timeoutMs: SHORT_TIMEOUT_MS
1737
+ };
1348
1738
  }
1349
1739
  case "snapshot": return null;
1350
1740
  case "assert": return assertToAbArgs(action, sub, sessionName);
1741
+ case "find_click":
1742
+ case "find_dblclick":
1743
+ case "find_hover":
1744
+ case "find_focus":
1745
+ case "find_check":
1746
+ case "find_uncheck": return buildFindArgs$1(action, void 0, sub, base);
1747
+ case "find_fill":
1748
+ case "find_type": return buildFindArgs$1(action, sub(action.value), sub, base);
1351
1749
  }
1352
1750
  }
1751
+ /**
1752
+ * Build the agent-browser argv for a recorded `find_*` action. Mirrors the
1753
+ * codegen shape in `actions-to-script.ts:buildFindArgs` but emits a plain
1754
+ * string array. Env refs in `findValue` / `findName` resolve through `sub`
1755
+ * so the validator hits the same DOM the generated test will.
1756
+ */
1757
+ function buildFindArgs$1(action, fillValue, sub, base) {
1758
+ const locator = action.findLocator;
1759
+ if (!locator || !action.findValue) return null;
1760
+ const innerAction = action.command.slice(5).replace("type", "fill");
1761
+ const out = [
1762
+ ...base,
1763
+ "find",
1764
+ locator
1765
+ ];
1766
+ if (locator === "nth") out.push(String(action.findIndex ?? 0));
1767
+ out.push(sub(action.findValue));
1768
+ out.push(innerAction);
1769
+ if (fillValue !== void 0) out.push(fillValue);
1770
+ if (locator === "role" && action.findName) out.push("--name", sub(action.findName));
1771
+ if (action.findExact) out.push("--exact");
1772
+ return out;
1773
+ }
1353
1774
  function assertToAbArgs(action, sub, sessionName) {
1354
1775
  const base = ["--session", sessionName];
1355
1776
  const val = sub(action.value ?? action.observation);
@@ -1368,13 +1789,11 @@ function assertToAbArgs(action, sub, sessionName) {
1368
1789
  case "text_not_visible": return null;
1369
1790
  case "element_visible":
1370
1791
  if (!sel) return null;
1371
- return [
1372
- ...base,
1373
- "wait",
1374
- sel,
1375
- "--timeout",
1376
- String(ASSERT_TIMEOUT_MS)
1377
- ];
1792
+ return {
1793
+ kind: "poll-present",
1794
+ selector: sel,
1795
+ timeoutMs: ASSERT_TIMEOUT_MS
1796
+ };
1378
1797
  case "element_not_visible": return null;
1379
1798
  case "url_contains": return null;
1380
1799
  case "element_enabled":
@@ -1382,23 +1801,59 @@ function assertToAbArgs(action, sub, sessionName) {
1382
1801
  case "element_checked":
1383
1802
  case "element_unchecked":
1384
1803
  if (!sel || sel.startsWith("text=") || sel.startsWith("[aria-label=")) return null;
1385
- return [
1386
- ...base,
1387
- "wait",
1388
- sel,
1389
- "--timeout",
1390
- String(ASSERT_TIMEOUT_MS)
1391
- ];
1804
+ return {
1805
+ kind: "poll-present",
1806
+ selector: sel,
1807
+ timeoutMs: ASSERT_TIMEOUT_MS
1808
+ };
1392
1809
  default: return null;
1393
1810
  }
1394
1811
  }
1812
+ const NO_STEP_ID = "__no_step__";
1813
+ /**
1814
+ * Replay one recorded action against the validation session. Element-presence
1815
+ * checks go through `runPollCheck` (which uses `get count`, never the blocking
1816
+ * `wait <selector>`); everything else spawns the agent-browser argv. A single
1817
+ * hard-timeout (SIGTERM) retry covers the daemon's occasional under-load drop.
1818
+ */
1819
+ function runValidationAction(action, sessionName) {
1820
+ const built = actionToAbArgs(action, sessionName);
1821
+ if (built === null) return {
1822
+ skipped: true,
1823
+ ok: false,
1824
+ reason: ""
1825
+ };
1826
+ if (isPollCheck(built)) {
1827
+ const { ok, reason } = runPollCheck(built, sessionName);
1828
+ return {
1829
+ skipped: false,
1830
+ ok,
1831
+ reason
1832
+ };
1833
+ }
1834
+ let result = spawnAB(built);
1835
+ if (result.status !== 0 && looksLikeHardTimeout(result)) result = spawnAB(built);
1836
+ if (result.status === 0) return {
1837
+ skipped: false,
1838
+ ok: true,
1839
+ reason: ""
1840
+ };
1841
+ return {
1842
+ skipped: false,
1843
+ ok: false,
1844
+ reason: (result.stderr.trim() || result.stdout.trim() || `agent-browser exit ${result.status ?? "?"}`).slice(0, 200)
1845
+ };
1846
+ }
1395
1847
  function validateActions(actions, opts) {
1396
1848
  const kept = [];
1397
1849
  const dropped = [];
1398
- let skipUntilSideEffect = false;
1850
+ let skipFromStepId = null;
1399
1851
  for (let i = 0; i < actions.length; i++) {
1400
1852
  const action = actions[i];
1401
- if (skipUntilSideEffect && isPassiveCommand(action.command)) {
1853
+ opts.onProgress?.(i, actions.length, action);
1854
+ const stepId = action.stepId ?? NO_STEP_ID;
1855
+ if (skipFromStepId !== null && skipFromStepId !== stepId) skipFromStepId = null;
1856
+ if (skipFromStepId !== null && isPassiveCommand(action.command)) {
1402
1857
  dropped.push({
1403
1858
  index: i,
1404
1859
  action,
@@ -1406,28 +1861,111 @@ function validateActions(actions, opts) {
1406
1861
  });
1407
1862
  continue;
1408
1863
  }
1409
- skipUntilSideEffect = false;
1410
- const args = actionToAbArgs(action, opts.sessionName);
1411
- if (args === null) {
1864
+ const outcome = runValidationAction(action, opts.sessionName);
1865
+ if (outcome.skipped) {
1412
1866
  kept.push(action);
1413
1867
  continue;
1414
1868
  }
1415
- const result = spawnAB(args);
1416
- if (result.status === 0) {
1869
+ if (outcome.ok) {
1417
1870
  kept.push(action);
1871
+ if (skipFromStepId !== null && !isPassiveCommand(action.command)) skipFromStepId = null;
1418
1872
  continue;
1419
1873
  }
1420
1874
  dropped.push({
1421
1875
  index: i,
1422
1876
  action,
1423
- reason: (result.stderr.trim() || result.stdout.trim() || `agent-browser exit ${result.status ?? "?"}`).slice(0, 200)
1877
+ reason: outcome.reason
1424
1878
  });
1425
- skipUntilSideEffect = true;
1879
+ if (!isPassiveCommand(action.command)) skipFromStepId = stepId;
1880
+ }
1881
+ return splitByMode(actions, rescueLostSteps(actions, kept, dropped, opts), opts.mode ?? "lenient");
1882
+ }
1883
/**
 * Converts the internal `{ kept, dropped, rescuedSteps }` result of the
 * rescue pass into the shape returned to callers.
 *
 * - `"strict"`: kept/dropped are passed through and `unstable` is empty.
 * - any other mode (lenient): every action that is not in `kept` but has a
 *   drop record at its index is tagged in place with `replayUnstable: true`
 *   and `replayReason`, and reported under `unstable`; `dropped` is empty.
 *
 * Membership in `kept` is decided by object identity, and the tagging mutates
 * the original action objects.
 *
 * @param originalActions - Actions in their recorded order.
 * @param result - `{ kept, dropped, rescuedSteps }` from the rescue pass.
 * @param mode - `"strict"` or `"lenient"`.
 * @returns `{ kept, unstable, dropped, rescuedSteps }`.
 */
function splitByMode(originalActions, result, mode) {
	const { kept, dropped, rescuedSteps } = result;
	if (mode === "strict") {
		return { kept, unstable: [], dropped, rescuedSteps };
	}
	const survivors = new Set(kept);
	const dropAt = new Map();
	for (const entry of dropped) dropAt.set(entry.index, entry);
	const stable = [];
	const flagged = [];
	for (const [position, candidate] of originalActions.entries()) {
		if (survivors.has(candidate)) {
			stable.push(candidate);
			continue;
		}
		const entry = dropAt.get(position);
		if (!entry) continue;
		candidate.replayUnstable = true;
		candidate.replayReason = entry.reason;
		flagged.push(candidate);
	}
	return {
		kept: stable,
		unstable: flagged,
		dropped: [],
		rescuedSteps
	};
}
1921
/**
 * Second-chance pass for steps that lost every one of their actions.
 *
 * A step is "lost" when it has at least one drop record and no action with
 * the same `stepId` in `kept`. Each dropped action of a lost step is replayed
 * once more through `runValidationAction`; the ones that now succeed are
 * moved back into `kept` (at their original position) and removed from
 * `dropped`.
 *
 * The result always carries a `rescuedSteps` array (empty when nothing was
 * rescued) so callers see one consistent shape on every path.
 *
 * @param actions - All actions in recorded order.
 * @param kept - Actions that survived the first validation pass.
 * @param dropped - Drop records `{ index, action, reason }` from that pass.
 * @param opts - Validation options; only `sessionName` is read here.
 * @returns `{ kept, dropped, rescuedSteps }`.
 */
function rescueLostSteps(actions, kept, dropped, opts) {
	const stepsWithSurvivors = /* @__PURE__ */ new Set();
	for (const a of kept) if (a.stepId) stepsWithSurvivors.add(a.stepId);
	// Group drop records by step, but only for steps with no surviving action.
	const lostStepDrops = /* @__PURE__ */ new Map();
	for (const d of dropped) {
		const id = d.action.stepId;
		if (!id || stepsWithSurvivors.has(id)) continue;
		const list = lostStepDrops.get(id) ?? [];
		list.push(d);
		lostStepDrops.set(id, list);
	}
	if (lostStepDrops.size === 0) return {
		kept,
		dropped,
		rescuedSteps: []
	};
	const rescuedIndices = /* @__PURE__ */ new Set();
	const rescuedSteps = [];
	for (const [stepId, drops] of lostStepDrops.entries()) {
		let anyForThisStep = false;
		for (const d of drops) {
			const outcome = runValidationAction(d.action, opts.sessionName);
			if (outcome.skipped) continue;
			if (outcome.ok) {
				rescuedIndices.add(d.index);
				anyForThisStep = true;
			}
		}
		if (anyForThisStep) rescuedSteps.push(stepId);
	}
	if (rescuedIndices.size === 0) return {
		kept,
		dropped,
		rescuedSteps: []
	};
	// Rebuild `kept` from the original list so rescued actions land in order.
	const keptSet = new Set(kept);
	const newKept = actions.filter((action, i) => rescuedIndices.has(i) || keptSet.has(action));
	return {
		kept: newKept,
		dropped: dropped.filter((d) => !rescuedIndices.has(d.index)),
		rescuedSteps
	};
}
1966
/**
 * Did this agent-browser invocation get SIGTERM'd by the ccqa hard-timeout
 * watchdog? Detection is purely textual: the marker sentence must appear in
 * the captured stderr.
 *
 * A missing/null `stderr` (e.g. a process that never produced piped output)
 * is treated as "no timeout" instead of throwing.
 *
 * @param result - Spawn result; only `stderr` is inspected.
 * @returns `true` when stderr contains the hard-timeout marker.
 */
function looksLikeHardTimeout(result) {
	const stderr = result?.stderr;
	if (stderr == null) return false;
	return String(stderr).includes("agent-browser killed after hard timeout");
}
1432
1970
  /**
1433
1971
  * Passive (read-only) commands whose only effect is observation. When a
@@ -1438,12 +1976,228 @@ function isPassiveCommand(cmd) {
1438
1976
  return cmd === "snapshot" || cmd === "wait" || cmd === "assert";
1439
1977
  }
1440
1978
  //#endregion
1979
+ //#region src/runtime/env-scrub.ts
1980
/**
 * Collects every `${VAR}` name referenced by the spec or by its expanded
 * (block-inlined) steps and pairs each one that currently has a non-empty
 * value in `process.env` with its placeholder: `[envValue, "${VAR}"]`.
 *
 * What gets scanned:
 *  - `spec.steps`: for include steps, every value in `params`; for plain
 *    steps, `instruction` and `expected`.
 *  - `expanded`: `instruction` and `expected` of every inlined step.
 *
 * Names whose env value is unset or empty are not put in the map (scrubbing
 * against an empty string would corrupt the text); they are returned in
 * `unresolved` so the caller can warn about them.
 *
 * The map is sorted longest value first so that a short value which is a
 * substring of a longer one cannot clobber it.
 *
 * @param spec - Parsed spec; only `steps` is read.
 * @param expanded - Expanded step list.
 * @returns `{ map, unresolved }`.
 */
function buildSpecEnvScrub(spec, expanded) {
	const names = /* @__PURE__ */ new Set();
	const scan = (text) => collect(text, names);
	for (const step of spec.steps) {
		if (isIncludeStep(step)) {
			for (const paramValue of Object.values(step.params ?? {})) scan(paramValue);
		} else {
			scan(step.instruction);
			scan(step.expected);
		}
	}
	for (const inlined of expanded) {
		scan(inlined.instruction);
		scan(inlined.expected);
	}
	const resolved = [];
	const unresolved = [];
	for (const name of names) {
		const current = process.env[name];
		const usable = typeof current === "string" && current.length > 0;
		if (usable) resolved.push([current, "${" + name + "}"]);
		else unresolved.push(name);
	}
	resolved.sort((left, right) => right[0].length - left[0].length);
	return {
		map: resolved,
		unresolved
	};
}
2030
/**
 * Adds every env-reference name that `iterEnvRefNames` yields for `value`
 * to the `into` set.
 */
function collect(value, into) {
	const refNames = iterEnvRefNames(value);
	for (const refName of refNames) {
		into.add(refName);
	}
}
2033
/**
 * Replace every occurrence of an env value in `text` with its `${VAR}`
 * placeholder.
 *
 * Entries are applied in map order, so the map should be sorted
 * longest-value-first (as `buildSpecEnvScrub` does) to stop a short value
 * from shadowing a longer one that contains it.
 *
 * Replacement is done on segments rather than by chained `replaceAll`:
 *  - text that has already been turned into a placeholder is never scanned
 *    again, so a later value that happens to match part of a placeholder
 *    (or straddle its boundary) cannot corrupt it;
 *  - placeholders are inserted verbatim — `$&`, `$1`, `$$` and friends are
 *    NOT interpreted as replacement patterns;
 *  - empty values are ignored (they would otherwise match between every
 *    character).
 *
 * @param text - Text to scrub.
 * @param scrubMap - Array of `[value, placeholder]` pairs.
 * @returns The scrubbed text (the input itself when the map is empty).
 */
function scrubEnvValues(text, scrubMap) {
	if (scrubMap.length === 0) return text;
	// `raw: true` segments are original text and still eligible for scrubbing.
	let segments = [{ text, raw: true }];
	for (const [value, placeholder] of scrubMap) {
		if (typeof value !== "string" || value.length === 0) continue;
		segments = segments.flatMap((segment) => {
			if (!segment.raw || !segment.text.includes(value)) return [segment];
			const expanded = [];
			segment.text.split(value).forEach((piece, i) => {
				if (i > 0) expanded.push({ text: placeholder, raw: false });
				if (piece.length > 0) expanded.push({ text: piece, raw: true });
			});
			return expanded;
		});
	}
	return segments.map((segment) => segment.text).join("");
}
2045
+ //#endregion
2046
+ //#region src/runtime/literal-scrub.ts
2047
/**
 * Regexes for literals that are only valid for the run that recorded them.
 *
 * Ordered roughly from most to least certain. `unix-epoch-sec` /
 * `unix-epoch-ms` require a leading `1`, which limits the match to 10- and
 * 13-digit numbers starting with 1 (for seconds: the 2001-2033 range) and
 * is intended to keep most SKU / order-id style numbers out.
 *
 * Relative-time labels ("just now", "N minutes ago", "N分前") are included
 * because they go stale exactly like wall-clock values; only unambiguous
 * phrasings are matched so ordinary UI copy containing "now" or "minute"
 * does not trigger.
 */
const UNSTABLE_PATTERNS = [
	{ id: "clock-hms", pattern: /\b\d{2}:\d{2}:\d{2}\b/, label: "clock time HH:MM:SS" },
	{ id: "iso-datetime", pattern: /\b\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}/, label: "ISO datetime" },
	{ id: "iso-date", pattern: /\b\d{4}-\d{2}-\d{2}\b/, label: "ISO date YYYY-MM-DD" },
	{ id: "unix-epoch-sec", pattern: /\b1[0-9]{9}\b/, label: "Unix epoch seconds" },
	{ id: "unix-epoch-ms", pattern: /\b1[0-9]{12}\b/, label: "Unix epoch milliseconds" },
	{ id: "relative-time-en", pattern: /\b\d+\s+(second|minute|hour|day|week|month|year)s?\s+ago\b/i, label: "English relative time (`N <unit> ago`)" },
	{ id: "relative-now-en", pattern: /\bjust\s+now\b/i, label: "English `just now`" },
	{ id: "relative-time-ja", pattern: /\d+\s*(秒|分|時間|日|週間|か月|ヶ月|年)前/, label: "Japanese relative time (`N<unit>前`)" },
	{ id: "relative-now-ja", pattern: /たった今/, label: "Japanese `たった今`" },
	{ id: "ja-date-full", pattern: /\d{4}年\d{1,2}月\d{1,2}日/, label: "Japanese date YYYY年M月D日" },
	{ id: "ja-date-md", pattern: /(?<!年)(?<!\d)\d{1,2}月\d{1,2}日(?![間目])/, label: "Japanese date M月D日" }
];
/** Action fields that are checked for unstable literals, in reporting order. */
const SCANNABLE_FIELDS = [
	"selector",
	"value",
	"label",
	"target",
	"observation",
	"findValue",
	"findName"
];
/**
 * Checks one action against every unstable-literal pattern.
 *
 * Only non-empty string fields listed in `SCANNABLE_FIELDS` are examined.
 * For each (field, pattern) pair that matches, the first match is reported.
 *
 * @param action - Recorded action.
 * @returns Array of `{ field, patternId, match }`; empty when nothing fired.
 */
function detectUnstableLiterals(action) {
	const findings = [];
	for (const field of SCANNABLE_FIELDS) {
		const text = action[field];
		if (typeof text !== "string" || text === "") continue;
		for (const { id, pattern } of UNSTABLE_PATTERNS) {
			// None of the patterns is global/sticky, so exec() is stateless here.
			const found = pattern.exec(text);
			if (found === null) continue;
			findings.push({
				field,
				patternId: id,
				match: found[0]
			});
		}
	}
	return findings;
}
2145
/**
 * Partitions the recorded actions into those safe to keep and those that
 * carry an unstable literal.
 *
 * An action is kept when `detectUnstableLiterals` reports nothing, or when it
 * is a `snapshot` whose hits are all in `observation` (per the original
 * note, that field only ends up as a comment in the generated script).
 * Every other action with at least one hit is dropped.
 *
 * @param actions - Recorded actions in order.
 * @returns `{ kept, dropped }` where each drop is `{ index, action, hits }`.
 */
function scrubUnstableActions(actions) {
	const survivors = [];
	const rejected = [];
	for (const [index, action] of actions.entries()) {
		const hits = detectUnstableLiterals(action);
		const observationOnly = hits.every((hit) => hit.field === "observation");
		const harmless = hits.length === 0 || action.command === "snapshot" && observationOnly;
		if (harmless) survivors.push(action);
		else rejected.push({
			index,
			action,
			hits
		});
	}
	return {
		kept: survivors,
		dropped: rejected
	};
}
2179
/**
 * Renders one literal-scrub drop as a single line for `log.warn`.
 *
 * Shape: `<command>[ <assertType>]: contains unstable literal (<ids>) — <samples>`
 * where `<ids>` are the distinct pattern ids in first-seen order and
 * `<samples>` lists every hit as `field="match"`.
 *
 * @param drop - `{ action, hits }` as produced by `scrubUnstableActions`.
 * @returns The formatted summary line.
 */
function formatUnstableDrop(drop) {
	const { action, hits } = drop;
	const subject = action.assertType ? `${action.command} ${action.assertType}` : `${action.command}`;
	const distinctIds = [];
	const samples = [];
	for (const hit of hits) {
		if (!distinctIds.includes(hit.patternId)) distinctIds.push(hit.patternId);
		samples.push(`${hit.field}="${hit.match}"`);
	}
	return `${subject}: contains unstable literal (${distinctIds.join(", ")}) — ${samples.join(", ")}`;
}
2190
+ //#endregion
1441
2191
  //#region src/cli/trace.ts
1442
- const traceCommand = new Command("trace").argument("<feature/spec>", "Spec id in '<feature>/<spec>' form (resolves to .ccqa/features/<feature>/test-cases/<spec>/)").description("Run agent-browser, verify assertions, and record structured actions").option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID. Overrides CCQA_MODEL.").action(async (specPath, opts) => {
2192
/** Accepted values for `--validation-mode`. */
const VALIDATION_MODES = ["lenient", "strict"];
/**
 * `ccqa trace <feature/spec>` command definition.
 *
 * `--validation-mode` is checked by the option parser: anything outside
 * `VALIDATION_MODES` throws; the default is "lenient". The action splits the
 * spec id with `parseSpecPath` and hands off to `runTrace`.
 */
const traceCommand = new Command("trace")
	.argument("<feature/spec>", "Spec id in '<feature>/<spec>' form (resolves to .ccqa/features/<feature>/test-cases/<spec>/)")
	.description("Run agent-browser, verify assertions, and record structured actions")
	.option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID. Overrides CCQA_MODEL.")
	.option("--validation-mode <mode>", "Post-trace validation behaviour: 'lenient' (default) tags failing actions with a warning but keeps them; 'strict' drops them from actions.json.", (raw) => {
		if (!VALIDATION_MODES.includes(raw)) {
			throw new Error(`--validation-mode must be one of ${VALIDATION_MODES.join(" | ")}`);
		}
		return raw;
	}, "lenient")
	.action(async (specPath, opts) => {
		const parsed = parseSpecPath(specPath);
		const validationMode = opts.validationMode ?? "lenient";
		await runTrace(parsed.featureName, parsed.specName, opts.model, validationMode);
	});
1446
- async function runTrace(featureName, specName, model) {
2200
+ async function runTrace(featureName, specName, model, validationMode = "lenient") {
1447
2201
  header("trace", `${featureName}/${specName}`);
1448
2202
  try {
1449
2203
  meta("agent-browser", assertAgentBrowserAvailable());
@@ -1458,17 +2212,23 @@ async function runTrace(featureName, specName, model) {
1458
2212
  await warnStaleBlockArtifacts();
1459
2213
  const spec = parseTestSpec(await readSpecFile(featureName, specName));
1460
2214
  const expanded = expandSpec(spec, { blocks: await loadAllBlocks() });
2215
+ const envScrub = buildSpecEnvScrub(spec, expanded);
2216
+ const envScrubMap = envScrub.map;
2217
+ if (envScrub.unresolved.length > 0) warn(`spec references env var(s) with empty/unset values: ${envScrub.unresolved.join(", ")} — their literal trace-time values will be baked into actions.json`);
1461
2218
  meta("spec", spec.title);
1462
2219
  meta("steps", expanded.length);
1463
2220
  const includes = collectIncludedBlockNames(spec);
1464
2221
  if (includes.length > 0) meta("blocks", includes.join(", "));
1465
2222
  blank();
1466
2223
  const sessionName = generateSessionName();
1467
- const systemPrompt = buildTraceSystemPrompt({
2224
+ const baseSystemPrompt = buildTraceSystemPrompt({
1468
2225
  title: spec.title,
1469
2226
  steps: expanded,
1470
2227
  sessionName
1471
2228
  });
2229
+ const userPrompt = await loadTraceUserPrompt();
2230
+ if (userPrompt !== null) meta("user-prompt", ".ccqa/prompts/trace.user.md");
2231
+ const systemPrompt = userPrompt === null ? baseSystemPrompt : `${baseSystemPrompt}\n## Project-specific guidance\n\n${userPrompt}\n`;
1472
2232
  const prompt = buildTracePrompt(spec.title);
1473
2233
  info("Running agent-browser session...");
1474
2234
  blank();
@@ -1499,7 +2259,7 @@ async function runTrace(featureName, specName, model) {
1499
2259
  },
1500
2260
  model,
1501
2261
  onAbAction: (abAction) => {
1502
- const action = withStepId(parseAbAction(abAction));
2262
+ const action = withStepId(parseAbAction(scrubEnvValues(abAction, envScrubMap)));
1503
2263
  if (action) traceActions.push(action);
1504
2264
  },
1505
2265
  onAbActionFailed: () => {
@@ -1530,14 +2290,14 @@ async function runTrace(featureName, specName, model) {
1530
2290
  if (routeStep.status === "FAILED") overallStatus = "failed";
1531
2291
  }
1532
2292
  } else if (trimmed.startsWith("AB_ACTION|snapshot|") || trimmed.startsWith("AB_ACTION|assert|")) {
1533
- const action = withStepId(parseAbAction(trimmed));
2293
+ const action = withStepId(parseAbAction(scrubEnvValues(trimmed, envScrubMap)));
1534
2294
  if (action) traceActions.push(action);
1535
2295
  }
1536
2296
  }
1537
2297
  }
1538
2298
  });
1539
2299
  if (isError) overallStatus = "failed";
1540
- const validatedActions = validateAndReport(traceActions);
2300
+ const validatedActions = validateAndReport(dedupAndReport(scrubAndReport(traceActions)), validationMode);
1541
2301
  const route = {
1542
2302
  specName,
1543
2303
  timestamp: (/* @__PURE__ */ new Date()).toISOString(),
@@ -1558,24 +2318,194 @@ async function runTrace(featureName, specName, model) {
1558
2318
  hint(`run 'ccqa generate ${featureName}/${specName}' to generate a test script`);
1559
2319
  }
1560
2320
  /**
2321
+ * Strip actions whose recorded fields contain "unstable literal" values
2322
+ * (clock readings, ISO datetimes, Unix-epoch IDs) that Claude baked into
2323
+ * the trace despite not coming through `${ENV_VAR}`. These would otherwise
2324
+ * pin the generated test to a single run. Reported the same way as
2325
+ * `validateAndReport` so users see one uniform "dropped" surface.
2326
+ */
2327
+ function scrubAndReport(actions) {
2328
+ if (actions.length === 0) return actions;
2329
+ const { kept, dropped } = scrubUnstableActions(actions);
2330
+ if (dropped.length === 0) return kept;
2331
+ blank();
2332
+ info("post-trace literal scrub (removing run-specific values)...");
2333
+ for (const d of dropped) warn(`dropped action #${d.index + 1} (${formatUnstableDrop(d)})`);
2334
+ meta("scrubbed", `${kept.length}/${actions.length} kept (${dropped.length} dropped)`);
2335
+ return kept;
2336
+ }
2337
/**
 * Collapses runs of immediately repeated actions into a single action.
 *
 * Each action is compared (via `isAdjacentDuplicate`) with the most recently
 * kept one; a match is discarded, so the first action of a run is the one
 * that stays. Only back-to-back, structurally identical actions are merged —
 * retries that use a different selector/locator are left alone on purpose,
 * because they may be a genuine sequence of distinct interactions.
 *
 * A "deduped" summary is logged only when something was removed.
 *
 * @param actions - Recorded actions.
 * @returns The input array itself when it is empty, otherwise a new array.
 */
function dedupAndReport(actions) {
	if (actions.length === 0) return actions;
	const unique = [];
	for (const candidate of actions) {
		const last = unique[unique.length - 1];
		const repeatsLast = Boolean(last) && isAdjacentDuplicate(last, candidate);
		if (!repeatsLast) unique.push(candidate);
	}
	const removed = actions.length - unique.length;
	if (removed > 0) {
		meta("deduped", `${unique.length}/${actions.length} kept (${removed} adjacent duplicate(s) dropped)`);
	}
	return unique;
}
2368
/**
 * Decides whether two actions are interchangeable for dedup purposes.
 *
 * They must have strictly equal `command`s and agree on `stepId` and on every
 * compared field below. A missing (null/undefined) field is treated as its
 * listed fallback, so e.g. an absent `selector` equals `""` and an absent
 * `findIndex` equals `-1`. Requiring the same `stepId` prevents two distinct
 * steps that start with the same action from being merged.
 *
 * @param a - Previously kept action.
 * @param b - Candidate action.
 * @returns `true` when `b` duplicates `a`.
 */
function isAdjacentDuplicate(a, b) {
	if (a.command !== b.command) return false;
	const comparedFields = [
		["stepId", ""],
		["selector", ""],
		["value", ""],
		["target", ""],
		["label", ""],
		["assertType", ""],
		["findLocator", ""],
		["findValue", ""],
		["findName", ""],
		["findIndex", -1],
		["findExact", false]
	];
	return comparedFields.every(([field, fallback]) => (a[field] ?? fallback) === (b[field] ?? fallback));
}
2379
/**
 * Runs the post-trace replay validation and prints its report.
 *
 * Lenient mode: failing actions are not removed. They come back from
 * `validateActions` in `unstable` (tagged `replayUnstable`), get one warning
 * each, and are merged with `kept` in original recording order.
 *
 * Any other mode (strict): dropped actions are reported and only `kept` is
 * returned. Consecutive drops whose reason starts with "skipped after" and
 * that share a `stepId` are folded into a single "cascade dropped" warning;
 * every other drop gets its own warning line.
 *
 * Both modes end with the "validated" total and the per-step breakdown.
 *
 * @param actions - Actions to validate.
 * @param mode - `"lenient"` or `"strict"`.
 * @returns The input array itself when empty, otherwise the actions to write out.
 */
function validateAndReport(actions, mode) {
	if (actions.length === 0) return actions;
	const sessionName = `${generateSessionName()}-validate`;
	blank();
	info(`post-trace validation in ${mode} mode (replaying ${actions.length} recorded action(s))...`);
	const { kept, unstable, dropped, rescuedSteps = [] } = validateActions(actions, {
		sessionName,
		mode,
		onProgress: (i, total, action) => {
			progress(i, total, validationProgressLabel(action));
		}
	});
	progressEnd();
	if (rescuedSteps.length > 0) {
		info(`rescued ${rescuedSteps.length} step(s) that had lost every action: ${rescuedSteps.join(", ")}`);
	}
	if (mode === "lenient") {
		if (unstable.length > 0) {
			for (const flagged of unstable) {
				const selectorPart = flagged.selector ? " " + flagged.selector : "";
				const findPart = flagged.findValue ? " " + flagged.findValue : "";
				warn(`replay-unstable: ${flagged.command}${selectorPart}${findPart} — ${flagged.replayReason ?? "(no reason)"} (kept in actions.json with warning)`);
			}
			meta("validated", `${kept.length}/${actions.length} kept, ${unstable.length} flagged replay-unstable (kept with warning)`);
		} else {
			meta("validated", `${kept.length}/${actions.length} kept`);
		}
		const merged = mergeKeptAndUnstableInOriginalOrder(actions, kept, unstable);
		reportPerStepBreakdown(actions, merged);
		return merged;
	}
	// Strict mode. `run` tracks the cascade currently being accumulated.
	let run = null;
	const flushRun = () => {
		if (run === null) return;
		const stepTag = run.stepId ? ` in ${run.stepId}` : "";
		warn(`cascade dropped ${run.count} action(s)${stepTag} after action #${run.start}`);
		run = null;
	};
	for (const drop of dropped) {
		const cascaded = drop.reason.startsWith("skipped after");
		if (cascaded && run !== null && run.stepId === drop.action.stepId) {
			run.count += 1;
			continue;
		}
		flushRun();
		if (cascaded) {
			run = {
				start: drop.index,
				count: 1,
				stepId: drop.action.stepId
			};
			continue;
		}
		warn(`dropped action #${drop.index + 1} (${drop.action.command}${drop.action.selector ? " " + drop.action.selector : ""}): ${drop.reason}`);
	}
	flushRun();
	const droppedNote = dropped.length === 0 ? "" : ` (${dropped.length} dropped)`;
	meta("validated", `${kept.length}/${actions.length} kept${droppedNote}`);
	reportPerStepBreakdown(actions, kept);
	return kept;
}
2449
/**
 * Lenient-mode helper: returns the actions of `originalActions` that appear
 * in either `kept` or `unstable`, preserving the original recording order.
 * Membership is by object identity, so both lists must hold the very same
 * object references as `originalActions`.
 *
 * @param originalActions - Actions in recorded order.
 * @param kept - Actions that replayed cleanly.
 * @param unstable - Actions flagged replay-unstable.
 * @returns New array with the retained actions in original order.
 */
function mergeKeptAndUnstableInOriginalOrder(originalActions, kept, unstable) {
	const retained = new Set(kept);
	for (const flagged of unstable) retained.add(flagged);
	return originalActions.filter((candidate) => retained.has(candidate));
}
2460
/**
 * Builds the short label shown while validation replays an action.
 *
 * The detail part is chosen in this order: `find <locator> <value>` when a
 * find-locator is present, else `<command> <selector>`, else
 * `<command> <value>`, else the bare command. Details longer than 80
 * characters are cut to 77 plus "...". The step id, when present, is
 * prefixed afterwards and is not counted against that limit.
 *
 * @param action - Action being replayed.
 * @returns Label text.
 */
function validationProgressLabel(action) {
	let detail;
	if (action.findLocator) {
		detail = `find ${action.findLocator} ${action.findValue ?? ""}`.trim();
	} else if (action.selector) {
		detail = `${action.command} ${action.selector}`;
	} else if (action.value) {
		detail = `${action.command} ${action.value}`;
	} else {
		detail = action.command;
	}
	const clipped = detail.length > 80 ? detail.slice(0, 77) + "..." : detail;
	const prefix = action.stepId ? `${action.stepId} ` : "";
	return `${prefix}${clipped}`;
}
2470
/**
 * Logs one `kept/total` line per step so that a step which lost ALL of its
 * actions during validation is clearly visible, then emits a dedicated
 * warning listing those lost steps.
 *
 * Steps are listed in order of first appearance in `beforeValidation`.
 * Actions without a `stepId` are grouped under "<no step>"; that bucket is
 * never reported as a lost step.
 *
 * @param beforeValidation - Actions that went into validation.
 * @param afterValidation - Actions that came out of it.
 */
function reportPerStepBreakdown(beforeValidation, afterValidation) {
	const totals = groupCountByStep(beforeValidation);
	const survivors = groupCountByStep(afterValidation);
	const lostSteps = [];
	// A Map iterates in insertion order, i.e. first appearance of each step.
	for (const [id, total] of totals) {
		const kept = survivors.get(id) ?? 0;
		const dropped = total - kept;
		const isLost = kept === 0 && total > 0 && id !== "<no step>";
		if (isLost) lostSteps.push(id);
		const droppedNote = dropped > 0 ? `, ${dropped} dropped` : "";
		const lostNote = isLost ? " ⚠ entire step removed" : "";
		meta(` ${id}`, `${kept}/${total} kept${droppedNote}${lostNote}`);
	}
	if (lostSteps.length === 0) return;
	warn(`${lostSteps.length} spec step(s) lost every recorded action: ${lostSteps.join(", ")} — the generated test will NOT exercise these steps.`);
}
2501
/**
 * Counts actions per step.
 *
 * @param actions - Actions to tally; a null/undefined `stepId` is counted
 *   under the key "<no step>".
 * @returns Map from step id to count, keyed in order of first appearance.
 */
function groupCountByStep(actions) {
	return actions.reduce((tally, { stepId }) => {
		const key = stepId ?? "<no step>";
		return tally.set(key, (tally.get(key) ?? 0) + 1);
	}, /* @__PURE__ */ new Map());
}
1579
2509
  function parseStatusLine(text) {
1580
2510
  for (const line of text.split("\n")) {
1581
2511
  const match = line.match(/^(STEP_START|STEP_DONE|ASSERTION_FAILED|STEP_SKIPPED|RUN_COMPLETED)\|([^|]*)\|(.*)$/);
@@ -1666,13 +2596,50 @@ function parseAbAction(line) {
1666
2596
  target: parts[3],
1667
2597
  label: parts[4]
1668
2598
  };
2599
+ case "find_click":
2600
+ case "find_dblclick":
2601
+ case "find_hover":
2602
+ case "find_focus":
2603
+ case "find_check":
2604
+ case "find_uncheck": return parseFindAction(command, parts, false);
2605
+ case "find_fill":
2606
+ case "find_type": return parseFindAction(command, parts, true);
1669
2607
  default: return null;
1670
2608
  }
1671
2609
  }
2610
/**
 * Shared parser for the `find_*` AB_ACTION family.
 *
 * Positional layout of `parts`: `[2]` locator, `[3]` locator value,
 * `[4]` extra, `[5]` exact token, then `[6]` label — or, for fill-type
 * commands, `[6]` value and `[7]` label.
 *
 * `extra` is the accessible name for the `role` locator and the integer
 * index for `nth`; it is ignored for every other locator and may be empty.
 * The line is rejected (returns `null`) when the locator is missing or not
 * in `FIND_LOCATORS`, when the locator value is empty, or when an `nth`
 * locator has no parseable integer index.
 *
 * @param command - The `find_*` command name.
 * @param parts - The `|`-separated fields of the AB_ACTION line.
 * @param hasFillValue - `true` for `find_fill` / `find_type`.
 * @returns The parsed action, or `null` when the line is malformed.
 */
function parseFindAction(command, parts, hasFillValue) {
	const locator = parts[2];
	const findValue = parts[3];
	const extra = parts[4] ?? "";
	const exactToken = parts[5] ?? "";
	const locatorKnown = Boolean(locator) && FIND_LOCATORS.includes(locator);
	if (!locatorKnown || !findValue) return null;
	// Properties are added in a fixed order so the serialized shape is stable.
	const parsed = {
		command,
		findLocator: locator,
		findValue
	};
	if (exactToken === "exact") parsed.findExact = true;
	if (locator === "role" && extra) parsed.findName = extra;
	if (locator === "nth") {
		const index = extra ? Number.parseInt(extra, 10) : NaN;
		if (Number.isNaN(index)) return null;
		parsed.findIndex = index;
	}
	if (hasFillValue) {
		parsed.value = parts[6];
		parsed.label = parts[7];
	} else {
		parsed.label = parts[6];
	}
	return parsed;
}
1672
2639
  //#endregion
1673
2640
  //#region src/codegen/actions-to-script.ts
1674
2641
  function actionsToScript(input) {
1675
- const { actions, testName, stepMarkers = [] } = input;
2642
+ const { actions, testName, stepMarkers = [], emptySteps = [] } = input;
1676
2643
  const parts = [...[
1677
2644
  `import { test } from "vitest";`,
1678
2645
  `import { spawnSync } from "node:child_process";`,
@@ -1695,7 +2662,7 @@ function actionsToScript(input) {
1695
2662
  `process.env.AGENT_BROWSER_SESSION ||= \`ccqa-run-\${Date.now()}\`;`,
1696
2663
  ""
1697
2664
  ]];
1698
- const body = actionsToLines(actions, stepMarkers).map((l) => ` ${l}`).join("\n");
2665
+ const body = actionsToLines(actions, stepMarkers, emptySteps).map((l) => ` ${l}`).join("\n");
1699
2666
  parts.push(`test(${JSON.stringify(testName)}, () => {`, body, "}, 5 * 60 * 1000);", "");
1700
2667
  return parts.join("\n");
1701
2668
  }
@@ -1709,13 +2676,31 @@ const ELEMENT_COMMANDS = new Set([
1709
2676
  "uncheck",
1710
2677
  "select",
1711
2678
  "hover",
1712
- "drag"
2679
+ "drag",
2680
+ "find_click",
2681
+ "find_dblclick",
2682
+ "find_fill",
2683
+ "find_type",
2684
+ "find_hover",
2685
+ "find_focus",
2686
+ "find_check",
2687
+ "find_uncheck"
1713
2688
  ]);
1714
- function actionsToLines(actions, stepMarkers) {
2689
+ function actionsToLines(actions, stepMarkers, emptySteps) {
1715
2690
  const lines = [];
1716
2691
  let prevLine = null;
1717
- let prevCommand = null;
2692
+ let pendingOpenSettle = false;
1718
2693
  const markerByIndex = new Map(stepMarkers.map((m) => [m.actionIndex, m]));
2694
+ const emptyByInsertAfter = /* @__PURE__ */ new Map();
2695
+ for (const e of emptySteps) {
2696
+ const list = emptyByInsertAfter.get(e.insertAfterIndex) ?? [];
2697
+ list.push(e);
2698
+ emptyByInsertAfter.set(e.insertAfterIndex, list);
2699
+ }
2700
+ const leadingNotices = emptyByInsertAfter.get(-1) ?? [];
2701
+ for (const n of leadingNotices) appendEmptyStepNotice(lines, n);
2702
+ let currentStepId;
2703
+ let filledValuesThisStep = /* @__PURE__ */ new Set();
1719
2704
  for (let i = 0; i < actions.length; i++) {
1720
2705
  const marker = markerByIndex.get(i);
1721
2706
  if (marker) {
@@ -1723,22 +2708,86 @@ function actionsToLines(actions, stepMarkers) {
1723
2708
  lines.push(`// step: ${marker.stepId} [${marker.source}]`);
1724
2709
  }
1725
2710
  const action = actions[i];
2711
+ if (action.stepId !== currentStepId) {
2712
+ currentStepId = action.stepId;
2713
+ filledValuesThisStep = /* @__PURE__ */ new Set();
2714
+ }
2715
+ const filled = fillValueOf(action);
2716
+ if (filled) filledValuesThisStep.add(filled);
2717
+ if (action.command === "assert" && action.assertType === "text_visible" && typeof action.value === "string" && filledValuesThisStep.has(action.value)) {
2718
+ lines.push(`// [warn] replay-unstable: dropped input-value assert (text_visible ${action.value}) — typed values aren't visible text nodes`);
2719
+ continue;
2720
+ }
1726
2721
  const line = actionToLine(action);
1727
2722
  if (line === null) continue;
1728
2723
  if (line === prevLine) continue;
1729
- if (prevCommand === "open" && ELEMENT_COMMANDS.has(action.command)) lines.push(`spawnSync("sleep", ["3"], { stdio: "inherit" });`);
2724
+ if (action.command === "open") pendingOpenSettle = true;
2725
+ if (pendingOpenSettle && ELEMENT_COMMANDS.has(action.command)) {
2726
+ lines.push(`spawnSync("sleep", ["3"], { stdio: "inherit" });`);
2727
+ pendingOpenSettle = false;
2728
+ }
2729
+ if (action.replayUnstable) lines.push(`// [warn] replay-unstable: ${action.replayReason ?? "(no reason recorded)"}`);
1730
2730
  lines.push(line);
1731
2731
  prevLine = line;
1732
- prevCommand = action.command;
2732
+ const followups = emptyByInsertAfter.get(i);
2733
+ if (followups) for (const n of followups) appendEmptyStepNotice(lines, n);
1733
2734
  }
1734
2735
  return lines;
1735
2736
  }
1736
- /** Returns true if a selector is a session-specific @ref that cannot be replayed. */
/**
 * Extract the text that a fill-style action types into a field.
 *
 * The fill-style commands are `fill`, `type`, `find_fill` and `find_type`;
 * every one of them carries the typed text in `action.value`.
 *
 * @param {object} action - Trace action; only `command` and `value` are read.
 * @returns {string|null} The typed text when the command is fill-style and
 *   `value` is a non-empty string, otherwise `null`.
 */
function fillValueOf(action) {
  switch (action.command) {
    case "fill":
    case "type":
    case "find_fill":
    case "find_type":
      break;
    default:
      return null;
  }
  const typed = action.value;
  if (typeof typed !== "string" || typed.length === 0) return null;
  return typed;
}
/**
 * Append a "this step was dropped" comment block to the generated script.
 *
 * Mutates `lines` in place. A blank separator line is emitted first unless
 * `lines` is still empty, followed by a step header and three `[warn]`
 * comment lines.
 *
 * @param {string[]} lines - Script lines accumulated so far (appended to).
 * @param {{stepId: string, source: string}} notice - The step that lost all
 *   of its actions; `stepId` and `source` are interpolated into the comments.
 * @returns {void}
 */
function appendEmptyStepNotice(lines, notice) {
  const { stepId, source } = notice;
  const block = [
    `// step: ${stepId} [${source}]`,
    `// [warn] all actions for this step were dropped during post-trace validation.`,
    `// [warn] the generated test does NOT exercise step ${stepId}. Re-run`,
    `// [warn] \`ccqa trace\` or add manual assertions if this step is load-bearing.`
  ];
  // Separate the notice from whatever precedes it, but never lead with a blank line.
  if (lines.length > 0) block.unshift("");
  lines.push(...block);
}
/**
 * Detect selectors that are session-specific agent-browser refs and therefore
 * cannot be replayed. Two spellings are recognised:
 *   - `@e14`: the snapshot ref syntax, matched at the start of the trimmed
 *     selector;
 *   - `button[ref='e4']` / `[ref=e4]`: a `ref` attribute that leaked into a
 *     CSS selector, matched anywhere in the selector.
 * Refs are re-numbered on every snapshot, so neither form survives a fresh run.
 *
 * @param {unknown} selector - Candidate selector; non-strings are never refs.
 * @returns {boolean} `true` when the selector is a ref in either form.
 */
function isRefSelector(selector) {
  if (typeof selector !== "string") return false;
  const trimmed = selector.trim();
  const isSnapshotRef = /^@/.test(trimmed);
  const hasRefAttribute = /\[ref\s*=\s*['"]?e\d+['"]?\]/.test(trimmed);
  return isSnapshotRef || hasRefAttribute;
}
/**
 * Detect selectors that pick elements *by the very state being asserted*,
 * which turns an `element_disabled` / `element_enabled` check into a tautology.
 *
 * Example: `abAssertDisabled("button[disabled]")` first selects an element
 * that is already disabled and then confirms it is disabled. That is always
 * true, even when the element the spec actually cares about (say, an
 * "Add content" button) is missing or enabled. Such an assertion verifies
 * nothing, so codegen drops it and leaves only a breadcrumb comment instead of
 * baking in a check that can never fail.
 *
 * Matches the `:disabled` / `:enabled` pseudo-classes and the `[disabled]` /
 * `[aria-disabled=…]` attribute selectors anywhere in the selector,
 * case-insensitively.
 *
 * @param {unknown} selector - Candidate selector; non-strings never match.
 * @returns {boolean} `true` when the selector filters by enabled/disabled state.
 */
function isStateSelector(selector) {
  return typeof selector === "string"
    ? /:disabled\b|:enabled\b|\[\s*disabled[\s\]=]|\[\s*aria-disabled[\s\]=]/i.test(selector)
    : false;
}
1740
2785
  function actionToLine(action) {
1741
2786
  if ("selector" in action && isRefSelector(action.selector)) return null;
2787
+ if (action.command === "assert" && action.replayUnstable && typeof action.replayReason === "string" && action.replayReason.includes("selector not present")) {
2788
+ const sel = action.selector ?? action.observation ?? "(unknown)";
2789
+ return `// [warn] replay-unstable: dropped over-assertion (${action.assertType ?? "assert"} ${sel}) — selector not present on replay`;
2790
+ }
1742
2791
  switch (action.command) {
1743
2792
  case "cookies_clear": return `ab("cookies", "clear");`;
1744
2793
  case "open": return `ab("open", ${jExpr((action.value ?? "").replace(/^["']|["']$/g, ""))});`;
@@ -1757,7 +2806,22 @@ function actionToLine(action) {
1757
2806
  case "wait": {
1758
2807
  const sel = action.selector;
1759
2808
  if (/^\d+$/.test(sel)) return `spawnSync("sleep", [${j(sel)}], { stdio: "inherit" });`;
1760
- return `abWait(${j(sel)});`;
2809
+ if (sel.startsWith("--")) return null;
2810
+ return `abWait(${jExpr(sel)});`;
2811
+ }
2812
+ case "find_click":
2813
+ case "find_dblclick":
2814
+ case "find_hover":
2815
+ case "find_focus":
2816
+ case "find_check":
2817
+ case "find_uncheck": {
2818
+ const args = buildFindArgs(action, void 0);
2819
+ return args === null ? droppedFindMarker(action) : `ab(${args.join(", ")});`;
2820
+ }
2821
+ case "find_fill":
2822
+ case "find_type": {
2823
+ const args = buildFindArgs(action, action.value ?? "");
2824
+ return args === null ? droppedFindMarker(action) : `ab(${args.join(", ")});`;
1761
2825
  }
1762
2826
  case "assert": {
1763
2827
  const val = action.value ?? action.observation;
@@ -1781,9 +2845,11 @@ function actionToLine(action) {
1781
2845
  if (val) assertLine = `abAssertUrl(${jExpr(val)});`;
1782
2846
  break;
1783
2847
  case "element_enabled":
2848
+ if (isStateSelector(sel)) return tautologicalStateAssertMarker(action, sel);
1784
2849
  if (sel && !sel.startsWith("text=") && !sel.startsWith("[aria-label=")) assertLine = `abAssertEnabled(${j(sel)});`;
1785
2850
  break;
1786
2851
  case "element_disabled":
2852
+ if (isStateSelector(sel)) return tautologicalStateAssertMarker(action, sel);
1787
2853
  if (sel && !sel.startsWith("text=") && !sel.startsWith("[aria-label=")) assertLine = `abAssertDisabled(${j(sel)});`;
1788
2854
  break;
1789
2855
  case "element_checked":
@@ -1799,6 +2865,51 @@ function actionToLine(action) {
1799
2865
  default: return null;
1800
2866
  }
1801
2867
  }
/**
 * Build the source-level argument list for an `ab("find", ...)` call.
 *
 * Every element of the returned array is already a piece of source text (a
 * quoted literal or an expression); the caller joins them with `", "`. The
 * emitted order is:
 *
 *   "find", <locator>, [<index> — `nth` only], <value>, <action>,
 *   [<fill value>], ["--name", <name> — `role` only], ["--exact"]
 *
 * `findValue`, `findName` and the fill value go through `jExpr` (per the
 * original note, so that `${ENV}` references survive into the generated
 * test). For the positional locators `first` / `last` / `nth` the value is a
 * CSS selector and is emitted as a plain string literal via `j`.
 *
 * The inner action is the command with its `find_` prefix removed, with
 * `type` mapped to `fill`.
 *
 * @param {object} action - A `find_*` trace action.
 * @param {string|undefined} fillValue - Text to type for fill-style actions,
 *   or `undefined` for actions that take no input.
 * @returns {string[]|null} Argument source fragments, or `null` when the
 *   action lacks `findLocator` or `findValue`.
 */
function buildFindArgs(action, fillValue) {
  const { findLocator, findValue, findName, findIndex, findExact } = action;
  if (!findLocator || !findValue) return null;
  const verb = action.command.slice(5).replace("type", "fill");
  const isPositional = findLocator === "nth" || findLocator === "first" || findLocator === "last";
  const args = [JSON.stringify("find"), JSON.stringify(findLocator)];
  // `nth` takes its index before the selector.
  if (findLocator === "nth") args.push(JSON.stringify(String(findIndex ?? 0)));
  args.push(isPositional ? j(findValue) : jExpr(findValue));
  args.push(JSON.stringify(verb));
  if (fillValue !== undefined) args.push(jExpr(fillValue));
  if (findLocator === "role" && findName) args.push(JSON.stringify("--name"), jExpr(findName));
  if (findExact) args.push(JSON.stringify("--exact"));
  return args;
}
/**
 * Produce a visible breadcrumb for a `find_*` action that lacks the
 * locator/value fields codegen needs.
 *
 * No runnable `ab(...)` line can be generated for such an action, but skipping
 * it silently would let the test pass while dropping a step the spec author
 * cared about. The marker is a comment, so the generated file still parses,
 * and `grep -n "find_\\* dropped"` surfaces the problem in CI logs.
 *
 * @param {object} action - The incomplete action; `command` and the optional
 *   `stepId` are included in the message.
 * @returns {string} A single-line `// [warn] ...` comment.
 */
function droppedFindMarker(action) {
  const { command, stepId } = action;
  const stepSuffix = stepId ? ` (stepId=${stepId})` : "";
  return `// [warn] find_* dropped: ${command}${stepSuffix} — actions.json is missing findLocator/findValue. Re-run \`ccqa trace\` to regenerate.`;
}
/**
 * Produce the breadcrumb comment for an `element_enabled` /
 * `element_disabled` assert whose selector picks the element by the asserted
 * state (a tautology). The assert is left out of the runnable script; the
 * comment lets a reviewer see that the intended check was discarded and
 * re-assert against a state-independent selector if the state really matters.
 *
 * @param {object} action - The dropped assert; `assertType` is shown, falling
 *   back to `"assert"` when it is null or undefined.
 * @param {string|null|undefined} sel - The offending selector, shown as
 *   `"(unknown)"` when null or undefined.
 * @returns {string} A single-line `// [warn] ...` comment.
 */
function tautologicalStateAssertMarker(action, sel) {
  const kind = action.assertType ?? "assert";
  const target = sel ?? "(unknown)";
  return `// [warn] dropped tautological assert (${kind} ${target}) — selector matches by the asserted state; target the element by a state-independent selector instead`;
}
/**
 * Shorthand for `JSON.stringify`: turns a value into a quoted literal that is
 * safe to embed in generated TS source.
 */
const j = (value) => JSON.stringify(value);
1804
2915
  /**
@@ -1818,13 +2929,19 @@ The trace contains noise: failed attempts, redundant retries, and duplicate oper
1818
2929
  Your task: return a **cleaned-up JSON array** of TraceAction objects that represents the minimal, correct sequence of actions needed to reproduce the test.
1819
2930
 
1820
2931
  Each TraceAction object has the following shape (use EXACTLY these field names):
1821
- { "command": "...", "assertType": "...", "selector": "...", "value": "...", "label": "...", "observation": "..." }
2932
+ { "command": "...", "assertType": "...", "selector": "...", "value": "...", "label": "...", "observation": "...",
2933
+ "findLocator": "...", "findValue": "...", "findName": "...", "findIndex": 0, "findExact": true }
2934
+
1822
2935
  Only include fields that are present in the original action. The "command" field is required. For assert actions, "assertType" is also required.
1823
2936
 
2937
+ **\`find_*\` actions (find_click / find_dblclick / find_fill / find_type / find_hover / find_focus / find_check / find_uncheck) are special:**
2938
+ They do NOT use \`selector\`. They use \`findLocator\` + \`findValue\` (and optionally \`findName\` / \`findIndex\` / \`findExact\`). When you keep a \`find_*\` action, you MUST copy **every** \`find*\` field from the original verbatim — dropping any of them silently corrupts the recorded selector and the generated test will be broken. Treat the \`find*\` cluster as one atomic unit: keep all or drop all.
2939
+
1824
2940
  Rules:
1825
2941
  - Remove actions that were failed attempts superseded by a later successful action (e.g., if \`fill selector="text=Foo"\` was followed by \`fill selector="[placeholder='Foo']"\`, keep only the latter)
1826
2942
  - Remove duplicate fill operations on the same field (keep only the last successful fill for each field)
1827
2943
  - For \`click\` and \`fill\` actions: if the selector starts with \`text=\`, it is a failed attempt — remove it (text= selectors only work with the wait command, not click/fill)
2944
+ - For \`find_*\` actions: if multiple \`find_*\` of the same command were emitted within the same logical step (Claude tried several locators), keep ONLY the last one — that is the one that finally succeeded
1828
2945
  - Keep all snapshot actions — they serve as comments/observations in the generated test
1829
2946
  - Keep all assert actions — they are the test's verification points and must not be removed
1830
2947
  - Do NOT invent new actions or change values
@@ -1836,6 +2953,11 @@ ${actions.map((a, i) => {
1836
2953
  if (a.assertType) parts.push(`assertType="${a.assertType}"`);
1837
2954
  if (a.selector) parts.push(`selector="${a.selector}"`);
1838
2955
  if (a.value) parts.push(`value="${a.value}"`);
2956
+ if (a.findLocator) parts.push(`findLocator="${a.findLocator}"`);
2957
+ if (a.findValue) parts.push(`findValue="${a.findValue}"`);
2958
+ if (a.findName) parts.push(`findName="${a.findName}"`);
2959
+ if (a.findIndex !== void 0) parts.push(`findIndex=${a.findIndex}`);
2960
+ if (a.findExact) parts.push(`findExact=true`);
1839
2961
  if (a.observation) parts.push(`→ ${a.observation}`);
1840
2962
  return parts.join(" ");
1841
2963
  }).join("\n")}`;
@@ -2033,7 +3155,7 @@ function applySelectorDrift(script, line, oldSelector, newSelector) {
2033
3155
  applied: false,
2034
3156
  reason: `oldSelector not found on line ${line}`
2035
3157
  };
2036
- lines[idx] = content.replaceAll(oldSelector, newSelector);
3158
+ lines[idx] = replaceSelectorLiteral(content, oldSelector, newSelector);
2037
3159
  return {
2038
3160
  applied: true,
2039
3161
  script: lines.join("\n"),
@@ -2041,6 +3163,44 @@ function applySelectorDrift(script, line, oldSelector, newSelector) {
2041
3163
  };
2042
3164
  }
/**
 * Rewrite a selector inside whatever string literal encloses it on the line.
 *
 * The tricky case is a `newSelector` that contains a `${...}` env reference
 * while the host literal is a plain `"..."` / `'...'`: a naive textual swap
 * leaves the `${...}` inside the quoted literal, where it is not interpolated
 * (the auto-fix loop used to ship this and break the build). When a template
 * substitution is needed, the enclosing literal is promoted from
 * `"..."` / `'...'` to a backtick literal in the same step.
 *
 * Strategy when `newSelector` contains `${NAME}`:
 *   1. If the old selector already sits inside a backtick literal, swap it
 *      there and keep the literal as is.
 *   2. Otherwise promote every double-quoted literal that contains it; if
 *      there is none, do the same for single-quoted literals. Backticks in
 *      the resulting text are escaped.
 *   3. If no literal contains it, fall back to a plain textual swap.
 *
 * All swaps use a replacer callback, so `$`-sequences in `newSelector`
 * (`$$`, `$&`, `` $` ``, `$'`) are written literally instead of being expanded
 * as replacement patterns.
 *
 * @param {string} content - One line of the generated script.
 * @param {string} oldSelector - Selector text currently on the line.
 * @param {string} newSelector - Selector text to put in its place.
 * @returns {string} The rewritten line; `content` unchanged when
 *   `oldSelector` is empty.
 */
function replaceSelectorLiteral(content, oldSelector, newSelector) {
  // An empty needle would splice `newSelector` between every character.
  if (oldSelector === "") return content;
  // Callback form: the replacement is inserted verbatim, `$` included.
  const swapIn = (text) => text.replaceAll(oldSelector, () => newSelector);
  if (!/\$\{[A-Za-z_]/.test(newSelector)) return swapIn(content);
  const tplSource = "`([^`]*)" + escapeForRegex(oldSelector) + "([^`]*)`";
  // Separate instances for probing and replacing: a shared /g regex would
  // carry `lastIndex` from `.test()` into later use.
  if (new RegExp(tplSource).test(content)) {
    return content.replace(new RegExp(tplSource, "g"), (_m, before, after) => `\`${before}${newSelector}${after}\``);
  }
  for (const quote of ["\"", "'"]) {
    // A quoted literal: opening quote, body with backslash escapes, closing quote.
    const literalRe = new RegExp(`${quote}([^${quote}\\\\]*(?:\\\\.[^${quote}\\\\]*)*)${quote}`, "g");
    let promoted = false;
    const rewritten = content.replace(literalRe, (whole, inner = "") => {
      if (!inner.includes(oldSelector)) return whole;
      promoted = true;
      const backtickSafe = swapIn(inner).replace(/`/g, "\\`");
      return `\`${backtickSafe}\``;
    });
    if (promoted) return rewritten;
  }
  return swapIn(content);
}
/**
 * Escape every regex metacharacter in `s` so it can be embedded in a
 * `RegExp` source and match itself literally.
 *
 * @param {string} s - Raw text.
 * @returns {string} `s` with each of `. * + ? ^ $ { } ( ) | [ ] \` prefixed by
 *   a backslash.
 */
function escapeForRegex(s) {
  return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}
3203
+ /**
2044
3204
  * Build a unified-style diff snippet for showing the user what would change.
2045
3205
  * Just the changed lines with -/+ prefixes; not a real patch.
2046
3206
  */
@@ -2771,10 +3931,13 @@ async function runGenerate(featureName, specName, maxRetries, mode, force, useSn
2771
3931
  const cleanedActions = await cleanupActions(actions, model);
2772
3932
  if (cleanedActions.length !== actions.length) meta("cleaned", cleanedActions.length);
2773
3933
  const markers = buildStepMarkers(expanded, cleanedActions);
3934
+ const emptySteps = findEmptySteps(expanded, cleanedActions);
3935
+ if (emptySteps.length > 0) for (const e of emptySteps) warn(`step ${e.stepId} has no kept actions — generated test will skip it (notice comment inserted).`);
2774
3936
  const scriptPath = await saveTestScript(featureName, specName, actionsToScript({
2775
3937
  actions: cleanedActions,
2776
3938
  testName: spec.title,
2777
- stepMarkers: markers
3939
+ stepMarkers: markers,
3940
+ emptySteps
2778
3941
  }));
2779
3942
  meta("saved", scriptPath);
2780
3943
  blank();
@@ -2844,6 +4007,42 @@ function buildStepMarkers(steps, actions) {
2844
4007
  }
2845
4008
  return markers;
2846
4009
  }
/**
 * List the spec steps that have no surviving action in `cleanedActions`.
 *
 * `actionsToScript` uses the result to splice a visible `// [warn]` block into
 * the generated script, so the spec author can see that the recorded test no
 * longer exercises part of the spec.
 *
 * Steps are walked in spec order. For each lost step, `insertAfterIndex` is
 * the index in `cleanedActions` of the last action belonging to the most
 * recent surviving step seen so far, or `-1` when no surviving step precedes
 * it.
 *
 * @param {Array<{id: string, source: string}>} steps - Spec steps in order.
 * @param {Array<{stepId?: string}>} cleanedActions - Actions kept after cleanup.
 * @returns {Array<{stepId: string, source: string, insertAfterIndex: number}>}
 *   One notice per step without any kept action, in spec order.
 */
function findEmptySteps(steps, cleanedActions) {
  // stepId -> index of that step's last surviving action.
  const lastIndexOfStep = /* @__PURE__ */ new Map();
  cleanedActions.forEach((action, index) => {
    if (action.stepId) lastIndexOfStep.set(action.stepId, index);
  });
  const notices = [];
  let anchor = -1;
  for (const step of steps) {
    const survivorIndex = lastIndexOfStep.get(step.id);
    if (survivorIndex !== undefined) {
      anchor = survivorIndex;
      continue;
    }
    notices.push({
      stepId: step.id,
      source: step.source,
      insertAfterIndex: anchor
    });
  }
  return notices;
}
2847
4046
  async function confirmOverwrite(path) {
2848
4047
  if (!process.stdin.isTTY) {
2849
4048
  warn(`${path} exists and stdin is not a TTY; refusing to overwrite. Pass --force to allow.`);
@@ -2912,16 +4111,39 @@ function reattachStepIds(cleaned, original) {
2912
4111
  cursor = i + 1;
2913
4112
  break;
2914
4113
  }
2915
- if (matched?.stepId) out.push({
2916
- ...c,
2917
- stepId: matched.stepId
2918
- });
2919
- else out.push(c);
4114
+ out.push(matched ? mergeFromOriginal(c, matched) : c);
2920
4115
  }
2921
4116
  return out;
2922
4117
  }
/**
 * Combine a cleaned action with the original action it was matched to.
 *
 * The cleaned copy wins wherever it has a value; the original only fills gaps:
 *   - `stepId` is borrowed when the cleaned copy has none (the cleanup prompt
 *     deliberately does not surface it);
 *   - for `find_*` commands, each missing member of the find-locator cluster
 *     (`findLocator`, `findValue`, `findName`, `findIndex`, `findExact`) is
 *     re-attached, because the cleanup pass occasionally omits them and
 *     codegen cannot emit a `find` call without them;
 *   - `replayUnstable` is carried over when the cleaned copy lacks it, along
 *     with `replayReason` if the original recorded one.
 *
 * @param {object} cleaned - Action as returned by the cleanup pass.
 * @param {object} original - The matching action from the raw trace.
 * @returns {object} A new object; neither argument is mutated.
 */
function mergeFromOriginal(cleaned, original) {
  const merged = { ...cleaned };
  if (!merged.stepId && original.stepId) merged.stepId = original.stepId;
  if (cleaned.command.startsWith("find_")) {
    for (const key of ["findLocator", "findValue", "findName"]) {
      if (!merged[key] && original[key]) merged[key] = original[key];
    }
    // `findIndex` may legitimately be 0, so test for absence rather than falsiness.
    if (merged.findIndex === undefined && original.findIndex !== undefined) merged.findIndex = original.findIndex;
    if (!merged.findExact && original.findExact) merged.findExact = original.findExact;
  }
  if (!merged.replayUnstable && original.replayUnstable) {
    merged.replayUnstable = original.replayUnstable;
    if (original.replayReason) merged.replayReason = original.replayReason;
  }
  return merged;
}
/**
 * Decide whether two actions are "the same action" for the purpose of
 * matching a cleaned action back to its original.
 *
 * - Different commands never match.
 * - `find_*` commands: when both sides carry a `findLocator`, the locator and
 *   `findValue` must agree; when either side lacks it, the command alone is
 *   enough to match.
 * - Everything else is compared on `selector`, `value` and `assertType`, with
 *   a missing (null/undefined) field treated as the empty string.
 *
 * @param {object} a - First action.
 * @param {object} b - Second action.
 * @returns {boolean} `true` when the two actions have the same shape.
 */
function sameShape(a, b) {
  if (a.command !== b.command) return false;
  const agreeOn = (key) => (a[key] ?? "") === (b[key] ?? "");
  if (a.command.startsWith("find_")) {
    // Without a locator on both sides there is nothing to compare.
    if (!a.findLocator || !b.findLocator) return true;
    return agreeOn("findLocator") && agreeOn("findValue");
  }
  return agreeOn("selector") && agreeOn("value") && agreeOn("assertType");
}
2926
4148
  //#endregion
2927
4149
  //#region src/claude/extract-json.ts
@@ -3164,56 +4386,6 @@ function buildDriftUserPrompt(existing) {
3164
4386
  });
3165
4387
  }
3166
4388
  //#endregion
3167
- //#region src/types.ts
3168
- const RouteStepSchema = z.object({
3169
- title: z.string(),
3170
- action: z.string(),
3171
- observation: z.string(),
3172
- status: z.enum([
3173
- "PASSED",
3174
- "FAILED",
3175
- "SKIPPED"
3176
- ]),
3177
- reason: z.string().optional()
3178
- });
3179
- z.object({
3180
- specName: z.string(),
3181
- timestamp: z.string(),
3182
- status: z.enum(["passed", "failed"]),
3183
- steps: z.array(RouteStepSchema)
3184
- });
3185
- const DraftIssueSchema = z.object({
3186
- severity: z.enum([
3187
- "OK",
3188
- "WARN",
3189
- "ERROR"
3190
- ]),
3191
- category: z.enum([
3192
- "assertable",
3193
- "blocks",
3194
- "granularity",
3195
- "unimplemented"
3196
- ]),
3197
- stepId: z.string().nullable(),
3198
- message: z.string(),
3199
- detail: z.string().optional()
3200
- });
3201
- const DraftReportSchema = z.object({
3202
- issues: z.array(DraftIssueSchema),
3203
- patch: z.string()
3204
- });
3205
- const DRAFT_CATEGORY_LABEL = {
3206
- assertable: "Assertability",
3207
- blocks: "Block references",
3208
- granularity: "Step granularity",
3209
- unimplemented: "Unimplemented checks"
3210
- };
3211
- const DraftNamingSchema = z.object({
3212
- featureName: z.string().min(1),
3213
- specName: z.string().min(1),
3214
- reason: z.string().optional()
3215
- });
3216
- //#endregion
3217
4389
  //#region src/drift/analyze.ts
3218
4390
  const DEFAULT_CONCURRENCY$1 = 3;
3219
4391
  /**