npm - ccqa - Versions diffs - 0.4.0 → 0.5.1 - Mend

ccqa 0.4.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/dist/bin/ccqa.mjs +1447 -275
package/dist/package.json +1 -1
package/dist/runtime/test-helpers.d.mts +2 -2
package/dist/runtime/test-helpers.mjs +77 -39
package/dist/{spawn-ab-BxjEhA5e.mjs → spawn-ab-DjRh1-4T.mjs} +1 -1
package/package.json +1 -1

package/dist/bin/ccqa.mjs CHANGED Viewed

@@ -1,14 +1,14 @@
 #!/usr/bin/env node
-import { n as spawnAB } from "../spawn-ab-BxjEhA5e.mjs";
+import { n as spawnAB, t as sleepSync } from "../spawn-ab-DjRh1-4T.mjs";
 import { createRequire } from "node:module";
 import { Command } from "commander";
 import { accessSync, existsSync, readFileSync, statSync } from "node:fs";
 import { fileURLToPath } from "node:url";
 import { access, mkdir, mkdtemp, readFile, readdir, rm, stat, writeFile } from "node:fs/promises";
 import { query } from "@anthropic-ai/claude-agent-sdk";
+import { ZodError, z } from "zod";
 import { delimiter, dirname, join, relative, resolve } from "node:path";
 import { parse, stringify } from "yaml";
-import { ZodError, z } from "zod";
 import { execFile, spawn } from "node:child_process";
 import { createInterface } from "node:readline";
 import { homedir, tmpdir } from "node:os";
@@ -57,13 +57,20 @@ agent-browser --session SESSION press <Key>
 agent-browser --session SESSION select "<selector>" "<value>"
 agent-browser --session SESSION hover "<selector>"
 agent-browser --session SESSION wait --text "<text>" [--timeout <ms>]
-agent-browser --session SESSION wait "<selector>" [--timeout <ms>] [--state visible|hidden]
+agent-browser --session SESSION wait --load networkidle
+agent-browser --session SESSION get count "<selector>"   # element-existence check (returns a number, fast)
 agent-browser --session SESSION cookies clear
+agent-browser --session SESSION find <locator> <value> <action> [<input>] [--name "<n>"] [--exact]
+# See "Selector Rules" for the full \`find\` subset.
+# IMPORTANT: do NOT use \`wait "<css-selector>"\`. agent-browser ignores --timeout on a
+# CSS-selector wait and blocks for ~150s when the selector never matches, killing the run.
+# Wait for readiness with \`wait --text\`, \`wait --load networkidle\`, or just use \`find\`
+# (which waits internally). To check an element exists, use \`get count "<selector>"\`.
 \`\`\`
 ## Selector Rules
-**ALLOWED — these formats only:**
+**ALLOWED selector formats — use ONE of these everywhere a selector appears (click, fill, wait, assert, ...):**
 | Format | Use when |
 |--------|----------|
@@ -71,24 +78,63 @@ agent-browser --session SESSION cookies clear
 | \`text=visible text\` | Unique visible text, no aria-label |
 | \`[placeholder='text']\` | Input identified by placeholder |
 | \`[type='password']\` | Password inputs only |
-| \`a[href*='pattern']\` | Links where \`text=\` fails — use the URL pattern from the ARIA snapshot (e.g. \`a[href*='/settings']\`) |
+| \`a[href*='pattern']\` | Links where \`text=\` fails — use the URL pattern from the ARIA snapshot |
+| \`[data-testid='...']\`, \`[data-qa='...']\` | Specific attribute selectors when an aria-label is absent |
+**FORBIDDEN — these break recorded tests and are rejected by the hook layer:**
+- \`@ref\` / \`@e1\` / \`e14\` — reference IDs are session-specific and change every run.
+- **Bare tag selectors**: \`button\`, \`a\`, \`div\`, \`td\`, \`tr\`, \`main a\`, \`table tbody tr:nth-child(N)\`. These match every element of that tag and are non-deterministic on replay. **This includes the inner selector inside \`find first/last/nth\`** — see the \`find\` rules below.
+- \`[role='button']\` or \`[type='checkbox']\` alone — matches too many elements.
+- JavaScript execution (\`eval\`, \`js\`) — blocked by the hook layer.
+### \`find\` subset (fallback when no ALLOWED CSS uniquely targets the element)
+When repeated aria-labels / visible text make ALLOWED selectors ambiguous (e.g. a chat client where every message row has the same "1 reply" button), use one of these — they record as structured actions and replay deterministically:
+\`\`\`
+find role <role> <action> [--name "<n>"] [--exact]
+find text|label|placeholder|alt|title "<text>" <action> [--exact]
+find testid "<id>" <action>
+find first|last "<ALLOWED-css>" <action>
+find nth <index> "<ALLOWED-css>" <action>
+\`\`\`
+\`<action>\` is one of \`click | dblclick | fill | type | hover | focus | check | uncheck\`. For \`fill\`/\`type\`, the input value follows the action: \`find label "Email" fill "user@example.com"\`.
-**FORBIDDEN — these will break recorded tests or are not valid commands:**
+**Rules for \`find\`:**
-- \`@ref\` / \`@e1\` / \`e14\` — reference IDs are session-specific and change every run; never use them
-- \`[role='button']\` or \`[type='checkbox']\` alone — matches too many elements
-- Bare tag selectors: \`button\`, \`td\`, \`tr\`, \`main a\`, \`table tbody tr:nth-child(N)\` — these are positional/non-deterministic and will fail on replay
-- \`find ...\`, \`textbox ...\`, \`label ...\` — not valid agent-browser commands; these are **blocked** and will fail
-- JavaScript execution (\`eval\`, \`js\`) — **blocked** at the hook level; cannot bypass this restriction
+1. Try ALLOWED selectors first. Only reach for \`find\` when they demonstrably cannot uniquely target the element.
+2. **The inner selector for \`first/last/nth\` MUST be one of the ALLOWED formats above.** Never pass a bare tag — "the last button" is meaningless on replay.
+3. \`find last\` is reliable only when the layout guarantees "the target is the bottom-most match" (e.g. the most-recently-sent chat message). Be explicit in the AB_ACTION label.
+4. Argument order is \`<value> <action> [flags]\` — flags after the action. Putting \`--name\` / \`--exact\` before the action makes agent-browser fail with "Unknown subaction".
+5. \`--name "<n>"\` is **role-only**. Never pass it to \`find text\`, \`find label\`, etc.
+6. \`find\` includes its own wait; do not chain a \`wait\` before it.
-**Selector workflow:**
-1. Run \`snapshot\` — read the ARIA tree output carefully
-2. Find the element; note its exact \`aria-label\` value if present
-3. If aria-label present → use \`[aria-label='...']\`; otherwise → use \`text=...\`
-4. If \`text=...\` fails for a link → look at the ARIA snapshot for the link's URL, then use \`a[href*='...']\` with a distinctive URL substring (e.g. \`a[href*='/dashboard']\`, \`a[href*='filter=active']\`)
-5. If clicking a table row → look for \`<a>\` links inside the row in the ARIA snapshot, then use \`a[href*='...']\` targeting that link's URL pattern
-6. For checkboxes: try \`check "text=Label"\` or \`check "[aria-label='Label']"\`
-7. Never guess — if a selector fails once, take a fresh snapshot before retrying
+**Examples:**
+- ✓ \`find last "[data-testid='reply-link']" click\` — specific attribute + layout-guaranteed last match
+- ✓ \`find role button click --name "Submit"\` — role + accessible name (flags after action)
+- ✗ \`find role button --name "Submit" click\` — wrong order
+- ✗ \`find last "button" click\` — bare tag
+### Selector workflow
+1. Run \`snapshot\` and read the ARIA tree.
+2. Identify the element; note its exact \`aria-label\` if present.
+3. If aria-label present → use \`[aria-label='...']\`. Otherwise → use \`text=...\`.
+4. For links where \`text=\` fails, find the link's URL in the snapshot and use \`a[href*='...']\` with a distinctive substring.
+5. For checkboxes: try \`check "text=Label"\` or \`check "[aria-label='Label']"\`.
+6. If repeated labels make every ALLOWED selector ambiguous → use the \`find\` subset above.
+7. Never guess. If a selector fails, take a fresh snapshot before retrying.
+### Special input types
+**contenteditable / RichText editors**: \`fill "[contenteditable='true']" "<text>"\` works on contenteditable elements (chat composers, WYSIWYG bodies) — agent-browser sets the text directly. Use a single \`fill\`; do NOT just \`click\` the field and rely on \`keyboard inserttext\` (that keystroke command is not recorded as a structured action, so the text never makes it into the generated test and the field ends up empty on replay).
+**combobox / select with a required marker (\`*\`)**: required form fields often include the marker in their accessible name. If \`find role combobox click --name "<label>"\` misses, prefer \`find label "<label>" click\` or \`click "[aria-label='<label> *']"\`.
+**Verifying cleanup / deletion**: assert the *absence* of the deleted thing, not the surrounding listing screen's text. Use \`wait --fn "!document.body.innerText.includes('<unique-label>')"\` (text disappearance) — never \`wait "<css-selector>" --state hidden\` (blocks the daemon) and never \`wait --text "<navbar label>"\` (passes regardless of the deletion).
 ## Test Specification
@@ -103,52 +149,42 @@ ${stepsText}
 ## Execution Workflow
 For each step:
-1. Emit \`STEP_START|<step-id>|<short description of what this step does>\`
-2. Run \`snapshot\` and identify selectors from the ARIA tree
-3. Execute the action using an ALLOWED selector
-4. Emit \`AB_ACTION|...\` for every browser action (see below)
-5. Run \`snapshot\` again to verify the outcome
-6. Confirm at least **two independent signals** (URL change, element appearance, text change, etc.)
-7. For each verified signal, emit \`AB_ACTION|assert|...\` (see Assertion Protocol below)
-8. Emit \`ROUTE_STEP|...\`
-9. Emit \`STEP_DONE\`, \`ASSERTION_FAILED\`, or \`STEP_SKIPPED\`
-**After form submission or navigation:** take a snapshot before continuing. If an intermediate screen appears (e.g. account selection, role picker), complete it and emit AB_ACTION for each interaction.
+1. Emit \`STEP_START|<step-id>|<short description>\`.
+2. Run \`snapshot\` and identify selectors from the ARIA tree.
+3. Execute the action using an ALLOWED selector (see Selector Rules).
+4. Emit \`AB_ACTION|...\` for every browser action (see AB_ACTION Protocol).
+5. Run \`snapshot\` again to verify the outcome.
+6. Confirm at least **two independent signals** (URL change, element appearance, text change, ...).
+7. For each verified signal, emit \`AB_ACTION|assert|...\` (see Assertion Protocol).
+8. Emit \`ROUTE_STEP|...\`.
+9. Emit \`STEP_DONE\`, \`ASSERTION_FAILED\`, or \`STEP_SKIPPED\`.
+**After form submission or navigation:** take a fresh snapshot before continuing. If an intermediate screen appears (account selection, role picker, ...), complete it and emit AB_ACTION for each interaction.
 ## Guardrails
-- **Stop after 3 consecutive failures on the same step** — emit \`ASSERTION_FAILED\` and report the blocker. Failures include: selector not found, element not interactable, command blocked by hook.
-- **Do NOT use workarounds** — if all ALLOWED selectors fail, do NOT fall back to \`mouse move\`, coordinate-based clicks, \`Tab\`+\`Enter\` keyboard navigation, or any other indirect method. These cannot be recorded as reliable test actions. Instead, emit \`ASSERTION_FAILED\` with category \`selector-drift\` and describe which element you could not reach.
-- **Do NOT use bare tag selectors** — never use \`click "button"\`, \`click "td"\`, \`click "main a"\`, or \`click "a"\` alone. These match too many elements and are non-deterministic. Always use a specific ALLOWED selector format.
-- Do NOT retry a selector without taking a fresh snapshot first
-- Do NOT work around blockers (login walls, missing data, captchas) — stop and report
-- **Do NOT suppress errors** — never use \`2>/dev/null\`, \`|| true\`, \`; other-command\`, or any other technique that hides agent-browser failures. Each \`agent-browser\` command must run standalone so failures are properly detected and recorded.
-- **If \`agent-browser\` is not found, stop immediately.** Do not run \`which\`, \`find\`, \`npm ls\`, \`npm install\`, \`npx\`, \`brew\`, or any other discovery / installation command. Do not try alternate paths. The ccqa host already validates the binary before launching you, so if you see \`command not found\` it is a host-environment problem you cannot fix from inside the test run. Emit one line and terminate: \`ASSERTION_FAILED|step-XX|agent-browser binary not available in PATH\`.
+- **Stop after 3 consecutive failures on the same step** — emit \`ASSERTION_FAILED\` and report the blocker.
+- **No workarounds.** If all ALLOWED selectors fail, emit \`ASSERTION_FAILED|...|selector-drift: ...\`. Do NOT fall back to coordinate clicks, mouse moves, or \`Tab\`+\`Enter\` keyboard navigation — they cannot be recorded as reliable test actions.
+- Do NOT retry a selector without taking a fresh snapshot first.
+- Do NOT work around blockers (login walls, missing data, captchas) — stop and report.
+- **Do NOT suppress errors.** Never use \`2>/dev/null\`, \`|| true\`, \`; true\`, or any technique that hides agent-browser failures. Each \`agent-browser\` invocation must be its own standalone Bash call. Chaining multiple agent-browser commands with \`&&\` / \`;\` / \`|\` is rejected by the hook layer.
+- **If \`agent-browser\` is not found, stop immediately.** Do not run \`which\`, \`find\`, \`npm ls\`, \`npm install\`, \`npx\`, \`brew\`, or any other discovery / installation command. Emit one line and terminate: \`ASSERTION_FAILED|step-XX|agent-browser binary not available in PATH\`.
 ## Source Code Reference
-You have access to **Read**, **Grep**, and **Glob** tools to inspect the application source code. Use them proactively to find correct selectors — do NOT guess \`a[href*='...']\` patterns by trial and error.
+You have \`Read\`, \`Grep\`, and \`Glob\` to inspect the application source code. Use them proactively to find correct selectors — do NOT guess \`a[href*='...']\` patterns.
-**When to read source code:**
-- Before clicking a link: Grep for the link text or URL pattern in the codebase to find the exact \`href\` value
-- Before navigating to a new page: Glob for page/route files to understand the URL structure
-- When the ARIA snapshot shows an element but \`text=\` and \`[aria-label=]\` selectors fail: Read the component to find what HTML attributes the element has
+**When**: before clicking a link (find the \`href\`); before navigating to a new page (understand routing); when an ARIA element exists but no ALLOWED selector matches (find the actual HTML attributes).
-**How:**
-1. Use \`Grep\` to search for UI text, component names, or URL patterns
-2. Use \`Read\` to inspect the component's JSX/TSX and find \`href\`, \`aria-label\`, \`data-testid\`, or class names
-3. Build a precise ALLOWED selector from the discovered attributes
+**How**: \`Grep\` for UI text / component names / URL patterns → \`Read\` the JSX/TSX to find \`href\`, \`aria-label\`, \`data-testid\`, or class names → build a precise ALLOWED selector.
-**Rules:**
-- Only READ source files — never modify them
-- Keep source reading focused — search for specific strings, not entire directories
+**Rules**: only READ source files, never modify them. Keep searches focused.
 ## Waiting for Async Operations
-Prefer the \`wait\` command over polling:
+Prefer \`wait\` over polling:
 \`\`\`bash
-# Best: wait for expected text to appear
 agent-browser --session ${sessionName} wait --text "<completion text>"
 \`\`\`
@@ -158,7 +194,6 @@ If polling is required (e.g. waiting for a spinner to disappear):
 for i in $(seq 1 18); do
   sleep 10
   result=$(agent-browser --session ${sessionName} snapshot 2>&1)
-  # Check result for the expected change and break when found
   echo "$result" | grep -q "<done indicator>" && break
 done
 agent-browser --session ${sessionName} snapshot
@@ -186,18 +221,28 @@ AB_ACTION|drag|<source selector>|<target selector>|<source label>
 AB_ACTION|wait|<selector or text>|<label>
 AB_ACTION|snapshot|<key observation, max 100 chars>
 AB_ACTION|assert|<assertType>|<selector or "">|<value or "">|<observation>
+# find_* (semantic locator fallback). <extra> = role's --name OR nth's index OR "".
+# <exact> = literal "exact" if --exact was passed, "" otherwise. Keep empty pipe slots.
+AB_ACTION|find_click|<locator>|<value>|<extra>|<exact>|<label>
+AB_ACTION|find_dblclick|<locator>|<value>|<extra>|<exact>|<label>
+AB_ACTION|find_hover|<locator>|<value>|<extra>|<exact>|<label>
+AB_ACTION|find_focus|<locator>|<value>|<extra>|<exact>|<label>
+AB_ACTION|find_check|<locator>|<value>|<extra>|<exact>|<label>
+AB_ACTION|find_uncheck|<locator>|<value>|<extra>|<exact>|<label>
+AB_ACTION|find_fill|<locator>|<value>|<extra>|<exact>|<input>|<label>
+AB_ACTION|find_type|<locator>|<value>|<extra>|<exact>|<input>|<label>
 \`\`\`
-The selector in AB_ACTION must be one of the ALLOWED formats above.
+Selectors in AB_ACTION must follow Selector Rules. \`find_*\` lines use the locator + value pair instead of a separate selector. Do NOT include literal \`|\` inside any field — replace with a space if necessary.
-**CRITICAL — record only successful actions.** The AB_ACTION stream is the
-canonical replay sequence: every line in it must be reproducible on a fresh
-browser session. Therefore:
+**CRITICAL — record only successful actions.** The AB_ACTION stream is the canonical replay sequence: every line must be reproducible on a fresh browser session.
-- If you tried a selector and \`agent-browser\` returned a non-zero exit (selector not found, element not interactable, timeout): **do NOT emit \`AB_ACTION|...\`** for that attempt. Take a fresh snapshot, switch selector, and only emit the AB_ACTION for the call that finally succeeded.
-- If you explored multiple selectors for the same logical action (e.g. tried \`[aria-label='Email']\`, it failed, then \`[placeholder='Email']\` worked): emit AB_ACTION for the **working selector only**. The failed attempt must not appear in the trace.
-- The same rule applies to \`AB_ACTION|assert|...\` lines: only emit them for assertions you actually verified on the current page in the current snapshot. Never declare an assertion against a selector you have not just confirmed visible — even if you intended to use it earlier.
-- If a step ultimately fails after retries: emit \`ASSERTION_FAILED\` and STOP. Do NOT leave half-recorded actions for the failed step in the AB_ACTION stream.
+- A non-zero exit from agent-browser (selector not found, element not interactable, timeout) → **do NOT emit AB_ACTION** for that attempt. Switch selector and only emit the AB_ACTION for the call that finally succeeded.
+- If you tried several selectors / \`find_*\` locators for the same logical action, emit AB_ACTION for the **last working one only**. Multiple failed attempts in a row will all fail at replay validation and silently delete the step from the generated test.
+- \`AB_ACTION|assert|...\` follows the same rule: only emit assertions you actually verified on the current page in the current snapshot.
+- **Environment-failure recovery is not part of the test.** If a session times out, a network blip drops you to login, or the app crashes and you re-login / re-navigate / re-fill to recover, do NOT emit AB_ACTION for the recovery operations.
+- If a step ultimately fails after retries: emit \`ASSERTION_FAILED\` and STOP. Do not leave half-recorded actions in the stream.
 ## Assertion Protocol
@@ -212,62 +257,74 @@ After verifying each step, emit \`AB_ACTION|assert\` lines for each signal you c
 | \`element_visible\` | Element is visible | CSS selector | (empty) |
 | \`element_not_visible\` | Element is hidden/removed | CSS selector | (empty) |
 | \`url_contains\` | URL contains a pattern | (empty) | URL substring |
-| \`element_enabled\` | Button/input is enabled | CSS selector | (empty) |
-| \`element_disabled\` | Button/input is disabled | CSS selector | (empty) |
+| \`element_enabled\` | Button/input is enabled | CSS selector (state-independent) | (empty) |
+| \`element_disabled\` | Button/input is disabled | CSS selector (state-independent) | (empty) |
 | \`element_checked\` | Checkbox is checked | CSS selector | (empty) |
 | \`element_unchecked\` | Checkbox is unchecked | CSS selector | (empty) |
-**Stability rules — CRITICAL:**
-- **NEVER** assert on: timestamps (dates, times), session IDs, exact numeric counts that vary between runs
-- For dynamic counts (e.g. "42 results"): assert on the STABLE part only (e.g. "results"), not the number
-- **PREFER** asserting on: status text, button labels, URL patterns, element enabled/disabled state
+**Stability rules — CRITICAL. NEVER assert on values that change run-to-run:**
+- Timestamps, session IDs, exact numeric counts that vary between runs.
+- **Absolute dates / clock times**: \`12:34:56\`, \`2026-05-20\`, \`2026年5月20日\`, \`5月20日\`. These are scrubbed by post-trace literal-scrub anyway — avoid them at the source.
+- **Relative-time labels** — true only in the moment of the trace, stale by replay:
+  - English: \`just now\`, \`5 minutes ago\`, \`2 hours ago\`, \`yesterday\`, \`last week\`.
+  - Japanese: \`たった今\`, \`3分前\`, \`1時間前\`, \`昨日\`.
+- Dynamic counts like "42 results" → assert on the stable suffix ("results") only.
+- **PREFER**: status text, button labels, URL patterns, element enabled/disabled state.
-**Page context rules — CRITICAL:**
-- After a page navigation (\`open\` or \`click\` that navigates), take a **fresh snapshot** BEFORE emitting any assertions
-- Only assert on text/elements that are visible on the **current** page — never assert on text from the previous page
-- If you navigated away from a page, its text is gone — do not emit \`text_visible\` for it
+**No tautological state asserts — CRITICAL for \`element_enabled\` / \`element_disabled\`:**
-**Selector rules for assert actions — CRITICAL:**
-- Use the **same ALLOWED formats** as browser actions — never invent aria-label values
-- Only use \`[aria-label='...']\` if that **exact** aria-label string appears in the current ARIA snapshot output
-- When unsure, prefer \`text_visible\`/\`text_not_visible\` (no selector needed) over guessing a selector — but still pre-verify with \`wait --text\` per the MUST-VERIFY rule below; \`alt\`-attribute "text" will not match.
-- For \`element_disabled\`/\`element_enabled\`: use a CSS class selector if no aria-label is confirmed in the snapshot
+The selector must identify *which* element by something **other than the state you are asserting**. Selecting the element *by* its state and then asserting that state is a tautology that always passes and verifies nothing.
+- ✗ \`element_disabled | button[disabled] |\` — picks an already-disabled button, then "confirms" it is disabled. Passes even if the button the spec cares about is missing or enabled.
+- ✗ \`element_enabled | button:enabled |\`, \`[aria-disabled='true']\`, \`input:disabled\` — same trap.
+- ✓ Name the element by a stable, state-independent selector and assert the state on it: e.g. the "Submit" button is \`find role button --name "Submit"\`; to assert it is disabled, give \`element_disabled\` a selector that targets *that* button (a stable \`id\` / \`data-testid\` / unique class), **not** \`[disabled]\`.
+- If you cannot target the specific element without a state pseudo-class/attribute, **do not emit the enabled/disabled assert** — assert a user-visible consequence instead (e.g. the action it gates does not happen, a "you don't have permission" message is shown), or rely on \`text_visible\` for the label plus \`text_not_visible\` for what an enabled control would have produced.
+**Page-context and selector rules:**
+- After a navigation, take a **fresh snapshot** before emitting any assertion. Don't assert on text from the previous page.
+- Assertion selectors follow the same Selector Rules as actions — never invent aria-label values; use the exact strings from the current snapshot.
+- When unsure, prefer \`text_visible\`/\`text_not_visible\` (no selector needed) — but pre-verify with \`wait --text\` per the MUST-VERIFY rule below.
 **MUST-VERIFY rule — STRICT (applies to every assert except \`url_contains\`):**
-The \`snapshot\` output is the **accessibility tree**: a semantic view. \`agent-browser\` queries the **real DOM**. They DO NOT always match. Two known traps:
+The \`snapshot\` output is the **accessibility tree**, but \`agent-browser\` queries the **real DOM**. They don't always agree. Two known traps:
-1. *Selector trap*: a snapshot row like \`textbox "Email address"\` is reachable via \`[placeholder='...']\` but **NOT** via \`[aria-label='...']\` if no \`aria-label\` attribute is actually set — the browser inferred the label from \`<label for=>\` / surrounding text / \`placeholder\`.
-2. *Text trap*: a snapshot row like \`link "Dashboard"\` may come from \`<a><img alt="Dashboard"></a>\` — the visible "text" is an \`alt\` attribute, not a text node. \`text_visible\` (which scans visible text nodes via \`wait --text\`) will NOT find it.
+1. *Selector trap*: a snapshot row like \`textbox "Email address"\` may be reachable via \`[placeholder='...']\` but **not** via \`[aria-label='...']\` if no aria-label attribute is actually set (the browser inferred the label from \`<label for=>\` / placeholder).
+2. *Text trap*: a snapshot row like \`link "Dashboard"\` may come from \`<a><img alt="Dashboard"></a>\` — the visible "text" is an \`alt\` attribute, not a text node. \`text_visible\` (which scans visible text nodes) will NOT find it.
+3. *Input-value trap*: after you \`fill\` an \`<input>\` / \`<textarea>\` / \`[contenteditable]\`, the text you typed lives in the element's **value**, not as a visible text node. **Do NOT assert the typed value with \`text_visible\`** — it will never match. The spec's "the field reflects X" expectation is implicitly confirmed when the form submits successfully and the value shows up on the *result* page (a list row, a detail page). Assert there, not on the input itself.
-Before emitting an \`AB_ACTION|assert|...\` line, **verify the assertion form actually resolves on the live page**:
+Before emitting \`AB_ACTION|assert|...\`, **verify the assertion form actually resolves on the live page**:
 \`\`\`bash
 # element_visible / element_enabled / element_disabled / element_checked / element_unchecked
-agent-browser --session SESSION wait "<selector>" --timeout 3000
+# Use get count (fast, returns a number). Do NOT use \`wait "<selector>"\` — it blocks the daemon.
+agent-browser --session SESSION get count "<selector>"   # >=1 means present
 # element_not_visible
-agent-browser --session SESSION wait "<selector>" --state hidden --timeout 3000
+agent-browser --session SESSION get count "<selector>"   # 0 means absent
 # text_visible
 agent-browser --session SESSION wait --text "<text>" --timeout 3000
 # text_not_visible
-agent-browser --session SESSION wait --text "<text>" --state hidden --timeout 3000
+agent-browser --session SESSION wait --fn "!document.body.innerText.includes('<text>')" --timeout 3000
 \`\`\`
-Apply the "record only successful actions" rule from the AB_ACTION section above. **Additionally**, when *no* form verifies — e.g. you tried \`[aria-label='X']\`, \`[placeholder='X']\`, and \`text=X\` and they all timed out, or the "text" turned out to be an \`alt\` / aria-label — **DROP the assertion entirely**. Fewer, real assertions beat invented ones that fail at replay. Prefer swapping a failed \`text_visible\` for an \`element_visible\` against the link/button selector when the visible label came from \`alt\` / aria-label.
+When *no* form verifies — e.g. \`[aria-label='X']\`, \`[placeholder='X']\`, and \`text=X\` all timed out, or the visible text turned out to be an \`alt\` — **drop the assertion entirely**. Fewer real assertions beat invented ones that fail at replay. \`url_contains\` is exempt (it checks the URL string, not the DOM).
-\`url_contains\` is exempt — it checks the current URL string, not the DOM/accessibility tree.
+**Field positions — get these RIGHT.** The line is
+\`AB_ACTION|assert|<assertType>|<selector>|<value>|<observation>\`. The value
+(the asserted text for \`text_visible\`/\`text_not_visible\`/\`url_contains\`) goes
+in the **value** slot, NOT the observation slot. A common mistake is writing
+\`text_visible|||Done|...\` (three pipes → empty selector AND empty value, "Done"
+lands in observation): that records an assert with no value and it fails at
+replay. Use exactly two pipes after the assertType for text asserts.
-**Examples:**
 \`\`\`
-AB_ACTION|assert|url_contains|||/dashboard|Navigated to dashboard
-AB_ACTION|assert|element_disabled|.btn-submit||Submit button disabled before form is valid
-AB_ACTION|assert|element_enabled|.btn-submit||Submit button enabled after form is filled
-AB_ACTION|assert|text_visible|||Loading|Operation started
-AB_ACTION|assert|text_visible|||Done|Operation completed
-AB_ACTION|assert|text_visible|||Success|Confirmation message appeared
+AB_ACTION|assert|url_contains||/dashboard|Navigated to dashboard
+AB_ACTION|assert|element_disabled|.btn-submit||Submit disabled before form is valid
+AB_ACTION|assert|element_enabled|.btn-submit||Submit enabled after form is filled
+AB_ACTION|assert|text_visible||Loading|Operation started
+AB_ACTION|assert|text_visible||Done|Operation completed
 \`\`\`
 ## Status Protocol
@@ -275,7 +332,7 @@ AB_ACTION|assert|text_visible|||Success|Confirmation message appeared
 Emit exactly one status line per step (outside any code block):
 \`\`\`
-STEP_START|<step-id>|<short description of what this step does>
+STEP_START|<step-id>|<short description>
 STEP_DONE|<step-id>|<what was verified>
 ASSERTION_FAILED|<step-id>|<category: app-bug|env-issue|auth-blocked|missing-test-data|selector-drift|agent-misread>: <reason>
 STEP_SKIPPED|<step-id>|<reason>
@@ -391,6 +448,35 @@ function run(message) {
 	write("run", message);
 }
 /**
+* Render a single-line progress indicator for a step-by-step loop.
+*
+* On a TTY the line is rewritten in place: `\r` moves the cursor back to
+* the start of the row and `\x1b[K` erases the remainder of the previous
+* (possibly longer) line, so the terminal stays uncluttered. In a non-TTY
+* environment (CI, piped runs) we fall back to a regular `[info]` line,
+* printed for the first step and then whenever at least
+* PROGRESS_NONTTY_STRIDE steps have passed since the last printed line, to
+* avoid spamming the log with one line per action. As a consequence the
+* final step is not guaranteed to be printed in non-TTY mode.
+*
+* `current` is the zero-based step index (displayed as `current + 1`),
+* `total` is the number shown after the slash, and `label` is free text
+* appended to the line.
+*
+* Callers MUST call `progressEnd()` when the loop finishes (or aborts).
+* On a TTY it erases the in-place progress line (it does not print a
+* newline), so the next log line starts on a clean row instead of being
+* mixed with leftover progress text. In every environment it also resets
+* the non-TTY stride bookkeeping so the next loop starts fresh.
+*/
+const PROGRESS_NONTTY_STRIDE = 5;
+let lastProgressNonTtyEmit = -1;
+function progress(current, total, label) {
+	const text = `[info] ${current + 1}/${total} ${label}`;
+	if (process.stdout.isTTY) {
+		process.stdout.write(`\r${text}\x1b[K`);
+		return;
+	}
+	if (current === 0 || current - lastProgressNonTtyEmit >= PROGRESS_NONTTY_STRIDE) {
+		process.stdout.write(`${text}\n`);
+		lastProgressNonTtyEmit = current;
+	}
+}
+function progressEnd() {
+	if (process.stdout.isTTY) process.stdout.write(`\r\x1b[K`);
+	lastProgressNonTtyEmit = -1;
+}
+/**
 * Time a long-running step under the given scope, emitting `started` and
 * `finished in N.Ns` markers. Scope must be a tag the user wants to grep
 * for — typically "run" for vitest and "fix" for diagnose-loop steps.
@@ -408,6 +494,163 @@ async function timedPhase(label, fn, scope = "fix") {
 	}
 }
 //#endregion
+//#region src/spec/yaml-schema.ts
+/**
+* Action step — one user-facing browser interaction. `instruction` and
+* `expected` are the natural-language texts handed to Claude during
+* `ccqa trace`; both must be non-empty. URLs are written inside
+* `instruction`, either verbatim or as `${ENV_VAR}` references (resolved at
+* runtime).
+*/
+const ActionStepSchema = z.strictObject({
+	instruction: z.string().min(1),
+	expected: z.string().min(1)
+});
+/**
+* Include step — pulls in a reusable block (`.ccqa/blocks/<name>/spec.yaml`).
+* `params` values are plain strings; `${VAR}` env refs inside them are
+* resolved at expand time, the same way step instructions are.
+*/
+const IncludeStepSchema = z.strictObject({
+	include: z.string().min(1),
+	params: z.record(z.string(), z.string()).optional()
+});
+/**
+* Any spec step: action or include. The presence of the `include` key tells
+* the two apart — see `isIncludeStep`.
+*/
+const StepSchema = z.union([ActionStepSchema, IncludeStepSchema]);
+/** Top-level spec schema; being a strict object it rejects any unknown key. */
+const TestSpecSchema = z.strictObject({
+	title: z.string().min(1),
+	relatedPaths: z.array(z.string().min(1)).optional(),
+	steps: z.array(StepSchema).min(1)
+});
+/**
+* Declaration of one block param. `required` defaults to true; only an
+* explicit `required: false` makes the param optional. `secret: true` marks
+* the value as sensitive — codegen renders such values as
+* `process.env.<NAME> ?? ""` template literals so the secret never ends up
+* baked into test.spec.ts. `dummy` is a placeholder value surfaced by the
+* draft / drift prompts (which see the block in isolation, before any
+* include site exists); `description` is the param's semantic role, also
+* consumed by those prompts and by spec authors browsing the block.
+*/
+const BlockParamSchema = z.strictObject({
+	name: z.string().min(1),
+	required: z.boolean().optional(),
+	secret: z.boolean().optional(),
+	dummy: z.string().optional(),
+	description: z.string().optional()
+});
+/**
+* Block schema. Block steps are limited to ActionStep, so nested blocks are
+* forbidden: including a block from inside another block fails parsing here
+* (the store layer maps the cryptic "Unrecognized key: 'include'" error into
+* a targeted nested-block message).
+*/
+const BlockSpecSchema = z.strictObject({
+	title: z.string().min(1),
+	params: z.array(BlockParamSchema).optional(),
+	steps: z.array(ActionStepSchema).min(1)
+});
+/** Runtime discriminator for the StepSchema union: true when `step` has an `include` key. */
+function isIncludeStep(step) {
+	const hasIncludeKey = "include" in step;
+	return hasIncludeKey;
+}
+/** A block param counts as required unless it is declared with `required: false`. */
+function isParamRequired(param) {
+	const { required } = param;
+	if (required === false) return false;
+	return true;
+}
+//#endregion
+//#region src/types.ts
+/**
+* One recorded step: `title`, `action` and `observation` strings, a
+* PASSED / FAILED / SKIPPED `status`, and an optional free-text `reason`.
+*/
+const RouteStepSchema = z.object({
+	title: z.string(),
+	action: z.string(),
+	observation: z.string(),
+	status: z.enum([
+		"PASSED",
+		"FAILED",
+		"SKIPPED"
+	]),
+	reason: z.string().optional()
+});
+/**
+* Schema for a whole run record (`specName`, `timestamp`, passed/failed
+* `status`, list of RouteStepSchema steps). The result is not bound to a
+* name here. NOTE(review): presumably the bundler dropped an export binding
+* that nothing in this file uses — confirm against src/types.ts.
+*/
+z.object({
+	specName: z.string(),
+	timestamp: z.string(),
+	status: z.enum(["passed", "failed"]),
+	steps: z.array(RouteStepSchema)
+});
+/**
+* Semantic locator strategies exposed by `agent-browser find`. Used by the
+* `find_*` commands when a target cannot be uniquely picked out by the
+* ALLOWED CSS forms (e.g. repeated `aria-label='1 reply'` rows where only
+* "the last one" is meaningful).
+*
+* `first` / `last` / `nth` are positional helpers and their `findValue`
+* carries an inner CSS selector; `nth` additionally needs `findIndex`. The
+* remaining locators read `findValue` as the human-visible text/id.
+* `role` may pair with `findName` to filter by accessible name.
+*/
+const FIND_LOCATORS = [
+	"role",
+	"text",
+	"label",
+	"placeholder",
+	"alt",
+	"title",
+	"testid",
+	"first",
+	"last",
+	"nth"
+];
+/**
+* Actions reachable via `agent-browser find <locator> ... <action>`. Kept
+* here next to the locator list so all `find_*` knowledge lives in one
+* place — `cli/trace.ts`, `claude/invoke.ts`, and `runtime/replay-validate.ts`
+* import these instead of redefining their own sets.
+*/
+const FIND_ACTIONS = [
+	"click",
+	"dblclick",
+	"fill",
+	"type",
+	"hover",
+	"focus",
+	"check",
+	"uncheck"
+];
+/**
+* One draft-review issue: a `severity` (OK / WARN / ERROR), a `category`
+* (the same four keys as DRAFT_CATEGORY_LABEL), a `stepId` that may be null,
+* a `message`, and an optional `detail`.
+*/
+const DraftIssueSchema = z.object({
+	severity: z.enum([
+		"OK",
+		"WARN",
+		"ERROR"
+	]),
+	category: z.enum([
+		"assertable",
+		"blocks",
+		"granularity",
+		"unimplemented"
+	]),
+	stepId: z.string().nullable(),
+	message: z.string(),
+	detail: z.string().optional()
+});
+/** A draft report: the list of issues plus a `patch` string. */
+const DraftReportSchema = z.object({
+	issues: z.array(DraftIssueSchema),
+	patch: z.string()
+});
+/** Display label for each DraftIssueSchema `category` value. */
+const DRAFT_CATEGORY_LABEL = {
+	assertable: "Assertability",
+	blocks: "Block references",
+	granularity: "Step granularity",
+	unimplemented: "Unimplemented checks"
+};
+/** Naming result: non-empty `featureName` and `specName`, optional `reason`. */
+const DraftNamingSchema = z.object({
+	featureName: z.string().min(1),
+	specName: z.string().min(1),
+	reason: z.string().optional()
+});
+//#endregion
 //#region src/claude/invoke.ts
 function resolveModel(explicit) {
 	if (explicit) return explicit;
@@ -444,12 +687,25 @@ async function invokeClaudeStreaming(options, onEvent) {
 				if (typeof cmd !== "string") return {};
 				if (isBlockedAbSubcommand(cmd)) return {
 					decision: "block",
-					reason: "This agent-browser subcommand is not allowed because it cannot be recorded as a structured test action. Use only the standard commands: click, check, fill, select, hover, press, wait. Take a fresh snapshot to find the correct selector."
+					reason: "This agent-browser subcommand is not allowed because it cannot be recorded as a structured test action. Use only the standard commands: click, check, fill, select, hover, press, wait, find (with role/text/label/placeholder/alt/title/testid/first/last/nth). Take a fresh snapshot to find the correct selector."
 				};
 				if (hasRefSelector(cmd)) return {
 					decision: "block",
 					reason: "@ref selectors (like @e14) are session-specific and change every run. They cannot be used in generated tests. Use one of the allowed selector formats instead: [aria-label='...'], text=..., [placeholder='...'], or [type='password']. Take a fresh snapshot and find the element's aria-label or visible text."
 				};
+				const bareTag = findPositionalBareTag(cmd);
+				if (bareTag !== null) return {
+					decision: "block",
+					reason: `\`find ${bareTag.locator}\` with a bare tag selector (\`${bareTag.selector}\`) is rejected: it matches every <${bareTag.selector}> on the page and is non-deterministic on replay. Pass a specific attribute selector instead, e.g. \`find ${bareTag.locator} "[aria-label='...']" ${bareTag.action}\` or \`find ${bareTag.locator} "[data-qa='...']" ${bareTag.action}\`. Take a fresh snapshot to find the right attribute.`
+				};
+				if (hasMultipleAbInvocations(cmd)) return {
+					decision: "block",
+					reason: "Run each `agent-browser` call as its own Bash command. Chaining multiple invocations with &&, ;, |, or || prevents ccqa from recording them as discrete steps and lets failed attempts leak into the trace. Issue one Bash tool call per agent-browser command."
+				};
+				if (hasErrorSuppression(cmd)) return {
+					decision: "block",
+					reason: "Do not suppress errors on `agent-browser` commands. Remove `|| true`, `|| :`, `2>/dev/null`, `; true`, and similar redirects so ccqa can detect failures and roll back unsuccessful attempts. Run the command standalone and let it surface its exit code."
+				};
 				const ab = extractAbActionFromBashCommand(cmd);
 				if (ab && onAbAction) {
 					lastAbToolUseId = input.tool_use_id;
@@ -496,7 +752,6 @@ async function invokeClaudeStreaming(options, onEvent) {
+/**
+* Names on the agent-browser subcommand block list. `find` is dropped from
+* the list in this version; `find` invocations are turned into `find_*`
+* actions by extractFindAbAction instead.
+* NOTE(review): presumably consulted by isBlockedAbSubcommand — confirm.
+*/
 const BLOCKED_AB_SUBCOMMANDS = new Set([
 	"eval",
 	"js",
-	"find",
 	"label",
 	"textbox"
 ]);
@@ -557,6 +812,33 @@ function isBashToolResponseError(tool_response) {
 	if (r["killed"] === true) return true;
 	return false;
 }
+/**
+* Spot `agent-browser ... find first|last|nth <bare-tag> <action>`.
+*
+* A bare tag handed to a *positional* finder matches every element of that
+* tag on the page, so "the last button" resolves to a different element
+* whenever the page shape shifts, and tests recorded on top of it are flaky
+* by construction. The check is deliberately narrow: `find role button
+* --name X` is fine because role + accessible name stays stable.
+*
+* Returns `{ locator, selector, action }` for an offending command, or null
+* when the command is not a positional `find` on a bare tag name.
+*/
+function findPositionalBareTag(cmd) {
+	if (extractAbSubcommand(cmd) !== "find") return null;
+	const tail = cmd.slice(cmd.indexOf("agent-browser") + "agent-browser".length).trim();
+	const tokens = shellTokenize(tail);
+	// Leading options are skipped in pairs (flag + value); the first token
+	// that is not a flag is taken to be the subcommand.
+	let subcommandAt = 0;
+	for (; subcommandAt < tokens.length; subcommandAt += 2) if (!tokens[subcommandAt].startsWith("-")) break;
+	const locator = tokens[subcommandAt + 1];
+	const isPositional = locator === "first" || locator === "last" || locator === "nth";
+	if (!isPositional) return null;
+	// `nth` carries an index between the locator and the inner selector.
+	const selectorAt = subcommandAt + (locator === "nth" ? 3 : 2);
+	const selector = tokens[selectorAt];
+	const isBareTag = Boolean(selector) && /^[a-zA-Z][a-zA-Z0-9]*$/.test(selector);
+	if (!isBareTag) return null;
+	return {
+		locator,
+		selector,
+		action: tokens[selectorAt + 1] ?? ""
+	};
+}
 /** Returns true if any argument to an agent-browser command uses a @ref selector (e.g. @e14). */
 function hasRefSelector(cmd) {
 	const abIdx = cmd.indexOf("agent-browser");
@@ -569,6 +851,69 @@ function hasRefSelector(cmd) {
 	return false;
 }
 /**
+* Returns true when `cmd` contains more than one `agent-browser` invocation
+* chained together via shell operators (`&&`, `||`, `;`, `|`) or placed on
+* separate lines. The PreToolUse hook only records ONE AB_ACTION per Bash
+* call, so chained invocations would silently drop every intermediate
+* failure — turning "I tried four selectors before one worked" into a
+* clean-looking trace with orphaned actions that later fail at replay.
+*
+* The scan records every position where a new shell command can start
+* (index 0 and the character after each separator run) and counts how many
+* of those positions begin with the word `agent-browser`. Quoted text is
+* skipped so `agent-browser fill 'agent-browser'` doesn't false-fire, and a
+* backslash escapes the following character everywhere except inside single
+* quotes, so `\"` does not end a double-quoted string and a backslash-newline
+* continuation is not mistaken for a command boundary.
+*/
+function hasMultipleAbInvocations(cmd) {
+	const separators = ";|&\n";
+	const starts = [0];
+	let quote = null;
+	for (let i = 0; i < cmd.length; i++) {
+		const ch = cmd[i];
+		// Single quotes are fully literal in the shell; anywhere else a
+		// backslash protects the next character.
+		if (ch === "\\" && quote !== "'") {
+			i++;
+			continue;
+		}
+		if (quote) {
+			if (ch === quote) quote = null;
+			continue;
+		}
+		if (ch === "\"" || ch === "'" || ch === "`") {
+			quote = ch;
+			continue;
+		}
+		if (separators.includes(ch)) {
+			// Swallow the whole operator run (`&&`, `||`, `;\n`, ...).
+			while (i + 1 < cmd.length && separators.includes(cmd[i + 1])) i++;
+			starts.push(i + 1);
+		}
+	}
+	let count = 0;
+	for (const start of starts) {
+		let j = start;
+		while (j < cmd.length && (cmd[j] === " " || cmd[j] === "\t" || cmd[j] === "\n")) j++;
+		if (cmd.slice(j, j + 13) !== "agent-browser") continue;
+		// Reject longer words such as `agent-browser-foo`.
+		const after = cmd[j + 13];
+		if (after !== void 0 && /[A-Za-z0-9_\-]/.test(after)) continue;
+		count++;
+		if (count > 1) return true;
+	}
+	return false;
+}
+/**
+* Returns true when an `agent-browser` command in `cmd` has its exit
+* status hidden by a shell decorator that would prevent ccqa from rolling
+* back a failed attempt:
+*
+*   - trailing `|| true` / `|| :` / `; true` / `; :` (force exit 0)
+*   - `2>/dev/null` and friends (drop stderr, sometimes paired with `|| true`)
+*
+* The agent-browser command itself returns exit 1 on selector miss, so
+* once one of these is present the PostToolUse hook sees `is_error=false`
+* and the bad attempt sneaks into actions.json.
+*
+* The patterns are matched against the raw command text; quoting is not
+* taken into account.
+*/
+function hasErrorSuppression(cmd) {
+	if (!cmd.includes("agent-browser")) return false;
+	const suppressors = [
+		/\|\|\s*(true|:|\s*$|#)/,
+		// `:` is not a word character, so a trailing `\b` can never match after
+		// it at end of input or before whitespace; use a lookahead for the
+		// no-op builtin instead.
+		/;\s*(?:true\b|:(?=\s|$))/,
+		/2\s*>\s*\/dev\/null/,
+		/&\s*>\s*\/dev\/null/
+	];
+	return suppressors.some((pattern) => pattern.test(cmd));
+}
+/**
 * Parse an `agent-browser --session <name> <cmd> [args...]` bash command
 * and return the corresponding AB_ACTION line, or null if not an agent-browser call.
 */
@@ -598,9 +943,56 @@ function extractAbActionFromBashCommand(cmd) {
 		case "select": return `AB_ACTION|${subCmd}|${args[0] ?? ""}|${args[1] ?? ""}|${args[2] ?? ""}`;
 		case "drag": return `AB_ACTION|drag|${args[0] ?? ""}|${args[1] ?? ""}|${args[2] ?? ""}`;
 		case "snapshot": return null;
+		case "find": return extractFindAbAction(args);
 		default: return null;
 	}
 }
+/** Set views of FIND_ACTIONS / FIND_LOCATORS, used for membership checks in extractFindAbAction. */
+const FIND_ACTION_SET = new Set(FIND_ACTIONS);
+const FIND_LOCATOR_SET = new Set(FIND_LOCATORS);
+/**
+* Parse the positional tokens of `agent-browser find <locator> <value> [...]
+* <action> [fillValue]` and produce a canonical
+*   `AB_ACTION|find_<action>|<locator>|<value>|<extra>|<exact>|...|<label>`
+* line. The wire format keeps a fixed positional layout across locators so
+* downstream `parseAbAction` in `cli/trace.ts` can split on `|` alone:
+*
+*   <extra> is `--name` value for role, integer index for nth, "" otherwise.
+*   <exact> is the literal "exact" if --exact was passed, "" otherwise.
+*
+* `fill` / `type` lines carry one more field, the text to enter, before the
+* trailing (empty) label slot.
+*
+* Only the FIRST action keyword after the locator value is taken as the
+* action. Later tokens are treated as the fill value even when they happen
+* to spell an action name, so `find label Search fill "check"` is recorded
+* as a fill of the text "check" rather than as a `check` action.
+*
+* Returns null for malformed invocations (unknown locator or no action) —
+* the caller treats null as "not a structured action" and the Bash command
+* still runs unobserved.
+*/
+function extractFindAbAction(args) {
+	const locator = args[0];
+	if (!locator || !FIND_LOCATOR_SET.has(locator)) return null;
+	let i = 1;
+	let value = args[i] ?? "";
+	i++;
+	let extra = "";
+	if (locator === "nth") {
+		// `find nth <index> <selector>`: the first positional is the index.
+		extra = value;
+		value = args[i] ?? "";
+		i++;
+	}
+	let action = "";
+	let name = "";
+	let exact = "";
+	let fillValue = "";
+	for (; i < args.length; i++) {
+		const tok = args[i];
+		if (tok === "--name") {
+			name = args[i + 1] ?? "";
+			i++;
+		} else if (tok === "--exact") exact = "exact";
+		else if (!action && FIND_ACTION_SET.has(tok)) action = tok;
+		else if (action) fillValue = tok;
+	}
+	if (!action) return null;
+	if (locator === "role") extra = name;
+	const command = `find_${action}`;
+	if (action === "fill" || action === "type") return `AB_ACTION|${command}|${locator}|${value}|${extra}|${exact}|${fillValue}|`;
+	return `AB_ACTION|${command}|${locator}|${value}|${extra}|${exact}|`;
+}
 async function buildMessageStream(prompt, options) {
 	const mockFile = process.env["CCQA_CLAUDE_MOCK_FILE"];
 	if (mockFile) return replayMockMessages(mockFile);
@@ -634,6 +1026,20 @@ function substituteVars(value, lookup) {
 	});
 }
 /**
+* Lazily yield the name of every `${NAME}` / `$NAME` reference found in
+* `value`, without substituting anything. Intended for callers that only
+* need to enumerate refs, e.g. the env-scrub map builder. Matching is done
+* with the shared ANY_VAR_RE pattern (the reference grammar `substituteVars`
+* uses), whose `lastIndex` is rewound before scanning.
+*/
+function* iterEnvRefNames(value) {
+	ANY_VAR_RE.lastIndex = 0;
+	for (let hit = ANY_VAR_RE.exec(value); hit !== null; hit = ANY_VAR_RE.exec(value)) {
+		// The name sits in whichever of the two capture groups participated.
+		const [, firstGroup, secondGroup] = hit;
+		const refName = firstGroup ?? secondGroup;
+		if (refName) yield refName;
+	}
+}
+/**
 * Resolve every `$VAR` / `${VAR}` reference against the current process env.
 *
 * Missing variables expand to the empty string, mirroring `sh` behaviour.
@@ -692,74 +1098,6 @@ function refsToJsExpression(value, nameToExpr) {
 	})}\``;
 }
 //#endregion
-//#region src/spec/yaml-schema.ts
-/**
-* An action step: one user-facing browser interaction. `instruction` and
-* `expected` are the natural-language description handed to Claude during
-* `ccqa trace`. URLs live inside `instruction`, either verbatim or via
-* `${ENV_VAR}` references (resolved at runtime).
-*/
-const ActionStepSchema = z.object({
-	instruction: z.string().min(1),
-	expected: z.string().min(1)
-}).strict();
-/**
-* An include step: invokes a reusable block (`.ccqa/blocks/<name>/spec.yaml`).
-* `params` values are plain strings; env refs (`${VAR}`) inside them are
-* resolved at expand time the same way step instructions are.
-*/
-const IncludeStepSchema = z.object({
-	include: z.string().min(1),
-	params: z.record(z.string(), z.string()).optional()
-}).strict();
-/**
-* A spec step is either an action step or an include step. The two are
-* discriminated by the presence of the `include` key — see `isIncludeStep`.
-*/
-const StepSchema = z.union([ActionStepSchema, IncludeStepSchema]);
-/** Top-level spec schema. `.strict()` rejects any unknown key. */
-const TestSpecSchema = z.object({
-	title: z.string().min(1),
-	relatedPaths: z.array(z.string().min(1)).optional(),
-	steps: z.array(StepSchema).min(1)
-}).strict();
-/**
-* A block param declaration. `required` defaults to true; only explicit
-* `required: false` makes it optional. `secret: true` flags the value as
-* sensitive — codegen renders such values as `process.env.<NAME> ?? ""`
-* template literals so the secret never ends up baked into test.spec.ts.
-* `dummy` is a placeholder value surfaced by the draft / drift prompts
-* (which see the block in isolation, before any include site exists);
-* `description` is the param's semantic role, also consumed by those
-* prompts and by spec authors browsing the block.
-*/
-const BlockParamSchema = z.object({
-	name: z.string().min(1),
-	required: z.boolean().optional(),
-	secret: z.boolean().optional(),
-	dummy: z.string().optional(),
-	description: z.string().optional()
-}).strict();
-/**
-* Block schema. Block steps are restricted to ActionStep — nested blocks are
-* forbidden. Including a block from inside another block fails parsing here
-* (the store layer maps the cryptic "Unrecognized key: 'include'" error into
-* a targeted nested-block message).
-*/
-const BlockSpecSchema = z.object({
-	title: z.string().min(1),
-	params: z.array(BlockParamSchema).optional(),
-	steps: z.array(ActionStepSchema).min(1)
-}).strict();
-/** Runtime predicate for the StepSchema union. */
-function isIncludeStep(step) {
-	return "include" in step;
-}
-/** Returns true if a block param is required (default: true). */
-function isParamRequired(param) {
-	return param.required !== false;
-}
-//#endregion
 //#region src/spec/parser.ts
 /** Parse a spec.yaml. Schema rejections are rewritten with actionable messages. */
 function parseTestSpec(content, source = "spec.yaml") {
@@ -994,6 +1332,32 @@ async function loadAvailableBlocks(cwd) {
 		}))
 	}));
 }
+const TRACE_USER_PROMPT_PATH = ".ccqa/prompts/trace.user.md";
+const TRACE_USER_PROMPT_MAX_BYTES = 32768;
+/**
+* Load project-specific guidance to append to the trace system prompt.
+*
+* Returns the file's contents (trimmed) when `.ccqa/prompts/trace.user.md`
+* exists and is non-empty. Missing file, empty file, or read error all
+* resolve to `null` so callers can treat the override as strictly optional.
+*
+* The file is meant for organisation-specific rules that don't belong in
+* the OSS-default prompt — naming conventions, staging URL hints, repeated
+* UI quirks that recur across specs. Anything that genuinely belongs in
+* one spec should go in that spec's instruction, not here.
+*
+* Size-capped at 32 KiB of UTF-8 to keep accidental commits of huge files
+* from blowing up the system prompt. The cap is measured in encoded bytes,
+* not string length, so multi-byte text cannot exceed it; the cut is moved
+* back to a character boundary so no partial character is emitted. An
+* over-long file comes back as the truncated text followed by a
+* `[ccqa] (trace.user.md truncated at N bytes)` notice.
+*
+* @param cwd Project root to resolve the prompt path against; defaults to
+*   `process.cwd()` when nullish.
+* @returns The trimmed (possibly truncated) prompt text, or `null`.
+*/
+async function loadTraceUserPrompt(cwd) {
+	const file = join(cwd ?? process.cwd(), TRACE_USER_PROMPT_PATH);
+	// Deliberate best-effort read: any failure means "no override".
+	const content = await readFile(file, "utf-8").catch(() => null);
+	if (content === null) return null;
+	const trimmed = content.trim();
+	if (trimmed.length === 0) return null;
+	const bytes = Buffer.from(trimmed, "utf-8");
+	if (bytes.length <= TRACE_USER_PROMPT_MAX_BYTES) return trimmed;
+	// bytes[end] is the first byte left out; while it is a UTF-8
+	// continuation byte (10xxxxxx) the cut would split a character.
+	let end = TRACE_USER_PROMPT_MAX_BYTES;
+	while (end > 0 && (bytes[end] & 192) === 128) end--;
+	return bytes.toString("utf-8", 0, end) + `\n\n[ccqa] (trace.user.md truncated at ${TRACE_USER_PROMPT_MAX_BYTES} bytes)`;
+}
 /**
 * Probe for orphaned files left over from earlier ccqa versions inside
 * `.ccqa/blocks/<name>/`. Both pre-v0.4 `test.spec.ts` (function-export
@@ -1246,6 +1610,33 @@ function formatAgentBrowserUnavailableMessage() {
 }
 //#endregion
 //#region src/runtime/replay-validate.ts
+/** True when `x` is a poll descriptor (`kind: "poll-present"`) rather than `null` or an argv array. */
+function isPollCheck(x) {
+	if (x === null || Array.isArray(x)) return false;
+	return x.kind === "poll-present";
+}
+const SELECTOR_POLL_INTERVAL_MS = 500;
+/**
+* Poll `get count <selector>` until it reports at least one match or the
+* timeout elapses. The probe always runs at least once; between attempts
+* the caller's thread sleeps SELECTOR_POLL_INTERVAL_MS via `sleepSync`.
+*
+* @param check Poll descriptor; `selector` and `timeoutMs` are read.
+* @param sessionName Value passed to `--session`.
+* @returns `{ ok: true, reason: "" }` on a match, otherwise `{ ok: false }`
+*   with a reason naming the timeout and the last count (or "error").
+*/
+function runPollCheck(check, sessionName) {
+	const deadline = Date.now() + check.timeoutMs;
+	while (true) {
+		const probe = spawnAB([
+			"--session",
+			sessionName,
+			"get",
+			"count",
+			check.selector
+		]);
+		// A non-zero exit or unparsable output both count as "no number".
+		let count = NaN;
+		if (probe.status === 0) count = Number.parseInt(probe.stdout.trim(), 10);
+		const hasNumber = !Number.isNaN(count);
+		if (hasNumber && count > 0) return {
+			ok: true,
+			reason: ""
+		};
+		if (Date.now() >= deadline) {
+			const lastSeen = hasNumber ? count : "error";
+			return {
+				ok: false,
+				reason: `selector not present within ${check.timeoutMs}ms (get count returned ${lastSeen})`
+			};
+		}
+		sleepSync(SELECTOR_POLL_INTERVAL_MS);
+	}
+}
 const SHORT_TIMEOUT_MS = 5e3;
 const ASSERT_TIMEOUT_MS = 1e4;
 /**
@@ -1330,6 +1721,7 @@ function actionToAbArgs(action, sessionName) {
 			const raw = sub(action.selector);
 			if (!raw) return null;
 			if (/^\d+$/.test(raw)) return null;
+			if (raw.startsWith("--")) return null;
 			if (raw.startsWith("text=")) return [
 				...base,
 				"wait",
@@ -1338,18 +1730,47 @@ function actionToAbArgs(action, sessionName) {
 				"--timeout",
 				String(SHORT_TIMEOUT_MS)
 			];
-			return [
-				...base,
-				"wait",
-				raw,
-				"--timeout",
-				String(SHORT_TIMEOUT_MS)
-			];
+			return {
+				kind: "poll-present",
+				selector: raw,
+				timeoutMs: SHORT_TIMEOUT_MS
+			};
 		}
 		case "snapshot": return null;
 		case "assert": return assertToAbArgs(action, sub, sessionName);
+		case "find_click":
+		case "find_dblclick":
+		case "find_hover":
+		case "find_focus":
+		case "find_check":
+		case "find_uncheck": return buildFindArgs$1(action, void 0, sub, base);
+		case "find_fill":
+		case "find_type": return buildFindArgs$1(action, sub(action.value), sub, base);
 	}
 }
+/**
+* Assemble the agent-browser argv that replays a recorded `find_*` action.
+* Mirrors the codegen shape in `actions-to-script.ts:buildFindArgs` but
+* emits a plain string array. Env refs in `findValue` / `findName` are
+* resolved through `sub` so the validator hits the same DOM the generated
+* test will.
+*
+* Layout: `...base find <locator> [<index>] <value> <action> [<fillValue>]
+* [--name <name>] [--exact]`. A recorded `find_type` is replayed as `fill`.
+* Returns null when the action has no locator or no find value.
+*/
+function buildFindArgs$1(action, fillValue, sub, base) {
+	const { findLocator: locator } = action;
+	if (!locator || !action.findValue) return null;
+	// Strip the `find_` prefix, then map `type` onto `fill`.
+	const verb = action.command.slice(5).replace("type", "fill");
+	const argv = [...base, "find", locator];
+	if (locator === "nth") argv.push(String(action.findIndex ?? 0));
+	argv.push(sub(action.findValue), verb);
+	if (fillValue !== void 0) argv.push(fillValue);
+	const wantsName = locator === "role" && action.findName;
+	if (wantsName) argv.push("--name", sub(action.findName));
+	if (action.findExact) argv.push("--exact");
+	return argv;
+}
 function assertToAbArgs(action, sub, sessionName) {
 	const base = ["--session", sessionName];
 	const val = sub(action.value ?? action.observation);
@@ -1368,13 +1789,11 @@ function assertToAbArgs(action, sub, sessionName) {
 		case "text_not_visible": return null;
 		case "element_visible":
 			if (!sel) return null;
-			return [
-				...base,
-				"wait",
-				sel,
-				"--timeout",
-				String(ASSERT_TIMEOUT_MS)
-			];
+			return {
+				kind: "poll-present",
+				selector: sel,
+				timeoutMs: ASSERT_TIMEOUT_MS
+			};
 		case "element_not_visible": return null;
 		case "url_contains": return null;
 		case "element_enabled":
@@ -1382,23 +1801,59 @@ function assertToAbArgs(action, sub, sessionName) {
 		case "element_checked":
 		case "element_unchecked":
 			if (!sel || sel.startsWith("text=") || sel.startsWith("[aria-label=")) return null;
-			return [
-				...base,
-				"wait",
-				sel,
-				"--timeout",
-				String(ASSERT_TIMEOUT_MS)
-			];
+			return {
+				kind: "poll-present",
+				selector: sel,
+				timeoutMs: ASSERT_TIMEOUT_MS
+			};
 		default: return null;
 	}
 }
+const NO_STEP_ID = "__no_step__";
+/**
+* Replay a single recorded action against the validation session.
+*
+* Element-presence checks are routed to `runPollCheck` (which uses
+* `get count`, never the blocking `wait <selector>`); every other action
+* spawns the agent-browser argv directly. When that spawn fails and looks
+* like a hard-timeout kill it is retried exactly once, which covers the
+* daemon's occasional under-load drop.
+*
+* @returns `{ skipped, ok, reason }`. `skipped` is true when the action has
+*   no replayable form; `reason` is at most 200 characters for a failed
+*   spawn and empty on success.
+*/
+function runValidationAction(action, sessionName) {
+	const plan = actionToAbArgs(action, sessionName);
+	if (plan === null) return {
+		skipped: true,
+		ok: false,
+		reason: ""
+	};
+	if (isPollCheck(plan)) {
+		const polled = runPollCheck(plan, sessionName);
+		return {
+			skipped: false,
+			ok: polled.ok,
+			reason: polled.reason
+		};
+	}
+	const firstTry = spawnAB(plan);
+	const shouldRetry = firstTry.status !== 0 && looksLikeHardTimeout(firstTry);
+	const outcome = shouldRetry ? spawnAB(plan) : firstTry;
+	if (outcome.status === 0) return {
+		skipped: false,
+		ok: true,
+		reason: ""
+	};
+	// Prefer stderr, then stdout, then a synthetic exit-code message.
+	const detail = outcome.stderr.trim() || outcome.stdout.trim() || `agent-browser exit ${outcome.status ?? "?"}`;
+	return {
+		skipped: false,
+		ok: false,
+		reason: detail.slice(0, 200)
+	};
+}
 function validateActions(actions, opts) {
 	const kept = [];
 	const dropped = [];
-	let skipUntilSideEffect = false;
+	let skipFromStepId = null;
 	for (let i = 0; i < actions.length; i++) {
 		const action = actions[i];
-		if (skipUntilSideEffect && isPassiveCommand(action.command)) {
+		opts.onProgress?.(i, actions.length, action);
+		const stepId = action.stepId ?? NO_STEP_ID;
+		if (skipFromStepId !== null && skipFromStepId !== stepId) skipFromStepId = null;
+		if (skipFromStepId !== null && isPassiveCommand(action.command)) {
 			dropped.push({
 				index: i,
 				action,
@@ -1406,28 +1861,111 @@ function validateActions(actions, opts) {
 			});
 			continue;
 		}
-		skipUntilSideEffect = false;
-		const args = actionToAbArgs(action, opts.sessionName);
-		if (args === null) {
+		const outcome = runValidationAction(action, opts.sessionName);
+		if (outcome.skipped) {
 			kept.push(action);
 			continue;
 		}
-		const result = spawnAB(args);
-		if (result.status === 0) {
+		if (outcome.ok) {
 			kept.push(action);
+			if (skipFromStepId !== null && !isPassiveCommand(action.command)) skipFromStepId = null;
 			continue;
 		}
 		dropped.push({
 			index: i,
 			action,
-			reason: (result.stderr.trim() || result.stdout.trim() || `agent-browser exit ${result.status ?? "?"}`).slice(0, 200)
+			reason: outcome.reason
 		});
-		skipUntilSideEffect = true;
+		if (!isPassiveCommand(action.command)) skipFromStepId = stepId;
+	}
+	return splitByMode(actions, rescueLostSteps(actions, kept, dropped, opts), opts.mode ?? "lenient");
+}
+/**
+* Translate the internal `{ kept, dropped }` result of the rescue pass
+* into the public-facing shape. In strict mode the caller sees the same
+* shape as before (kept/dropped); in lenient mode the still-failed
+* actions move to `unstable` with `replayUnstable: true` tagged on, so
+* codegen can warn about them while still emitting the line.
+*
+* @param originalActions The full recorded action list, in original order.
+* @param result `{ kept, dropped, rescuedSteps }` from the rescue pass;
+*   `rescuedSteps` is passed through untouched.
+* @param mode `"strict"` returns `result` as-is plus an empty `unstable`;
+*   any other value takes the lenient path.
+* @returns `{ kept, unstable, dropped, rescuedSteps }`. In lenient mode
+*   `dropped` is always empty, and the action objects moved to `unstable`
+*   are mutated in place (`replayUnstable`, `replayReason`).
+*/
+function splitByMode(originalActions, result, mode) {
+	if (mode === "strict") return {
+		kept: result.kept,
+		unstable: [],
+		dropped: result.dropped,
+		rescuedSteps: result.rescuedSteps
+	};
+	// Drops are looked up by original index; kept membership is by object identity.
+	const droppedByIndex = new Map(result.dropped.map((d) => [d.index, d]));
+	const keptSet = new Set(result.kept);
+	const finalKept = [];
+	const unstable = [];
+	// Walk the original list so both output arrays preserve recording order.
+	for (let i = 0; i < originalActions.length; i++) {
+		const action = originalActions[i];
+		if (keptSet.has(action)) {
+			finalKept.push(action);
+			continue;
+		}
+		const drop = droppedByIndex.get(i);
+		// An action that is neither kept nor recorded as dropped lands in neither list.
+		if (drop) {
+			action.replayUnstable = true;
+			action.replayReason = drop.reason;
+			unstable.push(action);
+		}
 	}
 	return {
+		kept: finalKept,
+		unstable,
+		dropped: [],
+		rescuedSteps: result.rescuedSteps
+	};
+}
+/**
+* Second-chance pass for steps that lost every action during validation.
+*
+* A step is "lost" when none of its actions is in `kept`. Each dropped
+* action belonging to such a step is replayed once more through
+* `runValidationAction`; the ones that now succeed are moved back from
+* `dropped` to `kept`, with `kept` rebuilt in original recording order.
+*
+* @param actions The full recorded action list.
+* @param kept Actions that survived the first pass.
+* @param dropped First-pass drops (`{ index, action, reason }`).
+* @param opts Only `opts.sessionName` is read here.
+* @returns `{ kept, dropped, rescuedSteps }` when at least one action was
+*   rescued. When no step was lost, or nothing could be rescued, the inputs
+*   are returned as `{ kept, dropped }` with NO `rescuedSteps` key.
+*/
+function rescueLostSteps(actions, kept, dropped, opts) {
+	// Step ids that still have at least one kept action.
+	const stepsWithSurvivors = /* @__PURE__ */ new Set();
+	for (const a of kept) if (a.stepId) stepsWithSurvivors.add(a.stepId);
+	// Group the drops of every step that has no survivor; actions without a stepId are ignored.
+	const lostStepDrops = /* @__PURE__ */ new Map();
+	for (const d of dropped) {
+		const id = d.action.stepId;
+		if (!id || stepsWithSurvivors.has(id)) continue;
+		const list = lostStepDrops.get(id) ?? [];
+		list.push(d);
+		lostStepDrops.set(id, list);
+	}
+	if (lostStepDrops.size === 0) return {
 		kept,
 		dropped
 	};
+	const rescuedIndices = /* @__PURE__ */ new Set();
+	const rescuedSteps = [];
+	for (const [stepId, drops] of lostStepDrops.entries()) {
+		let anyForThisStep = false;
+		for (const d of drops) {
+			// Re-run the dropped action; a skipped or still-failing outcome stays dropped.
+			const outcome = runValidationAction(d.action, opts.sessionName);
+			if (outcome.skipped) continue;
+			if (outcome.ok) {
+				rescuedIndices.add(d.index);
+				anyForThisStep = true;
+			}
+		}
+		if (anyForThisStep) rescuedSteps.push(stepId);
+	}
+	if (rescuedIndices.size === 0) return {
+		kept,
+		dropped
+	};
+	// Rebuild kept from the original list so rescued actions slot back in at their recorded position.
+	const keptSet = new Set(kept);
+	const newKept = [];
+	for (let i = 0; i < actions.length; i++) {
+		const action = actions[i];
+		if (rescuedIndices.has(i) || keptSet.has(action)) newKept.push(action);
+	}
+	return {
+		kept: newKept,
+		dropped: dropped.filter((d) => !rescuedIndices.has(d.index)),
+		rescuedSteps
+	};
+}
+/** True when the result's stderr carries the marker left when agent-browser is killed by the ccqa hard-timeout watchdog. */
+function looksLikeHardTimeout(result) {
+	const marker = "agent-browser killed after hard timeout";
+	return result.stderr.indexOf(marker) !== -1;
 }
 /**
 * Passive (read-only) commands whose only effect is observation. When a
@@ -1438,12 +1976,228 @@ function isPassiveCommand(cmd) {
 	return cmd === "snapshot" || cmd === "wait" || cmd === "assert";
 }
 //#endregion
+//#region src/runtime/env-scrub.ts
+/**
+* Build a list of `[envValue, "${VAR}"]` pairs for every `${VAR}` reference
+* mentioned in the spec OR in any of its expanded (block-inlined) steps.
+* Used at trace time to scrub recorded Claude-text outputs so a value the
+* spec author intentionally threaded through `process.env` is preserved as
+* `${VAR}` in `actions.json` rather than baked in as the concrete
+* trace-time value.
+*
+* Why we walk `spec.steps` AND `expanded`:
+*   - `spec.steps` carries the spec's own `instruction` / `expected` + each
+*     include's raw `params` (which may themselves be `${ENV}` refs).
+*   - `expanded` carries the inlined block-internal steps, whose
+*     `instruction` / `expected` may *also* contain `${ENV}` refs that
+*     don't go through include params.
+*
+* Only refs whose env value is currently non-empty land in the map —
+* scrubbing against an empty string would corrupt unrelated empty strings
+* in the action stream. Names whose env is unset are returned via
+* `unresolved` so the caller can warn the user.
+*
+* Longer values sort first so a `${SHORT}` whose value is a substring of a
+* `${LONG}` value doesn't clobber the longer one.
+*
+* `title` and `relatedPaths` are deliberately NOT scanned — they never
+* reach the recorded action stream.
+*/
+function buildSpecEnvScrub(spec, expanded) {
+	const refNames = /* @__PURE__ */ new Set();
+	// Spec-level pass: include steps contribute their raw param values,
+	// plain steps contribute their instruction / expected text.
+	for (const step of spec.steps) {
+		const sources = isIncludeStep(step) ? Object.values(step.params ?? {}) : [step.instruction, step.expected];
+		for (const source of sources) collect(source, refNames);
+	}
+	// Expanded pass: the block-inlined steps carry their own instruction / expected text.
+	for (const step of expanded) {
+		for (const source of [step.instruction, step.expected]) collect(source, refNames);
+	}
+	const map = [];
+	const unresolved = [];
+	refNames.forEach((name) => {
+		const value = process.env[name];
+		const usable = typeof value === "string" && value.length > 0;
+		if (usable) map.push([value, "${" + name + "}"]);
+		else unresolved.push(name);
+	});
+	// Longest value first, as `scrubEnvValues` expects.
+	map.sort(([left], [right]) => right.length - left.length);
+	return {
+		map,
+		unresolved
+	};
+}
+/** Adds every name yielded by `iterEnvRefNames(value)` to the `into` set. */
+function collect(value, into) {
+	for (const refName of iterEnvRefNames(value)) {
+		into.add(refName);
+	}
+}
+/**
+* Replace every occurrence of an env value in `text` with its `${VAR}`
+* placeholder.
+*
+* Pairs are applied in map order. **Caller invariant**: the map must be
+* sorted longest-value-first so a shorter value doesn't shadow a longer one
+* that contains it as a substring. `buildSpecEnvScrub` upholds this;
+* hand-built maps should too.
+*
+* Text that an earlier pair already turned into a placeholder is locked and
+* never rescanned. Without the lock, a later value that happens to occur
+* inside a placeholder name (value `ADMIN` vs. placeholder `${ADMIN_TOKEN}`)
+* would rewrite it into `${${ROLE}_TOKEN}`.
+*
+* @param {string} text - Raw recorded text to scrub.
+* @param {Array<[string, string]>} scrubMap - `[envValue, placeholder]` pairs.
+* @returns {string} `text` with env values replaced; returned as-is when the map is empty.
+*/
+function scrubEnvValues(text, scrubMap) {
+	if (scrubMap.length === 0) return text;
+	let segments = [{
+		text,
+		locked: false
+	}];
+	for (const [value, placeholder] of scrubMap) {
+		// An empty needle would match between every character; never scrub with it.
+		if (value.length === 0) continue;
+		segments = segments.flatMap((segment) => {
+			if (segment.locked || !segment.text.includes(value)) return [segment];
+			// `split` cuts on the same non-overlapping, left-to-right occurrences `replaceAll` would hit.
+			return segment.text.split(value).flatMap((piece, i) => {
+				const plain = {
+					text: piece,
+					locked: false
+				};
+				return i === 0 ? [plain] : [{
+					text: placeholder,
+					locked: true
+				}, plain];
+			});
+		});
+	}
+	return segments.map((segment) => segment.text).join("");
+}
+//#endregion
+//#region src/runtime/literal-scrub.ts
+/**
+* Patterns are listed in roughly descending confidence — a hit on `clock-hms`
+* is almost certainly bad; a hit on `unix-epoch-sec` (`1[0-9]{9}`) gates on
+* the value starting with `1`, which empirically rules out most SKU / order-id
+* false positives while still catching epoch seconds in the 2001-2033 window.
+*
+* Relative-time labels ("just now", "N minutes ago", "N分前") are the same
+* class of problem as wall-clock literals: the page shows them, Claude
+* captures them, and they're stale before the test ever replays. We only
+* catch the unambiguous variants — bare "now" or "minute" would false-fire
+* on routine UI copy.
+*/
+// Each entry: `id` (reported as `patternId` in hits), `pattern` (tested with
+// `String.prototype.match`, so only the first occurrence is captured — none
+// of the regexes carries the `g` flag), and a human-readable `label`.
+const UNSTABLE_PATTERNS = [
+	{
+		id: "clock-hms",
+		pattern: /\b\d{2}:\d{2}:\d{2}\b/,
+		label: "clock time HH:MM:SS"
+	},
+	{
+		id: "iso-datetime",
+		pattern: /\b\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}/,
+		label: "ISO datetime"
+	},
+	{
+		// Also fires on the date part of any string that matches `iso-datetime`.
+		id: "iso-date",
+		pattern: /\b\d{4}-\d{2}-\d{2}\b/,
+		label: "ISO date YYYY-MM-DD"
+	},
+	{
+		// Exactly 10 digits with a leading 1.
+		id: "unix-epoch-sec",
+		pattern: /\b1[0-9]{9}\b/,
+		label: "Unix epoch seconds"
+	},
+	{
+		// Exactly 13 digits with a leading 1.
+		id: "unix-epoch-ms",
+		pattern: /\b1[0-9]{12}\b/,
+		label: "Unix epoch milliseconds"
+	},
+	{
+		id: "relative-time-en",
+		pattern: /\b\d+\s+(second|minute|hour|day|week|month|year)s?\s+ago\b/i,
+		label: "English relative time (`N <unit> ago`)"
+	},
+	{
+		id: "relative-now-en",
+		pattern: /\bjust\s+now\b/i,
+		label: "English `just now`"
+	},
+	{
+		id: "relative-time-ja",
+		pattern: /\d+\s*(秒|分|時間|日|週間|か月|ヶ月|年)前/,
+		label: "Japanese relative time (`N<unit>前`)"
+	},
+	{
+		id: "relative-now-ja",
+		pattern: /たった今/,
+		label: "Japanese `たった今`"
+	},
+	{
+		id: "ja-date-full",
+		pattern: /\d{4}年\d{1,2}月\d{1,2}日/,
+		label: "Japanese date YYYY年M月D日"
+	},
+	{
+		// The lookbehinds refuse a match that starts right after `年` or after
+		// another digit (i.e. the tail of a longer number or of a full date);
+		// the lookahead refuses `日間` / `日目`, which read as a duration / ordinal
+		// rather than a calendar date.
+		id: "ja-date-md",
+		pattern: /(?<!年)(?<!\d)\d{1,2}月\d{1,2}日(?![間目])/,
+		label: "Japanese date M月D日"
+	}
+];
+// Action properties that `detectUnstableLiterals` inspects; only non-empty
+// string values of these fields are tested against `UNSTABLE_PATTERNS`.
+const SCANNABLE_FIELDS = [
+	"selector",
+	"value",
+	"label",
+	"target",
+	"observation",
+	"findValue",
+	"findName"
+];
+/**
+* Test the non-empty string values of one action's scannable fields against
+* every unstable-literal pattern. Yields one `{ field, patternId, match }`
+* entry per (field, pattern) pair that fired, in field-then-pattern order,
+* where `match` is the first matching substring. An empty array means the
+* action is safe to keep.
+*/
+function detectUnstableLiterals(action) {
+	return SCANNABLE_FIELDS.flatMap((field) => {
+		const text = action[field];
+		if (typeof text !== "string" || text.length === 0) return [];
+		return UNSTABLE_PATTERNS.flatMap(({ id, pattern }) => {
+			const found = text.match(pattern);
+			return found ? [{
+				field,
+				patternId: id,
+				match: found[0]
+			}] : [];
+		});
+	});
+}
+/**
+* Partition the recorded actions into `kept` and `dropped`.
+*
+*   - no unstable-literal hit           → kept
+*   - `snapshot` whose hits all sit in
+*     its `observation` field           → kept (only its descriptive text is
+*                                         affected, not a selector or value)
+*   - anything else with a hit          → dropped as `{ index, action, hits }`,
+*                                         `index` being the position in `actions`
+*/
+function scrubUnstableActions(actions) {
+	const kept = [];
+	const dropped = [];
+	for (const [index, action] of actions.entries()) {
+		const hits = detectUnstableLiterals(action);
+		const observationOnlySnapshot = action.command === "snapshot" && hits.every((hit) => hit.field === "observation");
+		if (hits.length === 0 || observationOnlySnapshot) kept.push(action);
+		else dropped.push({
+			index,
+			action,
+			hits
+		});
+	}
+	return {
+		kept,
+		dropped
+	};
+}
+/**
+* One-line, human-readable description of a literal-scrub drop for
+* `log.warn`: the command (plus assert type when present), the distinct
+* pattern ids that fired, and a `field="match"` sample for every hit.
+*/
+function formatUnstableDrop(drop) {
+	const { action, hits } = drop;
+	const patternIds = [];
+	const samples = [];
+	for (const hit of hits) {
+		if (!patternIds.includes(hit.patternId)) patternIds.push(hit.patternId);
+		samples.push(`${hit.field}="${hit.match}"`);
+	}
+	const head = action.assertType ? `${action.command} ${action.assertType}` : `${action.command}`;
+	return `${head}: contains unstable literal (${patternIds.join(", ")}) — ${samples.join(", ")}`;
+}
+//#endregion
 //#region src/cli/trace.ts
-const traceCommand = new Command("trace").argument("<feature/spec>", "Spec id in '<feature>/<spec>' form (resolves to .ccqa/features/<feature>/test-cases/<spec>/)").description("Run agent-browser, verify assertions, and record structured actions").option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID. Overrides CCQA_MODEL.").action(async (specPath, opts) => {
+// Accepted values for `--validation-mode`; the option parser rejects anything else.
+const VALIDATION_MODES = ["lenient", "strict"];
+const traceCommand = new Command("trace").argument("<feature/spec>", "Spec id in '<feature>/<spec>' form (resolves to .ccqa/features/<feature>/test-cases/<spec>/)").description("Run agent-browser, verify assertions, and record structured actions").option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID. Overrides CCQA_MODEL.").option("--validation-mode <mode>", "Post-trace validation behaviour: 'lenient' (default) tags failing actions with a warning but keeps them; 'strict' drops them from actions.json.", (raw) => {
+	if (VALIDATION_MODES.includes(raw)) return raw;
+	throw new Error(`--validation-mode must be one of ${VALIDATION_MODES.join(" | ")}`);
+}, "lenient").action(async (specPath, opts) => {
 	const { featureName, specName } = parseSpecPath(specPath);
-	await runTrace(featureName, specName, opts.model);
+	await runTrace(featureName, specName, opts.model, opts.validationMode ?? "lenient");
 });
-async function runTrace(featureName, specName, model) {
+async function runTrace(featureName, specName, model, validationMode = "lenient") {
 	header("trace", `${featureName}/${specName}`);
 	try {
 		meta("agent-browser", assertAgentBrowserAvailable());
@@ -1458,17 +2212,23 @@ async function runTrace(featureName, specName, model) {
 	await warnStaleBlockArtifacts();
 	const spec = parseTestSpec(await readSpecFile(featureName, specName));
 	const expanded = expandSpec(spec, { blocks: await loadAllBlocks() });
+	const envScrub = buildSpecEnvScrub(spec, expanded);
+	const envScrubMap = envScrub.map;
+	if (envScrub.unresolved.length > 0) warn(`spec references env var(s) with empty/unset values: ${envScrub.unresolved.join(", ")} — their literal trace-time values will be baked into actions.json`);
 	meta("spec", spec.title);
 	meta("steps", expanded.length);
 	const includes = collectIncludedBlockNames(spec);
 	if (includes.length > 0) meta("blocks", includes.join(", "));
 	blank();
 	const sessionName = generateSessionName();
-	const systemPrompt = buildTraceSystemPrompt({
+	const baseSystemPrompt = buildTraceSystemPrompt({
 		title: spec.title,
 		steps: expanded,
 		sessionName
 	});
+	const userPrompt = await loadTraceUserPrompt();
+	if (userPrompt !== null) meta("user-prompt", ".ccqa/prompts/trace.user.md");
+	const systemPrompt = userPrompt === null ? baseSystemPrompt : `${baseSystemPrompt}\n## Project-specific guidance\n\n${userPrompt}\n`;
 	const prompt = buildTracePrompt(spec.title);
 	info("Running agent-browser session...");
 	blank();
@@ -1499,7 +2259,7 @@ async function runTrace(featureName, specName, model) {
 		},
 		model,
 		onAbAction: (abAction) => {
-			const action = withStepId(parseAbAction(abAction));
+			const action = withStepId(parseAbAction(scrubEnvValues(abAction, envScrubMap)));
 			if (action) traceActions.push(action);
 		},
 		onAbActionFailed: () => {
@@ -1530,14 +2290,14 @@ async function runTrace(featureName, specName, model) {
 						if (routeStep.status === "FAILED") overallStatus = "failed";
 					}
 				} else if (trimmed.startsWith("AB_ACTION|snapshot|") || trimmed.startsWith("AB_ACTION|assert|")) {
-					const action = withStepId(parseAbAction(trimmed));
+					const action = withStepId(parseAbAction(scrubEnvValues(trimmed, envScrubMap)));
 					if (action) traceActions.push(action);
 				}
 			}
 		}
 	});
 	if (isError) overallStatus = "failed";
-	const validatedActions = validateAndReport(traceActions);
+	const validatedActions = validateAndReport(dedupAndReport(scrubAndReport(traceActions)), validationMode);
 	const route = {
 		specName,
 		timestamp: (/* @__PURE__ */ new Date()).toISOString(),
@@ -1558,24 +2318,194 @@ async function runTrace(featureName, specName, model) {
 	hint(`run 'ccqa generate ${featureName}/${specName}' to generate a test script`);
 }
 /**
+* Strip actions whose recorded fields contain "unstable literal" values
+* (clock readings, ISO datetimes, Unix-epoch IDs) that Claude baked into
+* the trace despite not coming through `${ENV_VAR}`. These would otherwise
+* pin the generated test to a single run. Reported the same way as
+* `validateAndReport` so users see one uniform "dropped" surface.
+*/
+function scrubAndReport(actions) {
+	if (actions.length === 0) return actions;
+	const result = scrubUnstableActions(actions);
+	const removed = result.dropped;
+	// The scrub section is printed only when something was actually removed.
+	if (removed.length > 0) {
+		blank();
+		info("post-trace literal scrub (removing run-specific values)...");
+		removed.forEach((entry) => {
+			warn(`dropped action #${entry.index + 1} (${formatUnstableDrop(entry)})`);
+		});
+		meta("scrubbed", `${result.kept.length}/${actions.length} kept (${removed.length} dropped)`);
+	}
+	return result.kept;
+}
+/**
+* Drop *immediate* duplicate AB_ACTION emissions inside the same step.
+* Claude occasionally records the same `find_click` (identical command,
+* locator, value, fields) twice in a row when retrying a selector after a
+* snapshot — only the last attempt is "the canonical one". Collapsing the
+* dupes keeps actions.json from accumulating ghost-retries the LLM never
+* meant to commit.
+*
+* The dedupe is intentionally conservative — adjacent + structurally
+* IDENTICAL only. We do NOT try to compress retries with different
+* selectors / locators (that would risk dropping a legitimate "click the
+* neighbouring button" sequence). The trace prompt now asks Claude not to
+* emit failed attempts in the first place, so this is the belt-and-braces
+* pass.
+*/
+function dedupAndReport(actions) {
+	if (actions.length === 0) return actions;
+	const kept = [];
+	for (const candidate of actions) {
+		// Compare against the last action that survived, not the previous input element.
+		const last = kept[kept.length - 1];
+		const repeatsLast = Boolean(last) && isAdjacentDuplicate(last, candidate);
+		if (!repeatsLast) kept.push(candidate);
+	}
+	const dropped = actions.length - kept.length;
+	if (dropped > 0) meta("deduped", `${kept.length}/${actions.length} kept (${dropped} adjacent duplicate(s) dropped)`);
+	return kept;
+}
+/**
+* Decide whether `b` merely repeats `a`. The commands must be strictly
+* equal; every other compared field is normalised with a fallback first, so
+* a missing field and its fallback value count as the same:
+* `""` for stepId / selector / value / target / label / assertType /
+* findLocator / findValue / findName, `-1` for findIndex, `false` for
+* findExact. Fields not listed here are ignored.
+*/
+function isAdjacentDuplicate(a, b) {
+	if (a.command !== b.command) return false;
+	const fallbacks = {
+		stepId: "",
+		selector: "",
+		value: "",
+		target: "",
+		label: "",
+		assertType: "",
+		findLocator: "",
+		findValue: "",
+		findName: "",
+		findIndex: -1,
+		findExact: false
+	};
+	return Object.entries(fallbacks).every(([field, fallback]) => (a[field] ?? fallback) === (b[field] ?? fallback));
+}
+/**
 * Run the post-trace replay validation and emit user-visible drop reports.
 * Splitting this out keeps `runTrace` readable; the function is pure aside
 * from `log.*` and the agent-browser invocations inside `validateActions`.
+*
+* In lenient mode (the default) failing actions are NOT removed — they're
+* tagged with `replayUnstable: true` and merged back into the output stream
+* in their original order so codegen can still emit them (with a `// [warn]`
+* comment) and let the auto-fix loop decide what to do.
 */
-function validateAndReport(actions) {
+function validateAndReport(actions, mode) {
 	if (actions.length === 0) return actions;
 	const sessionName = `${generateSessionName()}-validate`;
 	blank();
-	info("post-trace validation (replaying recorded actions)...");
-	const { kept, dropped } = validateActions(actions, { sessionName });
+	info(`post-trace validation in ${mode} mode (replaying ${actions.length} recorded action(s))...`);
+	const { kept, unstable, dropped, rescuedSteps = [] } = validateActions(actions, {
+		sessionName,
+		mode,
+		onProgress: (i, total, action) => {
+			progress(i, total, validationProgressLabel(action));
+		}
+	});
+	progressEnd();
+	if (rescuedSteps.length > 0) info(`rescued ${rescuedSteps.length} step(s) that had lost every action: ${rescuedSteps.join(", ")}`);
+	// Lenient mode: nothing is removed. Flagged actions are reported and then
+	// re-threaded with the kept ones in original recording order.
+	if (mode === "lenient") {
+		if (unstable.length === 0) meta("validated", `${kept.length}/${actions.length} kept`);
+		else {
+			for (const u of unstable) warn(`replay-unstable: ${`${u.command}${u.selector ? " " + u.selector : ""}${u.findValue ? " " + u.findValue : ""}`} — ${u.replayReason ?? "(no reason)"} (kept in actions.json with warning)`);
+			meta("validated", `${kept.length}/${actions.length} kept, ${unstable.length} flagged replay-unstable (kept with warning)`);
+		}
+		const merged = mergeKeptAndUnstableInOriginalOrder(actions, kept, unstable);
+		reportPerStepBreakdown(actions, merged);
+		return merged;
+	}
+	// Every other mode value: dropped actions are reported and only `kept` is returned.
 	if (dropped.length === 0) {
 		meta("validated", `${kept.length}/${actions.length} kept`);
+		reportPerStepBreakdown(actions, kept);
 		return kept;
 	}
-	for (const d of dropped) warn(`dropped action #${d.index + 1} (${d.action.command}${d.action.selector ? " " + d.action.selector : ""}): ${d.reason}`);
+	// Consecutive drops whose reason starts with "skipped after" and that share
+	// a stepId are folded into one summary warning instead of one line each.
+	let cascadeStart = null;
+	let cascadeCount = 0;
+	let cascadeStepId;
+	const flushCascade = () => {
+		if (cascadeStart === null || cascadeCount === 0) return;
+		const stepTag = cascadeStepId ? ` in ${cascadeStepId}` : "";
+		warn(`cascade dropped ${cascadeCount} action(s)${stepTag} after action #${cascadeStart}`);
+		cascadeStart = null;
+		cascadeCount = 0;
+		cascadeStepId = void 0;
+	};
+	for (const d of dropped) {
+		const isCascade = d.reason.startsWith("skipped after");
+		if (isCascade && cascadeStart !== null && cascadeStepId === d.action.stepId) {
+			cascadeCount += 1;
+			continue;
+		}
+		flushCascade();
+		if (isCascade) {
+			// The raw 0-based index is stored and printed as-is in "after action #N",
+			// whereas the per-action warning below prints `index + 1`.
+			// NOTE(review): presumably intentional, so N names the preceding action in 1-based numbering — confirm.
+			cascadeStart = d.index;
+			cascadeCount = 1;
+			cascadeStepId = d.action.stepId;
+			continue;
+		}
+		warn(`dropped action #${d.index + 1} (${d.action.command}${d.action.selector ? " " + d.action.selector : ""}): ${d.reason}`);
+	}
+	flushCascade();
 	meta("validated", `${kept.length}/${actions.length} kept (${dropped.length} dropped)`);
+	reportPerStepBreakdown(actions, kept);
 	return kept;
 }
+/**
+* Lenient-mode helper: return the entries of `originalActions` that appear
+* in `kept` or `unstable`, preserving recording order. Membership is decided
+* by object identity, so both lists must hold the same references as
+* `originalActions` rather than copies.
+*/
+function mergeKeptAndUnstableInOriginalOrder(originalActions, kept, unstable) {
+	const survivors = new Set(kept);
+	for (const flagged of unstable) survivors.add(flagged);
+	return originalActions.filter((candidate) => survivors.has(candidate));
+}
+/**
+* Short progress label for one action during validation replay: an
+* optional `<stepId> ` prefix followed by a detail string. The detail is, in
+* order of preference, `find <locator> <value>`, `<command> <selector>`,
+* `<command> <value>`, or the bare command, and is clipped to 80 characters
+* (77 + `...`). The step prefix is not counted against that limit.
+*/
+function validationProgressLabel(action) {
+	let detail;
+	if (action.findLocator) detail = `find ${action.findLocator} ${action.findValue ?? ""}`.trim();
+	else if (action.selector) detail = `${action.command} ${action.selector}`;
+	else if (action.value) detail = `${action.command} ${action.value}`;
+	else detail = action.command;
+	const clipped = detail.length > 80 ? detail.slice(0, 77) + "..." : detail;
+	const prefix = action.stepId ? `${action.stepId} ` : "";
+	return `${prefix}${clipped}`;
+}
+/**
+* Emit one `kept/total` meta line per step, in the order steps first appear
+* in `beforeValidation`, plus a single warning naming every step that lost
+* all of its actions. Actions without a `stepId` are grouped under
+* `<no step>`, which is listed but never reported as lost.
+*/
+function reportPerStepBreakdown(beforeValidation, afterValidation) {
+	const NO_STEP = "<no step>";
+	const totals = groupCountByStep(beforeValidation);
+	const survivors = groupCountByStep(afterValidation);
+	// A Set keeps insertion order, which yields first-appearance order of the ids.
+	const orderedIds = [...new Set(beforeValidation.map((a) => a.stepId ?? NO_STEP))];
+	const lostSteps = [];
+	for (const id of orderedIds) {
+		const total = totals.get(id) ?? 0;
+		const kept = survivors.get(id) ?? 0;
+		const dropped = total - kept;
+		const isLost = id !== NO_STEP && total > 0 && kept === 0;
+		if (isLost) lostSteps.push(id);
+		const droppedNote = dropped > 0 ? `, ${dropped} dropped` : "";
+		const lostNote = isLost ? " ⚠ entire step removed" : "";
+		meta(`  ${id}`, `${kept}/${total} kept${droppedNote}${lostNote}`);
+	}
+	if (lostSteps.length > 0) warn(`${lostSteps.length} spec step(s) lost every recorded action: ${lostSteps.join(", ")} — the generated test will NOT exercise these steps.`);
+}
+/** Counts actions per `stepId` into a Map; actions without one are tallied under `<no step>`. */
+function groupCountByStep(actions) {
+	const counts = /* @__PURE__ */ new Map();
+	actions.forEach(({ stepId }) => {
+		const key = stepId ?? "<no step>";
+		counts.set(key, (counts.get(key) ?? 0) + 1);
+	});
+	return counts;
+}
 function parseStatusLine(text) {
 	for (const line of text.split("\n")) {
 		const match = line.match(/^(STEP_START|STEP_DONE|ASSERTION_FAILED|STEP_SKIPPED|RUN_COMPLETED)\|([^|]*)\|(.*)$/);
@@ -1666,13 +2596,50 @@ function parseAbAction(line) {
 			target: parts[3],
 			label: parts[4]
 		};
+		case "find_click":
+		case "find_dblclick":
+		case "find_hover":
+		case "find_focus":
+		case "find_check":
+		case "find_uncheck": return parseFindAction(command, parts, false);
+		case "find_fill":
+		case "find_type": return parseFindAction(command, parts, true);
 		default: return null;
 	}
 }
+/**
+* Shared parser for the `find_*` family. Positional layout of `parts`:
+* `[2]` locator, `[3]` value, `[4]` extra, `[5]` exact token, then either
+* `[6]` fill value + `[7]` label (`hasFillValue`) or `[6]` label.
+*
+* `extra` is read as the `findName` for the `role` locator and as the
+* integer `findIndex` for `nth`; a missing or empty `extra` is accepted for
+* every locator except `nth`. Returns `null` when the locator is missing or
+* not one of `FIND_LOCATORS`, when the value is missing, or when `nth` has
+* no parseable index.
+*/
+function parseFindAction(command, parts, hasFillValue) {
+	const locator = parts[2];
+	const findValue = parts[3];
+	if (!locator || !findValue || !FIND_LOCATORS.includes(locator)) return null;
+	const extra = parts[4] ?? "";
+	const parsed = {
+		command,
+		findLocator: locator,
+		findValue
+	};
+	if ((parts[5] ?? "") === "exact") parsed.findExact = true;
+	if (locator === "role" && extra) parsed.findName = extra;
+	if (locator === "nth") {
+		const index = extra ? Number.parseInt(extra, 10) : NaN;
+		if (Number.isNaN(index)) return null;
+		parsed.findIndex = index;
+	}
+	if (hasFillValue) {
+		parsed.value = parts[6];
+		parsed.label = parts[7];
+	} else parsed.label = parts[6];
+	return parsed;
+}
 //#endregion
 //#region src/codegen/actions-to-script.ts
 function actionsToScript(input) {
-	const { actions, testName, stepMarkers = [] } = input;
+	const { actions, testName, stepMarkers = [], emptySteps = [] } = input;
 	const parts = [...[
 		`import { test } from "vitest";`,
 		`import { spawnSync } from "node:child_process";`,
@@ -1695,7 +2662,7 @@ function actionsToScript(input) {
 		`process.env.AGENT_BROWSER_SESSION ||= \`ccqa-run-\${Date.now()}\`;`,
 		""
 	]];
-	const body = actionsToLines(actions, stepMarkers).map((l) => `  ${l}`).join("\n");
+	const body = actionsToLines(actions, stepMarkers, emptySteps).map((l) => `  ${l}`).join("\n");
 	parts.push(`test(${JSON.stringify(testName)}, () => {`, body, "}, 5 * 60 * 1000);", "");
 	return parts.join("\n");
 }
@@ -1709,13 +2676,31 @@ const ELEMENT_COMMANDS = new Set([
 	"uncheck",
 	"select",
 	"hover",
-	"drag"
+	"drag",
+	"find_click",
+	"find_dblclick",
+	"find_fill",
+	"find_type",
+	"find_hover",
+	"find_focus",
+	"find_check",
+	"find_uncheck"
 ]);
-function actionsToLines(actions, stepMarkers) {
+function actionsToLines(actions, stepMarkers, emptySteps) {
 	const lines = [];
 	let prevLine = null;
-	let prevCommand = null;
+	let pendingOpenSettle = false;
 	const markerByIndex = new Map(stepMarkers.map((m) => [m.actionIndex, m]));
+	const emptyByInsertAfter = /* @__PURE__ */ new Map();
+	for (const e of emptySteps) {
+		const list = emptyByInsertAfter.get(e.insertAfterIndex) ?? [];
+		list.push(e);
+		emptyByInsertAfter.set(e.insertAfterIndex, list);
+	}
+	const leadingNotices = emptyByInsertAfter.get(-1) ?? [];
+	for (const n of leadingNotices) appendEmptyStepNotice(lines, n);
+	let currentStepId;
+	let filledValuesThisStep = /* @__PURE__ */ new Set();
 	for (let i = 0; i < actions.length; i++) {
 		const marker = markerByIndex.get(i);
 		if (marker) {
@@ -1723,22 +2708,86 @@ function actionsToLines(actions, stepMarkers) {
 			lines.push(`// step: ${marker.stepId} [${marker.source}]`);
 		}
 		const action = actions[i];
+		if (action.stepId !== currentStepId) {
+			currentStepId = action.stepId;
+			filledValuesThisStep = /* @__PURE__ */ new Set();
+		}
+		const filled = fillValueOf(action);
+		if (filled) filledValuesThisStep.add(filled);
+		if (action.command === "assert" && action.assertType === "text_visible" && typeof action.value === "string" && filledValuesThisStep.has(action.value)) {
+			lines.push(`// [warn] replay-unstable: dropped input-value assert (text_visible ${action.value}) — typed values aren't visible text nodes`);
+			continue;
+		}
 		const line = actionToLine(action);
 		if (line === null) continue;
 		if (line === prevLine) continue;
-		if (prevCommand === "open" && ELEMENT_COMMANDS.has(action.command)) lines.push(`spawnSync("sleep", ["3"], { stdio: "inherit" });`);
+		if (action.command === "open") pendingOpenSettle = true;
+		if (pendingOpenSettle && ELEMENT_COMMANDS.has(action.command)) {
+			lines.push(`spawnSync("sleep", ["3"], { stdio: "inherit" });`);
+			pendingOpenSettle = false;
+		}
+		if (action.replayUnstable) lines.push(`// [warn] replay-unstable: ${action.replayReason ?? "(no reason recorded)"}`);
 		lines.push(line);
 		prevLine = line;
-		prevCommand = action.command;
+		const followups = emptyByInsertAfter.get(i);
+		if (followups) for (const n of followups) appendEmptyStepNotice(lines, n);
 	}
 	return lines;
 }
-/** Returns true if a selector is a session-specific @ref that cannot be replayed. */
+/**
+* Text typed by a fill-type action (`fill`, `type`, `find_fill`,
+* `find_type`), read from `action.value`. Returns `null` for any other
+* command and when the value is missing, empty, or not a string.
+*/
+function fillValueOf(action) {
+	const fillCommands = ["fill", "type", "find_fill", "find_type"];
+	if (!fillCommands.includes(action.command)) return null;
+	const { value } = action;
+	return typeof value === "string" && value.length > 0 ? value : null;
+}
+/**
+* Append the comment block announcing a step whose actions were all dropped.
+* A blank separator line is added first unless `lines` is still empty.
+* Mutates `lines` in place.
+*/
+function appendEmptyStepNotice(lines, notice) {
+	const block = [
+		`// step: ${notice.stepId} [${notice.source}]`,
+		`// [warn] all actions for this step were dropped during post-trace validation.`,
+		`// [warn] the generated test does NOT exercise step ${notice.stepId}. Re-run`,
+		`// [warn] \`ccqa trace\` or add manual assertions if this step is load-bearing.`
+	];
+	if (lines.length > 0) block.unshift("");
+	lines.push(...block);
+}
+/**
+* Returns true if a selector is a session-specific agent-browser ref that
+* cannot be replayed. Two forms occur:
+*   - `@e14` — the snapshot ref syntax (interactions)
+*   - `button[ref='e4']` / `[ref=e4]` — the ref attribute leaking into a CSS
+*     selector (most often via an assert the agent built from a snapshot row)
+* Refs are re-numbered on every snapshot, so neither survives a fresh run.
+*/
 function isRefSelector(selector) {
-	return typeof selector === "string" && /^@/.test(selector.trim());
+	if (typeof selector !== "string") return false;
+	const s = selector.trim();
+	// First test: a leading `@` after trimming. Second test: a `[ref=eN]`
+	// attribute (quotes and inner whitespace optional) anywhere in the selector.
+	return /^@/.test(s) || /\[ref\s*=\s*['"]?e\d+['"]?\]/.test(s);
+}
+/**
+* Returns true if a selector picks elements *by the very state being asserted*,
+* which makes an `element_disabled` / `element_enabled` check a tautology.
+*
+* `abAssertDisabled("button[disabled]")` resolves to `is enabled
+* "button[disabled]"`: it first selects an already-disabled element, then
+* confirms it is disabled — always true, and true even when the *target* the
+* spec cares about (e.g. the "Submit" button) is missing or enabled.
+* The agent emits these when it reaches for "the disabled button" instead of
+* naming the element by a state-independent selector. The assertion verifies
+* nothing, so codegen drops it (breadcrumb only) rather than baking a green
+* check that can never fail.
+*
+* Matches the `:disabled` / `:enabled` pseudo-classes and the `[disabled]` /
+* `[aria-disabled=…]` attribute selectors anywhere in the selector.
+*/
+function isStateSelector(selector) {
+	// Case-insensitive: `:disabled` / `:enabled` pseudo-classes, or a
+	// `[disabled…` / `[aria-disabled…` attribute test, anywhere in the selector.
+	const statePattern = /:disabled\b|:enabled\b|\[\s*disabled[\s\]=]|\[\s*aria-disabled[\s\]=]/i;
+	return typeof selector === "string" && statePattern.test(selector);
 }
 function actionToLine(action) {
 	if ("selector" in action && isRefSelector(action.selector)) return null;
+	if (action.command === "assert" && action.replayUnstable && typeof action.replayReason === "string" && action.replayReason.includes("selector not present")) {
+		const sel = action.selector ?? action.observation ?? "(unknown)";
+		return `// [warn] replay-unstable: dropped over-assertion (${action.assertType ?? "assert"} ${sel}) — selector not present on replay`;
+	}
 	switch (action.command) {
 		case "cookies_clear": return `ab("cookies", "clear");`;
 		case "open": return `ab("open", ${jExpr((action.value ?? "").replace(/^["']|["']$/g, ""))});`;
@@ -1757,7 +2806,22 @@ function actionToLine(action) {
 		case "wait": {
 			const sel = action.selector;
 			if (/^\d+$/.test(sel)) return `spawnSync("sleep", [${j(sel)}], { stdio: "inherit" });`;
-			return `abWait(${j(sel)});`;
+			if (sel.startsWith("--")) return null;
+			return `abWait(${jExpr(sel)});`;
+		}
+		case "find_click":
+		case "find_dblclick":
+		case "find_hover":
+		case "find_focus":
+		case "find_check":
+		case "find_uncheck": {
+			const args = buildFindArgs(action, void 0);
+			return args === null ? droppedFindMarker(action) : `ab(${args.join(", ")});`;
+		}
+		case "find_fill":
+		case "find_type": {
+			const args = buildFindArgs(action, action.value ?? "");
+			return args === null ? droppedFindMarker(action) : `ab(${args.join(", ")});`;
 		}
 		case "assert": {
 			const val = action.value ?? action.observation;
@@ -1781,9 +2845,11 @@ function actionToLine(action) {
 					if (val) assertLine = `abAssertUrl(${jExpr(val)});`;
 					break;
 				case "element_enabled":
+					if (isStateSelector(sel)) return tautologicalStateAssertMarker(action, sel);
 					if (sel && !sel.startsWith("text=") && !sel.startsWith("[aria-label=")) assertLine = `abAssertEnabled(${j(sel)});`;
 					break;
 				case "element_disabled":
+					if (isStateSelector(sel)) return tautologicalStateAssertMarker(action, sel);
 					if (sel && !sel.startsWith("text=") && !sel.startsWith("[aria-label=")) assertLine = `abAssertDisabled(${j(sel)});`;
 					break;
 				case "element_checked":
@@ -1799,6 +2865,51 @@ function actionToLine(action) {
 		default: return null;
 	}
 }
+/**
+* Build the already-serialised argument list for an `ab("find", ...)` call.
+* Returns `null` when the action has no `findLocator` or no `findValue`.
+*
+* Emitted order: `"find"`, locator, locator operand(s), inner action,
+* optional fill value, then `--name <findName>` (role locator only) and
+* `--exact` when set. For `nth` the operands are the index (defaulting to 0)
+* followed by the value. The inner action is the command without its
+* `find_` prefix, with `type` rewritten to `fill`.
+*
+* `findValue` goes through `jExpr` except for `first` / `last` / `nth`,
+* where it goes through `j`; the fill value and `findName` also go through
+* `jExpr`.
+*/
+function buildFindArgs(action, fillValue) {
+	const { findLocator, findValue } = action;
+	if (!findLocator || !findValue) return null;
+	const innerAction = action.command.slice(5).replace("type", "fill");
+	let operands;
+	switch (findLocator) {
+		case "nth":
+			operands = [JSON.stringify(String(action.findIndex ?? 0)), j(findValue)];
+			break;
+		case "first":
+		case "last":
+			operands = [j(findValue)];
+			break;
+		default: operands = [jExpr(findValue)];
+	}
+	const args = [
+		JSON.stringify("find"),
+		JSON.stringify(findLocator),
+		...operands,
+		JSON.stringify(innerAction)
+	];
+	if (fillValue !== void 0) args.push(jExpr(fillValue));
+	if (findLocator === "role" && action.findName) args.push(JSON.stringify("--name"), jExpr(action.findName));
+	if (action.findExact) args.push(JSON.stringify("--exact"));
+	return args;
+}
+/**
+* Comment line emitted in place of a `find_*` action that lacks the
+* locator/value fields codegen needs. It names the command (and the stepId
+* when there is one) so the dropped step stays visible in the generated
+* file instead of vanishing silently.
+*/
+function droppedFindMarker(action) {
+	const { command, stepId } = action;
+	const ctx = stepId ? ` (stepId=${stepId})` : "";
+	return `// [warn] find_* dropped: ${command}${ctx} — actions.json is missing findLocator/findValue. Re-run \`ccqa trace\` to regenerate.`;
+}
+/**
+* Comment line emitted in place of an enabled/disabled assert whose selector
+* already selects by the asserted state. It records the assert type and the
+* selector (with `assert` / `(unknown)` fallbacks) so a reviewer can see what
+* was discarded.
+*/
+function tautologicalStateAssertMarker(action, sel) {
+	const kind = action.assertType ?? "assert";
+	const shown = sel ?? "(unknown)";
+	return `// [warn] dropped tautological assert (${kind} ${shown}) — selector matches by the asserted state; target the element by a state-independent selector instead`;
+}
 /** JSON.stringify — produces a quoted string literal safe for embedding in TS source. */
 const j = (s) => JSON.stringify(s);
 /**
@@ -1818,13 +2929,19 @@ The trace contains noise: failed attempts, redundant retries, and duplicate oper
 Your task: return a **cleaned-up JSON array** of TraceAction objects that represents the minimal, correct sequence of actions needed to reproduce the test.
 Each TraceAction object has the following shape (use EXACTLY these field names):
-{ "command": "...", "assertType": "...", "selector": "...", "value": "...", "label": "...", "observation": "..." }
+{ "command": "...", "assertType": "...", "selector": "...", "value": "...", "label": "...", "observation": "...",
+  "findLocator": "...", "findValue": "...", "findName": "...", "findIndex": 0, "findExact": true }
 Only include fields that are present in the original action. The "command" field is required. For assert actions, "assertType" is also required.
+**\`find_*\` actions (find_click / find_dblclick / find_fill / find_type / find_hover / find_focus / find_check / find_uncheck) are special:**
+They do NOT use \`selector\`. They use \`findLocator\` + \`findValue\` (and optionally \`findName\` / \`findIndex\` / \`findExact\`). When you keep a \`find_*\` action, you MUST copy **every** \`find*\` field from the original verbatim — dropping any of them silently corrupts the recorded selector and the generated test will be broken. Treat the \`find*\` cluster as one atomic unit: keep all or drop all.
 Rules:
 - Remove actions that were failed attempts superseded by a later successful action (e.g., if \`fill selector="text=Foo"\` was followed by \`fill selector="[placeholder='Foo']"\`, keep only the latter)
 - Remove duplicate fill operations on the same field (keep only the last successful fill for each field)
 - For \`click\` and \`fill\` actions: if the selector starts with \`text=\`, it is a failed attempt — remove it (text= selectors only work with the wait command, not click/fill)
+- For \`find_*\` actions: if multiple \`find_*\` of the same command were emitted within the same logical step (Claude tried several locators), keep ONLY the last one — that is the one that finally succeeded
 - Keep all snapshot actions — they serve as comments/observations in the generated test
 - Keep all assert actions — they are the test's verification points and must not be removed
 - Do NOT invent new actions or change values
@@ -1836,6 +2953,11 @@ ${actions.map((a, i) => {
 		if (a.assertType) parts.push(`assertType="${a.assertType}"`);
 		if (a.selector) parts.push(`selector="${a.selector}"`);
 		if (a.value) parts.push(`value="${a.value}"`);
+		if (a.findLocator) parts.push(`findLocator="${a.findLocator}"`);
+		if (a.findValue) parts.push(`findValue="${a.findValue}"`);
+		if (a.findName) parts.push(`findName="${a.findName}"`);
+		if (a.findIndex !== void 0) parts.push(`findIndex=${a.findIndex}`);
+		if (a.findExact) parts.push(`findExact=true`);
 		if (a.observation) parts.push(`→ ${a.observation}`);
 		return parts.join(" ");
 	}).join("\n")}`;
@@ -2033,7 +3155,7 @@ function applySelectorDrift(script, line, oldSelector, newSelector) {
 		applied: false,
 		reason: `oldSelector not found on line ${line}`
 	};
-	lines[idx] = content.replaceAll(oldSelector, newSelector);
+	lines[idx] = replaceSelectorLiteral(content, oldSelector, newSelector);
 	return {
 		applied: true,
 		script: lines.join("\n"),
@@ -2041,6 +3163,44 @@ function applySelectorDrift(script, line, oldSelector, newSelector) {
 	};
 }
 /**
+* Rewrite a selector inside whatever string literal encloses it on the line.
+*
+* When `newSelector` has no `${...}` env reference this is a plain textual
+* swap. When it does, the enclosing literal must be a template literal or
+* the generated TS is invalid, so the line is scanned literal by literal:
+*   - a `...` literal containing `oldSelector` gets the swap in place;
+*   - a "..." / '...' literal containing it is promoted to `...`, with any
+*     backtick or `${` that was already in the literal escaped so it stays
+*     inert after the promotion.
+* If no literal on the line contains `oldSelector`, falls back to the plain
+* textual swap.
+*
+* The scanner only understands quotes and backslash escapes; it does not
+* track `${ ... }` nesting inside template literals or quotes that appear in
+* trailing comments.
+*
+* @param content One line of the generated script.
+* @param oldSelector Selector text to look for (matched literally).
+* @param newSelector Replacement text (inserted literally).
+* @returns The rewritten line.
+*/
+function replaceSelectorLiteral(content, oldSelector, newSelector) {
+	// Function replacer on purpose: a string replacement would expand `$&`, `$$`, `$'` … found in the selector.
+	const swapIn = (text) => text.replaceAll(oldSelector, () => newSelector);
+	if (!/\$\{[A-Za-z_]/.test(newSelector)) return swapIn(content);
+	// Index of the unescaped quote closing the literal opened at `open`, or -1 when unterminated.
+	const closingQuoteIndex = (open) => {
+		const quote = content[open];
+		for (let i = open + 1; i < content.length; i++) {
+			if (content[i] === "\\") i++;
+			else if (content[i] === quote) return i;
+		}
+		return -1;
+	};
+	// Make raw "..." / '...' source text inert inside a template literal; existing escape pairs are kept as-is.
+	const escapeForTemplate = (text) => {
+		let escaped = "";
+		for (let i = 0; i < text.length; i++) {
+			const ch = text[i];
+			if (ch === "\\" && i + 1 < text.length) escaped += ch + text[++i];
+			else if (ch === "`") escaped += "\\`";
+			else if (ch === "$" && text[i + 1] === "{") escaped += "\\$";
+			else escaped += ch;
+		}
+		return escaped;
+	};
+	// Backticks in the new selector would end the promoted literal early; its `${...}` must stay live.
+	const templateSafeSelector = newSelector.replace(/`/g, "\\`");
+	let out = "";
+	let pos = 0;
+	let rewroteLiteral = false;
+	while (pos < content.length) {
+		const ch = content[pos];
+		if (ch !== "\"" && ch !== "'" && ch !== "`") {
+			out += ch;
+			pos++;
+			continue;
+		}
+		const close = closingQuoteIndex(pos);
+		// Unterminated literal: stop scanning and copy the remainder verbatim below.
+		if (close === -1) break;
+		const inner = content.slice(pos + 1, close);
+		if (!inner.includes(oldSelector)) out += content.slice(pos, close + 1);
+		else {
+			rewroteLiteral = true;
+			out += ch === "`" ? `\`${swapIn(inner)}\`` : `\`${inner.split(oldSelector).map(escapeForTemplate).join(templateSafeSelector)}\``;
+		}
+		pos = close + 1;
+	}
+	if (!rewroteLiteral) return swapIn(content);
+	return out + content.slice(pos);
+}
+/**
+* Backslash-escape every regex metacharacter in `s` so the result can be
+* embedded in a `RegExp` source and match `s` literally.
+*/
+function escapeForRegex(s) {
+	const metaChars = /[.*+?^${}()|[\]\\]/g;
+	return s.replace(metaChars, "\\$&");
+}
+/**
 * Build a unified-style diff snippet for showing the user what would change.
 * Just the changed lines with -/+ prefixes; not a real patch.
 */
@@ -2771,10 +3931,13 @@ async function runGenerate(featureName, specName, maxRetries, mode, force, useSn
 	const cleanedActions = await cleanupActions(actions, model);
 	if (cleanedActions.length !== actions.length) meta("cleaned", cleanedActions.length);
 	const markers = buildStepMarkers(expanded, cleanedActions);
+	const emptySteps = findEmptySteps(expanded, cleanedActions);
+	if (emptySteps.length > 0) for (const e of emptySteps) warn(`step ${e.stepId} has no kept actions — generated test will skip it (notice comment inserted).`);
 	const scriptPath = await saveTestScript(featureName, specName, actionsToScript({
 		actions: cleanedActions,
 		testName: spec.title,
-		stepMarkers: markers
+		stepMarkers: markers,
+		emptySteps
 	}));
 	meta("saved", scriptPath);
 	blank();
@@ -2844,6 +4007,42 @@ function buildStepMarkers(steps, actions) {
 	}
 	return markers;
 }
+/**
+* List the spec steps that have no action left in `cleanedActions`.
+* `actionsToScript` receives the result (as `emptySteps`) so the generated
+* script can carry a visible notice for each step that is no longer
+* exercised.
+*
+* Steps are walked in spec order. For every step that still has actions,
+* the index of its last action in `cleanedActions` becomes the current
+* anchor; every step without actions is reported with the anchor in force
+* at that point. An anchor of -1 means no surviving step preceded it.
+*
+* @param steps Spec steps; `id` and `source` are read.
+* @param cleanedActions Kept actions; `stepId` is read.
+* @returns Array of `{ stepId, source, insertAfterIndex }`, in spec order.
+*/
+function findEmptySteps(steps, cleanedActions) {
+	// stepId -> index of the last kept action carrying that id.
+	const lastIndexOfStep = /* @__PURE__ */ new Map();
+	for (const [index, action] of cleanedActions.entries()) {
+		if (action.stepId) lastIndexOfStep.set(action.stepId, index);
+	}
+	const dropped = [];
+	let anchor = -1;
+	for (const step of steps) {
+		const survivorIndex = lastIndexOfStep.get(step.id);
+		if (survivorIndex === void 0) dropped.push({
+			stepId: step.id,
+			source: step.source,
+			insertAfterIndex: anchor
+		});
+		else anchor = survivorIndex;
+	}
+	return dropped;
+}
 async function confirmOverwrite(path) {
 	if (!process.stdin.isTTY) {
 		warn(`${path} exists and stdin is not a TTY; refusing to overwrite. Pass --force to allow.`);
@@ -2912,16 +4111,39 @@ function reattachStepIds(cleaned, original) {
 			cursor = i + 1;
 			break;
 		}
-		if (matched?.stepId) out.push({
-			...c,
-			stepId: matched.stepId
-		});
-		else out.push(c);
+		out.push(matched ? mergeFromOriginal(c, matched) : c);
 	}
 	return out;
 }
+/**
+* Combine a cleaned action with the original action it was matched to.
+* Values already present on the cleaned copy always win; the original only
+* fills gaps:
+*   - `stepId` (the cleanup prompt deliberately doesn't surface it);
+*   - for `find_*` commands, the find-locator cluster (`findLocator`,
+*     `findValue`, `findName`, `findIndex`, `findExact`) — Claude
+*     occasionally omits these under the cleanup prompt, which would leave
+*     a structurally broken action that codegen has to skip;
+*   - `replayUnstable`, together with `replayReason` when the original has one.
+*
+* @param cleaned Action returned by the cleanup pass (not mutated).
+* @param original Matching action from the raw trace.
+* @returns A new merged action object.
+*/
+function mergeFromOriginal(cleaned, original) {
+	const result = { ...cleaned };
+	const borrowIfMissing = (field) => {
+		if (!result[field] && original[field]) result[field] = original[field];
+	};
+	borrowIfMissing("stepId");
+	if (cleaned.command.startsWith("find_")) {
+		for (const field of ["findLocator", "findValue", "findName"]) borrowIfMissing(field);
+		// Index 0 is a legitimate value, so only `undefined` counts as missing here.
+		if (result.findIndex === void 0 && original.findIndex !== void 0) result.findIndex = original.findIndex;
+		borrowIfMissing("findExact");
+	}
+	if (!result.replayUnstable && original.replayUnstable) {
+		result.replayUnstable = original.replayUnstable;
+		if (original.replayReason) result.replayReason = original.replayReason;
+	}
+	return result;
+}
+/**
+* Decide whether two actions are the same step for matching purposes.
+* Missing fields compare as the empty string.
+*/
 function sameShape(a, b) {
-	return a.command === b.command && (a.selector ?? "") === (b.selector ?? "") && (a.value ?? "") === (b.value ?? "") && (a.assertType ?? "") === (b.assertType ?? "");
+	// Different commands never match.
+	if (a.command !== b.command) return false;
+	// find_* with a locator on both sides: compare the locator + value pair only.
+	if (a.command.startsWith("find_") && a.findLocator && b.findLocator) return (a.findLocator ?? "") === (b.findLocator ?? "") && (a.findValue ?? "") === (b.findValue ?? "");
+	// find_* where at least one side has no locator: the command alone decides.
+	// NOTE(review): presumably so a cleaned action that lost its find* fields can still be paired — confirm.
+	if (a.command.startsWith("find_")) return true;
+	// Every other command: selector, value and assertType must all agree.
+	return (a.selector ?? "") === (b.selector ?? "") && (a.value ?? "") === (b.value ?? "") && (a.assertType ?? "") === (b.assertType ?? "");
 }
 //#endregion
 //#region src/claude/extract-json.ts
@@ -3164,56 +4386,6 @@ function buildDriftUserPrompt(existing) {
 	});
 }
 //#endregion
-//#region src/types.ts
-const RouteStepSchema = z.object({
-	title: z.string(),
-	action: z.string(),
-	observation: z.string(),
-	status: z.enum([
-		"PASSED",
-		"FAILED",
-		"SKIPPED"
-	]),
-	reason: z.string().optional()
-});
-z.object({
-	specName: z.string(),
-	timestamp: z.string(),
-	status: z.enum(["passed", "failed"]),
-	steps: z.array(RouteStepSchema)
-});
-const DraftIssueSchema = z.object({
-	severity: z.enum([
-		"OK",
-		"WARN",
-		"ERROR"
-	]),
-	category: z.enum([
-		"assertable",
-		"blocks",
-		"granularity",
-		"unimplemented"
-	]),
-	stepId: z.string().nullable(),
-	message: z.string(),
-	detail: z.string().optional()
-});
-const DraftReportSchema = z.object({
-	issues: z.array(DraftIssueSchema),
-	patch: z.string()
-});
-const DRAFT_CATEGORY_LABEL = {
-	assertable: "Assertability",
-	blocks: "Block references",
-	granularity: "Step granularity",
-	unimplemented: "Unimplemented checks"
-};
-const DraftNamingSchema = z.object({
-	featureName: z.string().min(1),
-	specName: z.string().min(1),
-	reason: z.string().optional()
-});
-//#endregion
 //#region src/drift/analyze.ts
 const DEFAULT_CONCURRENCY$1 = 3;
 /**