ccqa 0.4.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/bin/ccqa.mjs
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import { n as spawnAB } from "../spawn-ab-
|
|
2
|
+
import { n as spawnAB, t as sleepSync } from "../spawn-ab-DjRh1-4T.mjs";
|
|
3
3
|
import { createRequire } from "node:module";
|
|
4
4
|
import { Command } from "commander";
|
|
5
5
|
import { accessSync, existsSync, readFileSync, statSync } from "node:fs";
|
|
6
6
|
import { fileURLToPath } from "node:url";
|
|
7
7
|
import { access, mkdir, mkdtemp, readFile, readdir, rm, stat, writeFile } from "node:fs/promises";
|
|
8
8
|
import { query } from "@anthropic-ai/claude-agent-sdk";
|
|
9
|
+
import { ZodError, z } from "zod";
|
|
9
10
|
import { delimiter, dirname, join, relative, resolve } from "node:path";
|
|
10
11
|
import { parse, stringify } from "yaml";
|
|
11
|
-
import { ZodError, z } from "zod";
|
|
12
12
|
import { execFile, spawn } from "node:child_process";
|
|
13
13
|
import { createInterface } from "node:readline";
|
|
14
14
|
import { homedir, tmpdir } from "node:os";
|
|
@@ -57,13 +57,20 @@ agent-browser --session SESSION press <Key>
|
|
|
57
57
|
agent-browser --session SESSION select "<selector>" "<value>"
|
|
58
58
|
agent-browser --session SESSION hover "<selector>"
|
|
59
59
|
agent-browser --session SESSION wait --text "<text>" [--timeout <ms>]
|
|
60
|
-
agent-browser --session SESSION wait
|
|
60
|
+
agent-browser --session SESSION wait --load networkidle
|
|
61
|
+
agent-browser --session SESSION get count "<selector>" # element-existence check (returns a number, fast)
|
|
61
62
|
agent-browser --session SESSION cookies clear
|
|
63
|
+
agent-browser --session SESSION find <locator> <value> <action> [<input>] [--name "<n>"] [--exact]
|
|
64
|
+
# See "Selector Rules" for the full \`find\` subset.
|
|
65
|
+
# IMPORTANT: do NOT use \`wait "<css-selector>"\`. agent-browser ignores --timeout on a
|
|
66
|
+
# CSS-selector wait and blocks for ~150s when the selector never matches, killing the run.
|
|
67
|
+
# Wait for readiness with \`wait --text\`, \`wait --load networkidle\`, or just use \`find\`
|
|
68
|
+
# (which waits internally). To check an element exists, use \`get count "<selector>"\`.
|
|
62
69
|
\`\`\`
|
|
63
70
|
|
|
64
71
|
## Selector Rules
|
|
65
72
|
|
|
66
|
-
**ALLOWED — these
|
|
73
|
+
**ALLOWED selector formats — use ONE of these everywhere a selector appears (click, fill, wait, assert, ...):**
|
|
67
74
|
|
|
68
75
|
| Format | Use when |
|
|
69
76
|
|--------|----------|
|
|
@@ -71,24 +78,63 @@ agent-browser --session SESSION cookies clear
|
|
|
71
78
|
| \`text=visible text\` | Unique visible text, no aria-label |
|
|
72
79
|
| \`[placeholder='text']\` | Input identified by placeholder |
|
|
73
80
|
| \`[type='password']\` | Password inputs only |
|
|
74
|
-
| \`a[href*='pattern']\` | Links where \`text=\` fails — use the URL pattern from the ARIA snapshot
|
|
81
|
+
| \`a[href*='pattern']\` | Links where \`text=\` fails — use the URL pattern from the ARIA snapshot |
|
|
82
|
+
| \`[data-testid='...']\`, \`[data-qa='...']\` | Specific attribute selectors when an aria-label is absent |
|
|
83
|
+
|
|
84
|
+
**FORBIDDEN — these break recorded tests and are rejected by the hook layer:**
|
|
85
|
+
|
|
86
|
+
- \`@ref\` / \`@e1\` / \`e14\` — reference IDs are session-specific and change every run.
|
|
87
|
+
- **Bare tag selectors**: \`button\`, \`a\`, \`div\`, \`td\`, \`tr\`, \`main a\`, \`table tbody tr:nth-child(N)\`. These match every element of that tag and are non-deterministic on replay. **This includes the inner selector inside \`find first/last/nth\`** — see the \`find\` rules below.
|
|
88
|
+
- \`[role='button']\` or \`[type='checkbox']\` alone — matches too many elements.
|
|
89
|
+
- JavaScript execution (\`eval\`, \`js\`) — blocked by the hook layer.
|
|
90
|
+
|
|
91
|
+
### \`find\` subset (fallback when no ALLOWED CSS uniquely targets the element)
|
|
92
|
+
|
|
93
|
+
When repeated aria-labels / visible text make ALLOWED selectors ambiguous (e.g. a chat client where every message row has the same "1 reply" button), use one of these — they record as structured actions and replay deterministically:
|
|
94
|
+
|
|
95
|
+
\`\`\`
|
|
96
|
+
find role <role> <action> [--name "<n>"] [--exact]
|
|
97
|
+
find text|label|placeholder|alt|title "<text>" <action> [--exact]
|
|
98
|
+
find testid "<id>" <action>
|
|
99
|
+
find first|last "<ALLOWED-css>" <action>
|
|
100
|
+
find nth <index> "<ALLOWED-css>" <action>
|
|
101
|
+
\`\`\`
|
|
102
|
+
|
|
103
|
+
\`<action>\` is one of \`click | dblclick | fill | type | hover | focus | check | uncheck\`. For \`fill\`/\`type\`, the input value follows the action: \`find label "Email" fill "user@example.com"\`.
|
|
75
104
|
|
|
76
|
-
**
|
|
105
|
+
**Rules for \`find\`:**
|
|
77
106
|
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
107
|
+
1. Try ALLOWED selectors first. Only reach for \`find\` when they demonstrably cannot uniquely target the element.
|
|
108
|
+
2. **The inner selector for \`first/last/nth\` MUST be one of the ALLOWED formats above.** Never pass a bare tag — "the last button" is meaningless on replay.
|
|
109
|
+
3. \`find last\` is reliable only when the layout guarantees "the target is the bottom-most match" (e.g. the most-recently-sent chat message). Be explicit in the AB_ACTION label.
|
|
110
|
+
4. Argument order is \`<value> <action> [flags]\` — flags after the action. Putting \`--name\` / \`--exact\` before the action makes agent-browser fail with "Unknown subaction".
|
|
111
|
+
5. \`--name "<n>"\` is **role-only**. Never pass it to \`find text\`, \`find label\`, etc.
|
|
112
|
+
6. \`find\` includes its own wait; do not chain a \`wait\` before it.
|
|
83
113
|
|
|
84
|
-
**
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
114
|
+
**Examples:**
|
|
115
|
+
|
|
116
|
+
- ✓ \`find last "[data-testid='reply-link']" click\` — specific attribute + layout-guaranteed last match
|
|
117
|
+
- ✓ \`find role button click --name "Submit"\` — role + accessible name (flags after action)
|
|
118
|
+
- ✗ \`find role button --name "Submit" click\` — wrong order
|
|
119
|
+
- ✗ \`find last "button" click\` — bare tag
|
|
120
|
+
|
|
121
|
+
### Selector workflow
|
|
122
|
+
|
|
123
|
+
1. Run \`snapshot\` and read the ARIA tree.
|
|
124
|
+
2. Identify the element; note its exact \`aria-label\` if present.
|
|
125
|
+
3. If aria-label present → use \`[aria-label='...']\`. Otherwise → use \`text=...\`.
|
|
126
|
+
4. For links where \`text=\` fails, find the link's URL in the snapshot and use \`a[href*='...']\` with a distinctive substring.
|
|
127
|
+
5. For checkboxes: try \`check "text=Label"\` or \`check "[aria-label='Label']"\`.
|
|
128
|
+
6. If repeated labels make every ALLOWED selector ambiguous → use the \`find\` subset above.
|
|
129
|
+
7. Never guess. If a selector fails, take a fresh snapshot before retrying.
|
|
130
|
+
|
|
131
|
+
### Special input types
|
|
132
|
+
|
|
133
|
+
**contenteditable / RichText editors**: \`fill "[contenteditable='true']" "<text>"\` works on contenteditable elements (chat composers, WYSIWYG bodies) — agent-browser sets the text directly. Use a single \`fill\`; do NOT just \`click\` the field and rely on \`keyboard inserttext\` (that keystroke command is not recorded as a structured action, so the text never makes it into the generated test and the field ends up empty on replay).
|
|
134
|
+
|
|
135
|
+
**combobox / select with a required marker (\`*\`)**: required form fields often include the marker in their accessible name. If \`find role combobox click --name "<label>"\` misses, prefer \`find label "<label>" click\` or \`click "[aria-label='<label> *']"\`.
|
|
136
|
+
|
|
137
|
+
**Verifying cleanup / deletion**: assert the *absence* of the deleted thing, not the surrounding listing screen's text. Use \`wait --fn "!document.body.innerText.includes('<unique-label>')"\` (text disappearance) — never \`wait "<css-selector>" --state hidden\` (blocks the daemon) and never \`wait --text "<navbar label>"\` (passes regardless of the deletion).
|
|
92
138
|
|
|
93
139
|
## Test Specification
|
|
94
140
|
|
|
@@ -103,52 +149,42 @@ ${stepsText}
|
|
|
103
149
|
## Execution Workflow
|
|
104
150
|
|
|
105
151
|
For each step:
|
|
106
|
-
1. Emit \`STEP_START|<step-id>|<short description
|
|
107
|
-
2. Run \`snapshot\` and identify selectors from the ARIA tree
|
|
108
|
-
3. Execute the action using an ALLOWED selector
|
|
109
|
-
4. Emit \`AB_ACTION|...\` for every browser action (see
|
|
110
|
-
5. Run \`snapshot\` again to verify the outcome
|
|
111
|
-
6. Confirm at least **two independent signals** (URL change, element appearance, text change,
|
|
112
|
-
7. For each verified signal, emit \`AB_ACTION|assert|...\` (see Assertion Protocol
|
|
113
|
-
8. Emit \`ROUTE_STEP
|
|
114
|
-
9. Emit \`STEP_DONE\`, \`ASSERTION_FAILED\`, or \`STEP_SKIPPED
|
|
115
|
-
|
|
116
|
-
**After form submission or navigation:** take a snapshot before continuing. If an intermediate screen appears (
|
|
152
|
+
1. Emit \`STEP_START|<step-id>|<short description>\`.
|
|
153
|
+
2. Run \`snapshot\` and identify selectors from the ARIA tree.
|
|
154
|
+
3. Execute the action using an ALLOWED selector (see Selector Rules).
|
|
155
|
+
4. Emit \`AB_ACTION|...\` for every browser action (see AB_ACTION Protocol).
|
|
156
|
+
5. Run \`snapshot\` again to verify the outcome.
|
|
157
|
+
6. Confirm at least **two independent signals** (URL change, element appearance, text change, ...).
|
|
158
|
+
7. For each verified signal, emit \`AB_ACTION|assert|...\` (see Assertion Protocol).
|
|
159
|
+
8. Emit \`ROUTE_STEP|...\`.
|
|
160
|
+
9. Emit \`STEP_DONE\`, \`ASSERTION_FAILED\`, or \`STEP_SKIPPED\`.
|
|
161
|
+
|
|
162
|
+
**After form submission or navigation:** take a fresh snapshot before continuing. If an intermediate screen appears (account selection, role picker, ...), complete it and emit AB_ACTION for each interaction.
|
|
117
163
|
|
|
118
164
|
## Guardrails
|
|
119
165
|
|
|
120
|
-
- **Stop after 3 consecutive failures on the same step** — emit \`ASSERTION_FAILED\` and report the blocker.
|
|
121
|
-
- **
|
|
122
|
-
-
|
|
123
|
-
- Do NOT
|
|
124
|
-
- Do NOT
|
|
125
|
-
- **
|
|
126
|
-
- **If \`agent-browser\` is not found, stop immediately.** Do not run \`which\`, \`find\`, \`npm ls\`, \`npm install\`, \`npx\`, \`brew\`, or any other discovery / installation command. Do not try alternate paths. The ccqa host already validates the binary before launching you, so if you see \`command not found\` it is a host-environment problem you cannot fix from inside the test run. Emit one line and terminate: \`ASSERTION_FAILED|step-XX|agent-browser binary not available in PATH\`.
|
|
166
|
+
- **Stop after 3 consecutive failures on the same step** — emit \`ASSERTION_FAILED\` and report the blocker.
|
|
167
|
+
- **No workarounds.** If all ALLOWED selectors fail, emit \`ASSERTION_FAILED|...|selector-drift: ...\`. Do NOT fall back to coordinate clicks, mouse moves, or \`Tab\`+\`Enter\` keyboard navigation — they cannot be recorded as reliable test actions.
|
|
168
|
+
- Do NOT retry a selector without taking a fresh snapshot first.
|
|
169
|
+
- Do NOT work around blockers (login walls, missing data, captchas) — stop and report.
|
|
170
|
+
- **Do NOT suppress errors.** Never use \`2>/dev/null\`, \`|| true\`, \`; true\`, or any technique that hides agent-browser failures. Each \`agent-browser\` invocation must be its own standalone Bash call. Chaining multiple agent-browser commands with \`&&\` / \`;\` / \`|\` is rejected by the hook layer.
|
|
171
|
+
- **If \`agent-browser\` is not found, stop immediately.** Do not run \`which\`, \`find\`, \`npm ls\`, \`npm install\`, \`npx\`, \`brew\`, or any other discovery / installation command. Emit one line and terminate: \`ASSERTION_FAILED|step-XX|agent-browser binary not available in PATH\`.
|
|
127
172
|
|
|
128
173
|
## Source Code Reference
|
|
129
174
|
|
|
130
|
-
You have
|
|
175
|
+
You have \`Read\`, \`Grep\`, and \`Glob\` to inspect the application source code. Use them proactively to find correct selectors — do NOT guess \`a[href*='...']\` patterns.
|
|
131
176
|
|
|
132
|
-
**When to
|
|
133
|
-
- Before clicking a link: Grep for the link text or URL pattern in the codebase to find the exact \`href\` value
|
|
134
|
-
- Before navigating to a new page: Glob for page/route files to understand the URL structure
|
|
135
|
-
- When the ARIA snapshot shows an element but \`text=\` and \`[aria-label=]\` selectors fail: Read the component to find what HTML attributes the element has
|
|
177
|
+
**When**: before clicking a link (find the \`href\`); before navigating to a new page (understand routing); when an ARIA element exists but no ALLOWED selector matches (find the actual HTML attributes).
|
|
136
178
|
|
|
137
|
-
**How
|
|
138
|
-
1. Use \`Grep\` to search for UI text, component names, or URL patterns
|
|
139
|
-
2. Use \`Read\` to inspect the component's JSX/TSX and find \`href\`, \`aria-label\`, \`data-testid\`, or class names
|
|
140
|
-
3. Build a precise ALLOWED selector from the discovered attributes
|
|
179
|
+
**How**: \`Grep\` for UI text / component names / URL patterns → \`Read\` the JSX/TSX to find \`href\`, \`aria-label\`, \`data-testid\`, or class names → build a precise ALLOWED selector.
|
|
141
180
|
|
|
142
|
-
**Rules
|
|
143
|
-
- Only READ source files — never modify them
|
|
144
|
-
- Keep source reading focused — search for specific strings, not entire directories
|
|
181
|
+
**Rules**: only READ source files, never modify them. Keep searches focused.
|
|
145
182
|
|
|
146
183
|
## Waiting for Async Operations
|
|
147
184
|
|
|
148
|
-
Prefer
|
|
185
|
+
Prefer \`wait\` over polling:
|
|
149
186
|
|
|
150
187
|
\`\`\`bash
|
|
151
|
-
# Best: wait for expected text to appear
|
|
152
188
|
agent-browser --session ${sessionName} wait --text "<completion text>"
|
|
153
189
|
\`\`\`
|
|
154
190
|
|
|
@@ -158,7 +194,6 @@ If polling is required (e.g. waiting for a spinner to disappear):
|
|
|
158
194
|
for i in $(seq 1 18); do
|
|
159
195
|
sleep 10
|
|
160
196
|
result=$(agent-browser --session ${sessionName} snapshot 2>&1)
|
|
161
|
-
# Check result for the expected change and break when found
|
|
162
197
|
echo "$result" | grep -q "<done indicator>" && break
|
|
163
198
|
done
|
|
164
199
|
agent-browser --session ${sessionName} snapshot
|
|
@@ -186,18 +221,28 @@ AB_ACTION|drag|<source selector>|<target selector>|<source label>
|
|
|
186
221
|
AB_ACTION|wait|<selector or text>|<label>
|
|
187
222
|
AB_ACTION|snapshot|<key observation, max 100 chars>
|
|
188
223
|
AB_ACTION|assert|<assertType>|<selector or "">|<value or "">|<observation>
|
|
224
|
+
|
|
225
|
+
# find_* (semantic locator fallback). <extra> = role's --name OR nth's index OR "".
|
|
226
|
+
# <exact> = literal "exact" if --exact was passed, "" otherwise. Keep empty pipe slots.
|
|
227
|
+
AB_ACTION|find_click|<locator>|<value>|<extra>|<exact>|<label>
|
|
228
|
+
AB_ACTION|find_dblclick|<locator>|<value>|<extra>|<exact>|<label>
|
|
229
|
+
AB_ACTION|find_hover|<locator>|<value>|<extra>|<exact>|<label>
|
|
230
|
+
AB_ACTION|find_focus|<locator>|<value>|<extra>|<exact>|<label>
|
|
231
|
+
AB_ACTION|find_check|<locator>|<value>|<extra>|<exact>|<label>
|
|
232
|
+
AB_ACTION|find_uncheck|<locator>|<value>|<extra>|<exact>|<label>
|
|
233
|
+
AB_ACTION|find_fill|<locator>|<value>|<extra>|<exact>|<input>|<label>
|
|
234
|
+
AB_ACTION|find_type|<locator>|<value>|<extra>|<exact>|<input>|<label>
|
|
189
235
|
\`\`\`
|
|
190
236
|
|
|
191
|
-
|
|
237
|
+
Selectors in AB_ACTION must follow Selector Rules. \`find_*\` lines use the locator + value pair instead of a separate selector. Do NOT include literal \`|\` inside any field — replace with a space if necessary.
|
|
192
238
|
|
|
193
|
-
**CRITICAL — record only successful actions.** The AB_ACTION stream is the
|
|
194
|
-
canonical replay sequence: every line in it must be reproducible on a fresh
|
|
195
|
-
browser session. Therefore:
|
|
239
|
+
**CRITICAL — record only successful actions.** The AB_ACTION stream is the canonical replay sequence: every line must be reproducible on a fresh browser session.
|
|
196
240
|
|
|
197
|
-
-
|
|
198
|
-
- If you
|
|
199
|
-
-
|
|
200
|
-
- If a
|
|
241
|
+
- A non-zero exit from agent-browser (selector not found, element not interactable, timeout) → **do NOT emit AB_ACTION** for that attempt. Switch selector and only emit the AB_ACTION for the call that finally succeeded.
|
|
242
|
+
- If you tried several selectors / \`find_*\` locators for the same logical action, emit AB_ACTION for the **last working one only**. Multiple failed attempts in a row will all fail at replay validation and silently delete the step from the generated test.
|
|
243
|
+
- \`AB_ACTION|assert|...\` follows the same rule: only emit assertions you actually verified on the current page in the current snapshot.
|
|
244
|
+
- **Environment-failure recovery is not part of the test.** If a session times out, a network blip drops you to login, or the app crashes and you re-login / re-navigate / re-fill to recover, do NOT emit AB_ACTION for the recovery operations.
|
|
245
|
+
- If a step ultimately fails after retries: emit \`ASSERTION_FAILED\` and STOP. Do not leave half-recorded actions in the stream.
|
|
201
246
|
|
|
202
247
|
## Assertion Protocol
|
|
203
248
|
|
|
@@ -212,62 +257,74 @@ After verifying each step, emit \`AB_ACTION|assert\` lines for each signal you c
|
|
|
212
257
|
| \`element_visible\` | Element is visible | CSS selector | (empty) |
|
|
213
258
|
| \`element_not_visible\` | Element is hidden/removed | CSS selector | (empty) |
|
|
214
259
|
| \`url_contains\` | URL contains a pattern | (empty) | URL substring |
|
|
215
|
-
| \`element_enabled\` | Button/input is enabled | CSS selector | (empty) |
|
|
216
|
-
| \`element_disabled\` | Button/input is disabled | CSS selector | (empty) |
|
|
260
|
+
| \`element_enabled\` | Button/input is enabled | CSS selector (state-independent) | (empty) |
|
|
261
|
+
| \`element_disabled\` | Button/input is disabled | CSS selector (state-independent) | (empty) |
|
|
217
262
|
| \`element_checked\` | Checkbox is checked | CSS selector | (empty) |
|
|
218
263
|
| \`element_unchecked\` | Checkbox is unchecked | CSS selector | (empty) |
|
|
219
264
|
|
|
220
|
-
**Stability rules — CRITICAL:**
|
|
221
|
-
|
|
222
|
-
-
|
|
223
|
-
- **
|
|
265
|
+
**Stability rules — CRITICAL. NEVER assert on values that change run-to-run:**
|
|
266
|
+
|
|
267
|
+
- Timestamps, session IDs, exact numeric counts that vary between runs.
|
|
268
|
+
- **Absolute dates / clock times**: \`12:34:56\`, \`2026-05-20\`, \`2026年5月20日\`, \`5月20日\`. These are scrubbed by post-trace literal-scrub anyway — avoid them at the source.
|
|
269
|
+
- **Relative-time labels** — true only in the moment of the trace, stale by replay:
|
|
270
|
+
- English: \`just now\`, \`5 minutes ago\`, \`2 hours ago\`, \`yesterday\`, \`last week\`.
|
|
271
|
+
- Japanese: \`たった今\`, \`3分前\`, \`1時間前\`, \`昨日\`.
|
|
272
|
+
- Dynamic counts like "42 results" → assert on the stable suffix ("results") only.
|
|
273
|
+
- **PREFER**: status text, button labels, URL patterns, element enabled/disabled state.
|
|
224
274
|
|
|
225
|
-
**
|
|
226
|
-
- After a page navigation (\`open\` or \`click\` that navigates), take a **fresh snapshot** BEFORE emitting any assertions
|
|
227
|
-
- Only assert on text/elements that are visible on the **current** page — never assert on text from the previous page
|
|
228
|
-
- If you navigated away from a page, its text is gone — do not emit \`text_visible\` for it
|
|
275
|
+
**No tautological state asserts — CRITICAL for \`element_enabled\` / \`element_disabled\`:**
|
|
229
276
|
|
|
230
|
-
**
|
|
231
|
-
|
|
232
|
-
-
|
|
233
|
-
-
|
|
234
|
-
-
|
|
277
|
+
The selector must identify *which* element by something **other than the state you are asserting**. Selecting the element *by* its state and then asserting that state is a tautology that always passes and verifies nothing.
|
|
278
|
+
|
|
279
|
+
- ✗ \`element_disabled | button[disabled] |\` — picks an already-disabled button, then "confirms" it is disabled. Passes even if the button the spec cares about is missing or enabled.
|
|
280
|
+
- ✗ \`element_enabled | button:enabled |\`, \`[aria-disabled='true']\`, \`input:disabled\` — same trap.
|
|
281
|
+
- ✓ Name the element by a stable, state-independent selector and assert the state on it: e.g. the "Submit" button is \`find role button --name "Submit"\`; to assert it is disabled, give \`element_disabled\` a selector that targets *that* button (a stable \`id\` / \`data-testid\` / unique class), **not** \`[disabled]\`.
|
|
282
|
+
- If you cannot target the specific element without a state pseudo-class/attribute, **do not emit the enabled/disabled assert** — assert a user-visible consequence instead (e.g. the action it gates does not happen, a "you don't have permission" message is shown), or rely on \`text_visible\` for the label plus \`text_not_visible\` for what an enabled control would have produced.
|
|
283
|
+
|
|
284
|
+
**Page-context and selector rules:**
|
|
285
|
+
|
|
286
|
+
- After a navigation, take a **fresh snapshot** before emitting any assertion. Don't assert on text from the previous page.
|
|
287
|
+
- Assertion selectors follow the same Selector Rules as actions — never invent aria-label values; use the exact strings from the current snapshot.
|
|
288
|
+
- When unsure, prefer \`text_visible\`/\`text_not_visible\` (no selector needed) — but pre-verify with \`wait --text\` per the MUST-VERIFY rule below.
|
|
235
289
|
|
|
236
290
|
**MUST-VERIFY rule — STRICT (applies to every assert except \`url_contains\`):**
|
|
237
291
|
|
|
238
|
-
The \`snapshot\` output is the **accessibility tree
|
|
292
|
+
The \`snapshot\` output is the **accessibility tree**, but \`agent-browser\` queries the **real DOM**. They don't always agree. Three known traps:
|
|
239
293
|
|
|
240
|
-
1. *Selector trap*: a snapshot row like \`textbox "Email address"\`
|
|
241
|
-
2. *Text trap*: a snapshot row like \`link "Dashboard"\` may come from \`<a><img alt="Dashboard"></a>\` — the visible "text" is an \`alt\` attribute, not a text node. \`text_visible\` (which scans visible text nodes
|
|
294
|
+
1. *Selector trap*: a snapshot row like \`textbox "Email address"\` may be reachable via \`[placeholder='...']\` but **not** via \`[aria-label='...']\` if no aria-label attribute is actually set (the browser inferred the label from \`<label for=>\` / placeholder).
|
|
295
|
+
2. *Text trap*: a snapshot row like \`link "Dashboard"\` may come from \`<a><img alt="Dashboard"></a>\` — the visible "text" is an \`alt\` attribute, not a text node. \`text_visible\` (which scans visible text nodes) will NOT find it.
|
|
296
|
+
3. *Input-value trap*: after you \`fill\` an \`<input>\` / \`<textarea>\` / \`[contenteditable]\`, the text you typed lives in the element's **value**, not as a visible text node. **Do NOT assert the typed value with \`text_visible\`** — it will never match. The spec's "the field reflects X" expectation is implicitly confirmed when the form submits successfully and the value shows up on the *result* page (a list row, a detail page). Assert there, not on the input itself.
|
|
242
297
|
|
|
243
|
-
Before emitting
|
|
298
|
+
Before emitting \`AB_ACTION|assert|...\`, **verify the assertion form actually resolves on the live page**:
|
|
244
299
|
|
|
245
300
|
\`\`\`bash
|
|
246
301
|
# element_visible / element_enabled / element_disabled / element_checked / element_unchecked
|
|
247
|
-
|
|
248
|
-
|
|
302
|
+
# Use get count (fast, returns a number). Do NOT use \`wait "<selector>"\` — it blocks the daemon.
|
|
303
|
+
agent-browser --session SESSION get count "<selector>" # >=1 means present
|
|
249
304
|
# element_not_visible
|
|
250
|
-
agent-browser --session SESSION
|
|
251
|
-
|
|
305
|
+
agent-browser --session SESSION get count "<selector>" # 0 means absent
|
|
252
306
|
# text_visible
|
|
253
307
|
agent-browser --session SESSION wait --text "<text>" --timeout 3000
|
|
254
|
-
|
|
255
308
|
# text_not_visible
|
|
256
|
-
agent-browser --session SESSION wait --
|
|
309
|
+
agent-browser --session SESSION wait --fn "!document.body.innerText.includes('<text>')" --timeout 3000
|
|
257
310
|
\`\`\`
|
|
258
311
|
|
|
259
|
-
|
|
312
|
+
When *no* form verifies — e.g. \`[aria-label='X']\`, \`[placeholder='X']\`, and \`text=X\` all timed out, or the visible text turned out to be an \`alt\` — **drop the assertion entirely**. Fewer real assertions beat invented ones that fail at replay. \`url_contains\` is exempt (it checks the URL string, not the DOM).
|
|
260
313
|
|
|
261
|
-
|
|
314
|
+
**Field positions — get these RIGHT.** The line is
|
|
315
|
+
\`AB_ACTION|assert|<assertType>|<selector>|<value>|<observation>\`. The value
|
|
316
|
+
(the asserted text for \`text_visible\`/\`text_not_visible\`/\`url_contains\`) goes
|
|
317
|
+
in the **value** slot, NOT the observation slot. A common mistake is writing
|
|
318
|
+
\`text_visible|||Done|...\` (three pipes → empty selector AND empty value, "Done"
|
|
319
|
+
lands in observation): that records an assert with no value and it fails at
|
|
320
|
+
replay. For text asserts, put exactly two pipes between the assertType and the value (the selector slot between them stays empty).
|
|
262
321
|
|
|
263
|
-
**Examples:**
|
|
264
322
|
\`\`\`
|
|
265
|
-
AB_ACTION|assert|url_contains
|
|
266
|
-
AB_ACTION|assert|element_disabled|.btn-submit||Submit
|
|
267
|
-
AB_ACTION|assert|element_enabled|.btn-submit||Submit
|
|
268
|
-
AB_ACTION|assert|text_visible
|
|
269
|
-
AB_ACTION|assert|text_visible
|
|
270
|
-
AB_ACTION|assert|text_visible|||Success|Confirmation message appeared
|
|
323
|
+
AB_ACTION|assert|url_contains||/dashboard|Navigated to dashboard
|
|
324
|
+
AB_ACTION|assert|element_disabled|.btn-submit||Submit disabled before form is valid
|
|
325
|
+
AB_ACTION|assert|element_enabled|.btn-submit||Submit enabled after form is filled
|
|
326
|
+
AB_ACTION|assert|text_visible||Loading|Operation started
|
|
327
|
+
AB_ACTION|assert|text_visible||Done|Operation completed
|
|
271
328
|
\`\`\`
|
|
272
329
|
|
|
273
330
|
## Status Protocol
|
|
@@ -275,7 +332,7 @@ AB_ACTION|assert|text_visible|||Success|Confirmation message appeared
|
|
|
275
332
|
Emit exactly one status line per step (outside any code block):
|
|
276
333
|
|
|
277
334
|
\`\`\`
|
|
278
|
-
STEP_START|<step-id>|<short description
|
|
335
|
+
STEP_START|<step-id>|<short description>
|
|
279
336
|
STEP_DONE|<step-id>|<what was verified>
|
|
280
337
|
ASSERTION_FAILED|<step-id>|<category: app-bug|env-issue|auth-blocked|missing-test-data|selector-drift|agent-misread>: <reason>
|
|
281
338
|
STEP_SKIPPED|<step-id>|<reason>
|
|
@@ -391,6 +448,35 @@ function run(message) {
|
|
|
391
448
|
write("run", message);
|
|
392
449
|
}
|
|
393
450
|
/**
 * Step-by-step progress reporting for long loops.
 *
 * On a TTY the line is rewritten in place via `\r` so the terminal stays
 * uncluttered. In a non-TTY environment (CI, piped runs) we fall back to
 * regular `[info]` lines, emitted for the first step, for the last step,
 * and every PROGRESS_NONTTY_STRIDE steps in between, to avoid spamming
 * the log with one line per action.
 *
 * Callers MUST call `progressEnd()` when the loop finishes (or aborts):
 * on a TTY it erases the in-place progress line so the next log line
 * starts on a clean row, and in every mode it resets the stride tracking
 * for the next loop.
 */
const PROGRESS_NONTTY_STRIDE = 5;
/** `current` value of the last non-TTY line written; -1 means "none yet". */
let lastProgressNonTtyEmit = -1;
/**
 * Report progress for one step of a loop.
 *
 * @param {number} current Zero-based index of the step (shown as `current + 1`).
 * @param {number} total Total number of steps, shown after the slash.
 * @param {string} label Text appended after the counter.
 */
function progress(current, total, label) {
	const text = `[info] ${current + 1}/${total} ${label}`;
	if (process.stdout.isTTY) {
		// `\r` returns to column 0; `\x1b[K` erases whatever is left of a longer previous line.
		process.stdout.write(`\r${text}\x1b[K`);
		return;
	}
	const isFirstStep = current === 0;
	// Always surface the final step so non-TTY logs show the loop reached the end,
	// but never print the same final step twice.
	const isUnreportedLastStep = current + 1 === total && current !== lastProgressNonTtyEmit;
	const strideElapsed = current - lastProgressNonTtyEmit >= PROGRESS_NONTTY_STRIDE;
	if (isFirstStep || isUnreportedLastStep || strideElapsed) {
		process.stdout.write(`${text}\n`);
		lastProgressNonTtyEmit = current;
	}
}
/**
 * Finish a progress loop: erase the in-place TTY line (if any) and reset
 * the non-TTY stride tracking.
 */
function progressEnd() {
	if (process.stdout.isTTY) process.stdout.write(`\r\x1b[K`);
	lastProgressNonTtyEmit = -1;
}
|
|
479
|
+
/**
|
|
394
480
|
* Time a long-running step under the given scope, emitting `started` and
|
|
395
481
|
* `finished in N.Ns` markers. Scope must be a tag the user wants to grep
|
|
396
482
|
* for — typically "run" for vitest and "fix" for diagnose-loop steps.
|
|
@@ -408,6 +494,163 @@ async function timedPhase(label, fn, scope = "fix") {
|
|
|
408
494
|
}
|
|
409
495
|
}
|
|
410
496
|
//#endregion
|
|
497
|
+
//#region src/spec/yaml-schema.ts
|
|
498
|
+
/**
 * Schema for an action step — a single user-facing browser interaction.
 *
 * Both fields are non-empty strings carrying the natural-language text
 * handed to Claude during `ccqa trace`. URLs are written inside
 * `instruction`, either literally or as `${ENV_VAR}` references that are
 * resolved at runtime. Unknown keys are rejected (`.strict()`).
 */
const ActionStepSchema = z
	.object({
		instruction: z.string().min(1),
		expected: z.string().min(1),
	})
	.strict();
|
|
508
|
+
/**
 * Schema for an include step, which invokes a reusable block stored at
 * `.ccqa/blocks/<name>/spec.yaml`.
 *
 * `include` is the non-empty block name. `params` is an optional
 * string-to-string map; `${VAR}` env references inside the values are
 * resolved at expand time, the same way step instructions are.
 * Unknown keys are rejected (`.strict()`).
 */
const IncludeStepSchema = z
	.object({
		include: z.string().min(1),
		params: z.record(z.string(), z.string()).optional(),
	})
	.strict();
|
|
517
|
+
/**
 * Any step of a spec: an action step or an include step.
 * The presence of the `include` key tells the two apart — see `isIncludeStep`.
 */
const StepSchema = z.union([
	ActionStepSchema,
	IncludeStepSchema,
]);
|
|
522
|
+
/**
 * Top-level spec schema: a non-empty `title`, an optional list of non-empty
 * `relatedPaths`, and at least one step. `.strict()` rejects any unknown key.
 */
const TestSpecSchema = z
	.object({
		title: z.string().min(1),
		relatedPaths: z.array(z.string().min(1)).optional(),
		steps: z.array(StepSchema).min(1),
	})
	.strict();
|
|
528
|
+
/**
 * Declaration of one block parameter.
 *
 * - `name`: non-empty parameter name.
 * - `required`: defaults to true; only an explicit `required: false`
 *   makes the parameter optional (see `isParamRequired`).
 * - `secret`: `true` flags the value as sensitive — codegen renders such
 *   values as `process.env.<NAME> ?? ""` template literals so the secret
 *   never ends up baked into test.spec.ts.
 * - `dummy`: placeholder value surfaced by the draft / drift prompts,
 *   which see the block in isolation, before any include site exists.
 * - `description`: the parameter's semantic role, consumed by those same
 *   prompts and by spec authors browsing the block.
 *
 * Unknown keys are rejected (`.strict()`).
 */
const BlockParamSchema = z
	.object({
		name: z.string().min(1),
		required: z.boolean().optional(),
		secret: z.boolean().optional(),
		dummy: z.string().optional(),
		description: z.string().optional(),
	})
	.strict();
|
|
545
|
+
/**
 * Schema for a reusable block.
 *
 * Steps are limited to ActionStep, so nested blocks are forbidden: an
 * `include` step inside a block fails parsing here. The store layer
 * translates the resulting cryptic "Unrecognized key: 'include'" error
 * into a targeted nested-block message.
 */
const BlockSpecSchema = z
	.object({
		title: z.string().min(1),
		params: z.array(BlockParamSchema).optional(),
		steps: z.array(ActionStepSchema).min(1),
	})
	.strict();
|
|
556
|
+
/** Runtime predicate for the StepSchema union. */
|
|
557
|
+
function isIncludeStep(step) {
|
|
558
|
+
return "include" in step;
|
|
559
|
+
}
|
|
560
|
+
/** Returns true if a block param is required (default: true). */
|
|
561
|
+
function isParamRequired(param) {
|
|
562
|
+
return param.required !== false;
|
|
563
|
+
}
|
|
564
|
+
//#endregion
|
|
565
|
+
//#region src/types.ts
|
|
566
|
+
const RouteStepSchema = z.object({
|
|
567
|
+
title: z.string(),
|
|
568
|
+
action: z.string(),
|
|
569
|
+
observation: z.string(),
|
|
570
|
+
status: z.enum([
|
|
571
|
+
"PASSED",
|
|
572
|
+
"FAILED",
|
|
573
|
+
"SKIPPED"
|
|
574
|
+
]),
|
|
575
|
+
reason: z.string().optional()
|
|
576
|
+
});
|
|
577
|
+
z.object({
|
|
578
|
+
specName: z.string(),
|
|
579
|
+
timestamp: z.string(),
|
|
580
|
+
status: z.enum(["passed", "failed"]),
|
|
581
|
+
steps: z.array(RouteStepSchema)
|
|
582
|
+
});
|
|
583
|
+
/**
|
|
584
|
+
* Semantic locator strategies exposed by `agent-browser find`. Used by the
|
|
585
|
+
* `find_*` commands when a target cannot be uniquely picked out by the
|
|
586
|
+
* ALLOWED CSS forms (e.g. repeated `aria-label='1 reply'` rows where only
|
|
587
|
+
* "the last one" is meaningful).
|
|
588
|
+
*
|
|
589
|
+
* `first` / `last` / `nth` are positional helpers and their `findValue`
|
|
590
|
+
* carries an inner CSS selector; `nth` additionally needs `findIndex`. The
|
|
591
|
+
* remaining locators read `findValue` as the human-visible text/id.
|
|
592
|
+
* `role` may pair with `findName` to filter by accessible name.
|
|
593
|
+
*/
|
|
594
|
+
const FIND_LOCATORS = [
|
|
595
|
+
"role",
|
|
596
|
+
"text",
|
|
597
|
+
"label",
|
|
598
|
+
"placeholder",
|
|
599
|
+
"alt",
|
|
600
|
+
"title",
|
|
601
|
+
"testid",
|
|
602
|
+
"first",
|
|
603
|
+
"last",
|
|
604
|
+
"nth"
|
|
605
|
+
];
|
|
606
|
+
/**
|
|
607
|
+
* Actions reachable via `agent-browser find <locator> ... <action>`. Kept
|
|
608
|
+
* here next to the locator list so all `find_*` knowledge lives in one
|
|
609
|
+
* place — `cli/trace.ts`, `claude/invoke.ts`, and `runtime/replay-validate.ts`
|
|
610
|
+
* import these instead of redefining their own sets.
|
|
611
|
+
*/
|
|
612
|
+
const FIND_ACTIONS = [
|
|
613
|
+
"click",
|
|
614
|
+
"dblclick",
|
|
615
|
+
"fill",
|
|
616
|
+
"type",
|
|
617
|
+
"hover",
|
|
618
|
+
"focus",
|
|
619
|
+
"check",
|
|
620
|
+
"uncheck"
|
|
621
|
+
];
|
|
622
|
+
const DraftIssueSchema = z.object({
|
|
623
|
+
severity: z.enum([
|
|
624
|
+
"OK",
|
|
625
|
+
"WARN",
|
|
626
|
+
"ERROR"
|
|
627
|
+
]),
|
|
628
|
+
category: z.enum([
|
|
629
|
+
"assertable",
|
|
630
|
+
"blocks",
|
|
631
|
+
"granularity",
|
|
632
|
+
"unimplemented"
|
|
633
|
+
]),
|
|
634
|
+
stepId: z.string().nullable(),
|
|
635
|
+
message: z.string(),
|
|
636
|
+
detail: z.string().optional()
|
|
637
|
+
});
|
|
638
|
+
const DraftReportSchema = z.object({
|
|
639
|
+
issues: z.array(DraftIssueSchema),
|
|
640
|
+
patch: z.string()
|
|
641
|
+
});
|
|
642
|
+
const DRAFT_CATEGORY_LABEL = {
|
|
643
|
+
assertable: "Assertability",
|
|
644
|
+
blocks: "Block references",
|
|
645
|
+
granularity: "Step granularity",
|
|
646
|
+
unimplemented: "Unimplemented checks"
|
|
647
|
+
};
|
|
648
|
+
const DraftNamingSchema = z.object({
|
|
649
|
+
featureName: z.string().min(1),
|
|
650
|
+
specName: z.string().min(1),
|
|
651
|
+
reason: z.string().optional()
|
|
652
|
+
});
|
|
653
|
+
//#endregion
|
|
411
654
|
//#region src/claude/invoke.ts
|
|
412
655
|
function resolveModel(explicit) {
|
|
413
656
|
if (explicit) return explicit;
|
|
@@ -444,12 +687,25 @@ async function invokeClaudeStreaming(options, onEvent) {
|
|
|
444
687
|
if (typeof cmd !== "string") return {};
|
|
445
688
|
if (isBlockedAbSubcommand(cmd)) return {
|
|
446
689
|
decision: "block",
|
|
447
|
-
reason: "This agent-browser subcommand is not allowed because it cannot be recorded as a structured test action. Use only the standard commands: click, check, fill, select, hover, press, wait. Take a fresh snapshot to find the correct selector."
|
|
690
|
+
reason: "This agent-browser subcommand is not allowed because it cannot be recorded as a structured test action. Use only the standard commands: click, check, fill, select, hover, press, wait, find (with role/text/label/placeholder/alt/title/testid/first/last/nth). Take a fresh snapshot to find the correct selector."
|
|
448
691
|
};
|
|
449
692
|
if (hasRefSelector(cmd)) return {
|
|
450
693
|
decision: "block",
|
|
451
694
|
reason: "@ref selectors (like @e14) are session-specific and change every run. They cannot be used in generated tests. Use one of the allowed selector formats instead: [aria-label='...'], text=..., [placeholder='...'], or [type='password']. Take a fresh snapshot and find the element's aria-label or visible text."
|
|
452
695
|
};
|
|
696
|
+
const bareTag = findPositionalBareTag(cmd);
|
|
697
|
+
if (bareTag !== null) return {
|
|
698
|
+
decision: "block",
|
|
699
|
+
reason: `\`find ${bareTag.locator}\` with a bare tag selector (\`${bareTag.selector}\`) is rejected: it matches every <${bareTag.selector}> on the page and is non-deterministic on replay. Pass a specific attribute selector instead, e.g. \`find ${bareTag.locator} "[aria-label='...']" ${bareTag.action}\` or \`find ${bareTag.locator} "[data-qa='...']" ${bareTag.action}\`. Take a fresh snapshot to find the right attribute.`
|
|
700
|
+
};
|
|
701
|
+
if (hasMultipleAbInvocations(cmd)) return {
|
|
702
|
+
decision: "block",
|
|
703
|
+
reason: "Run each `agent-browser` call as its own Bash command. Chaining multiple invocations with &&, ;, |, or || prevents ccqa from recording them as discrete steps and lets failed attempts leak into the trace. Issue one Bash tool call per agent-browser command."
|
|
704
|
+
};
|
|
705
|
+
if (hasErrorSuppression(cmd)) return {
|
|
706
|
+
decision: "block",
|
|
707
|
+
reason: "Do not suppress errors on `agent-browser` commands. Remove `|| true`, `|| :`, `2>/dev/null`, `; true`, and similar redirects so ccqa can detect failures and roll back unsuccessful attempts. Run the command standalone and let it surface its exit code."
|
|
708
|
+
};
|
|
453
709
|
const ab = extractAbActionFromBashCommand(cmd);
|
|
454
710
|
if (ab && onAbAction) {
|
|
455
711
|
lastAbToolUseId = input.tool_use_id;
|
|
@@ -496,7 +752,6 @@ async function invokeClaudeStreaming(options, onEvent) {
|
|
|
496
752
|
const BLOCKED_AB_SUBCOMMANDS = new Set([
|
|
497
753
|
"eval",
|
|
498
754
|
"js",
|
|
499
|
-
"find",
|
|
500
755
|
"label",
|
|
501
756
|
"textbox"
|
|
502
757
|
]);
|
|
@@ -557,6 +812,33 @@ function isBashToolResponseError(tool_response) {
|
|
|
557
812
|
if (r["killed"] === true) return true;
|
|
558
813
|
return false;
|
|
559
814
|
}
|
|
815
|
+
/**
|
|
816
|
+
* Detect `agent-browser ... find first|last|nth <bare-tag> <action>`. A bare
|
|
817
|
+
* tag inside a *positional* finder matches every element of that tag on the
|
|
818
|
+
* page, so "the last button" picks a different element whenever the page
|
|
819
|
+
* shape shifts — recorded tests built on top are flaky by construction. The
|
|
820
|
+
* check is narrow on purpose: `find role button --name X` is fine because
|
|
821
|
+
* role + accessible name stays stable.
|
|
822
|
+
*/
|
|
823
|
+
function findPositionalBareTag(cmd) {
|
|
824
|
+
if (extractAbSubcommand(cmd) !== "find") return null;
|
|
825
|
+
const abIdx = cmd.indexOf("agent-browser");
|
|
826
|
+
const parts = shellTokenize(cmd.slice(abIdx + 13).trim());
|
|
827
|
+
let i = 0;
|
|
828
|
+
while (i < parts.length && parts[i].startsWith("-")) i += 2;
|
|
829
|
+
const locator = parts[i + 1];
|
|
830
|
+
if (locator !== "first" && locator !== "last" && locator !== "nth") return null;
|
|
831
|
+
const innerIdx = locator === "nth" ? i + 3 : i + 2;
|
|
832
|
+
const inner = parts[innerIdx];
|
|
833
|
+
const action = parts[innerIdx + 1] ?? "";
|
|
834
|
+
if (!inner) return null;
|
|
835
|
+
if (!/^[a-zA-Z][a-zA-Z0-9]*$/.test(inner)) return null;
|
|
836
|
+
return {
|
|
837
|
+
locator,
|
|
838
|
+
selector: inner,
|
|
839
|
+
action
|
|
840
|
+
};
|
|
841
|
+
}
|
|
560
842
|
/** Returns true if any argument to an agent-browser command uses a @ref selector (e.g. @e14). */
|
|
561
843
|
function hasRefSelector(cmd) {
|
|
562
844
|
const abIdx = cmd.indexOf("agent-browser");
|
|
@@ -569,6 +851,69 @@ function hasRefSelector(cmd) {
|
|
|
569
851
|
return false;
|
|
570
852
|
}
|
|
571
853
|
/**
 * Returns true when `cmd` contains more than one `agent-browser` invocation
 * chained together in a single Bash command (`&&`, `||`, `;`, `|`, or a
 * plain newline). The PreToolUse hook only records ONE AB_ACTION per Bash
 * call, so chained invocations would silently drop every intermediate
 * failure and leave orphaned actions in the trace that later fail at replay.
 *
 * The scan walks the command once, collecting the offsets at which a new
 * shell command can start (index 0 and the position right after each
 * separator), then counts how many of those offsets begin with the word
 * `agent-browser`.
 *
 * Quoting is honoured so `agent-browser fill 'agent-browser'` doesn't
 * false-fire: single quotes are fully literal; inside double quotes and
 * backticks a backslash escapes the next character; outside quotes a
 * backslash escapes the next character too (which also covers
 * backslash-newline line continuations).
 *
 * @param {string} cmd Raw Bash command line.
 * @returns {boolean} True when two or more `agent-browser` commands are found.
 */
function hasMultipleAbInvocations(cmd) {
	const boundaries = [0];
	let quote = null;
	for (let i = 0; i < cmd.length; i++) {
		const ch = cmd[i];
		if (quote) {
			// Backslash escapes apply inside "..." and `...`, never inside '...'.
			if (ch === "\\" && quote !== "'") i++;
			else if (ch === quote) quote = null;
			continue;
		}
		if (ch === "\\") {
			// Escaped character (or line continuation): never a separator or quote.
			i++;
			continue;
		}
		if (ch === "\"" || ch === "'" || ch === "`") {
			quote = ch;
			continue;
		}
		if (ch === "\n") {
			// An unescaped newline ends the current command just like `;`.
			boundaries.push(i + 1);
			continue;
		}
		if (ch === ";" || ch === "|" || ch === "&") {
			// Collapse multi-character operators (`&&`, `||`, `;;`) into one boundary.
			while (i + 1 < cmd.length && (cmd[i + 1] === "|" || cmd[i + 1] === "&" || cmd[i + 1] === ";")) i++;
			boundaries.push(i + 1);
		}
	}
	let count = 0;
	for (const start of boundaries) {
		let j = start;
		while (j < cmd.length && (cmd[j] === " " || cmd[j] === "\t" || cmd[j] === "\n" || cmd[j] === "\r")) j++;
		if (cmd.slice(j, j + 13) !== "agent-browser") continue;
		// Reject longer words that merely start with the name, e.g. `agent-browser-foo`.
		const after = cmd[j + 13];
		if (after !== void 0 && /[A-Za-z0-9_\-]/.test(after)) continue;
		count++;
		if (count > 1) return true;
	}
	return false;
}
|
|
896
|
+
/**
 * Returns true when an `agent-browser` command in `cmd` has its exit
 * status hidden by a shell decorator that would prevent ccqa from rolling
 * back a failed attempt:
 *
 *   - trailing `|| true` / `|| :` / a dangling `||` (force exit 0)
 *   - `; true` / `; :` (a following no-op that replaces the exit status)
 *   - `2>/dev/null`, `2>>/dev/null`, `&>/dev/null` (drop stderr)
 *
 * The agent-browser command itself returns exit 1 on selector miss, so
 * once one of these is present the PostToolUse hook sees `is_error=false`
 * and the bad attempt sneaks into actions.json.
 *
 * Commands that do not mention `agent-browser` at all are never flagged.
 *
 * @param {string} cmd Raw Bash command line.
 * @returns {boolean} True when an error-suppressing decorator is present.
 */
function hasErrorSuppression(cmd) {
	if (!cmd.includes("agent-browser")) return false;
	if (/\|\|\s*(true|:|\s*$|#)/.test(cmd)) return true;
	// `:` is not a word character, so it needs a lookahead instead of `\b`
	// to match a bare `; :` at the end of the command.
	if (/;\s*(?:true\b|:(?=\s|;|$))/.test(cmd)) return true;
	if (/2\s*>{1,2}\s*\/dev\/null/.test(cmd)) return true;
	if (/&\s*>\s*\/dev\/null/.test(cmd)) return true;
	return false;
}
|
|
916
|
+
/**
|
|
572
917
|
* Parse an `agent-browser --session <name> <cmd> [args...]` bash command
|
|
573
918
|
* and return the corresponding AB_ACTION line, or null if not an agent-browser call.
|
|
574
919
|
*/
|
|
@@ -598,9 +943,56 @@ function extractAbActionFromBashCommand(cmd) {
|
|
|
598
943
|
case "select": return `AB_ACTION|${subCmd}|${args[0] ?? ""}|${args[1] ?? ""}|${args[2] ?? ""}`;
|
|
599
944
|
case "drag": return `AB_ACTION|drag|${args[0] ?? ""}|${args[1] ?? ""}|${args[2] ?? ""}`;
|
|
600
945
|
case "snapshot": return null;
|
|
946
|
+
case "find": return extractFindAbAction(args);
|
|
601
947
|
default: return null;
|
|
602
948
|
}
|
|
603
949
|
}
|
|
950
|
+
const FIND_ACTION_SET = new Set(FIND_ACTIONS);
|
|
951
|
+
const FIND_LOCATOR_SET = new Set(FIND_LOCATORS);
|
|
952
|
+
/**
 * Parse the positional tokens of `agent-browser find <locator> <value> [...]
 * <action> [fillValue]` and produce a canonical
 * `AB_ACTION|find_<action>|<locator>|<value>|<extra>|<exact>|...|<label>`
 * line. The wire format keeps a fixed positional layout across locators so
 * downstream `parseAbAction` in `cli/trace.ts` can split on `|` alone:
 *
 *   <extra> is `--name` value for role, integer index for nth, "" otherwise.
 *   <exact> is the literal "exact" if --exact was passed, "" otherwise.
 *
 * For `nth` the first positional token is the index and the second is the
 * inner selector. `fill` / `type` actions carry one additional field holding
 * the text to enter.
 *
 * Only the FIRST action keyword is taken as the action; any later positional
 * token is the fill value, even if it happens to spell an action name
 * (e.g. `find label "Email" fill "click"` types the text "click").
 *
 * Returns null for malformed invocations — the caller treats null as "not a
 * structured action" and the Bash command still runs unobserved.
 *
 * @param {string[]} args Tokens following the `find` subcommand.
 * @returns {string | null} The AB_ACTION line, or null when unparseable.
 */
function extractFindAbAction(args) {
	const locator = args[0];
	if (!locator || !FIND_LOCATOR_SET.has(locator)) return null;
	let i = 1;
	let value = args[i] ?? "";
	i++;
	let extra = "";
	if (locator === "nth") {
		// `find nth <index> <selector> ...`: the index comes before the selector.
		extra = value;
		value = args[i] ?? "";
		i++;
	}
	let action = "";
	let name = "";
	let exact = "";
	let fillValue = "";
	for (; i < args.length; i++) {
		const tok = args[i];
		if (tok === "--name") {
			name = args[i + 1] ?? "";
			i++;
		} else if (tok === "--exact") exact = "exact";
		else if (!action && FIND_ACTION_SET.has(tok)) action = tok;
		else if (action) fillValue = tok;
	}
	if (!action) return null;
	if (locator === "role") extra = name;
	const command = `find_${action}`;
	if (action === "fill" || action === "type") return `AB_ACTION|${command}|${locator}|${value}|${extra}|${exact}|${fillValue}|`;
	return `AB_ACTION|${command}|${locator}|${value}|${extra}|${exact}|`;
}
|
|
604
996
|
async function buildMessageStream(prompt, options) {
|
|
605
997
|
const mockFile = process.env["CCQA_CLAUDE_MOCK_FILE"];
|
|
606
998
|
if (mockFile) return replayMockMessages(mockFile);
|
|
@@ -634,6 +1026,20 @@ function substituteVars(value, lookup) {
|
|
|
634
1026
|
});
|
|
635
1027
|
}
|
|
636
1028
|
/**
|
|
1029
|
+
* Iterate every `${NAME}` / `$NAME` reference name (case-insensitive form)
|
|
1030
|
+
* appearing in `value`. Used by callers that want to enumerate refs without
|
|
1031
|
+
* also substituting, e.g. the env-scrub map builder. The reference name
|
|
1032
|
+
* grammar is the canonical one shared with `substituteVars`.
|
|
1033
|
+
*/
|
|
1034
|
+
function* iterEnvRefNames(value) {
|
|
1035
|
+
ANY_VAR_RE.lastIndex = 0;
|
|
1036
|
+
let m;
|
|
1037
|
+
while ((m = ANY_VAR_RE.exec(value)) !== null) {
|
|
1038
|
+
const name = m[1] ?? m[2];
|
|
1039
|
+
if (name) yield name;
|
|
1040
|
+
}
|
|
1041
|
+
}
|
|
1042
|
+
/**
|
|
637
1043
|
* Resolve every `$VAR` / `${VAR}` reference against the current process env.
|
|
638
1044
|
*
|
|
639
1045
|
* Missing variables expand to the empty string, mirroring `sh` behaviour.
|
|
@@ -692,74 +1098,6 @@ function refsToJsExpression(value, nameToExpr) {
|
|
|
692
1098
|
})}\``;
|
|
693
1099
|
}
|
|
694
1100
|
//#endregion
|
|
695
|
-
//#region src/spec/yaml-schema.ts
|
|
696
|
-
/**
|
|
697
|
-
* An action step: one user-facing browser interaction. `instruction` and
|
|
698
|
-
* `expected` are the natural-language description handed to Claude during
|
|
699
|
-
* `ccqa trace`. URLs live inside `instruction`, either verbatim or via
|
|
700
|
-
* `${ENV_VAR}` references (resolved at runtime).
|
|
701
|
-
*/
|
|
702
|
-
const ActionStepSchema = z.object({
|
|
703
|
-
instruction: z.string().min(1),
|
|
704
|
-
expected: z.string().min(1)
|
|
705
|
-
}).strict();
|
|
706
|
-
/**
|
|
707
|
-
* An include step: invokes a reusable block (`.ccqa/blocks/<name>/spec.yaml`).
|
|
708
|
-
* `params` values are plain strings; env refs (`${VAR}`) inside them are
|
|
709
|
-
* resolved at expand time the same way step instructions are.
|
|
710
|
-
*/
|
|
711
|
-
const IncludeStepSchema = z.object({
|
|
712
|
-
include: z.string().min(1),
|
|
713
|
-
params: z.record(z.string(), z.string()).optional()
|
|
714
|
-
}).strict();
|
|
715
|
-
/**
|
|
716
|
-
* A spec step is either an action step or an include step. The two are
|
|
717
|
-
* discriminated by the presence of the `include` key — see `isIncludeStep`.
|
|
718
|
-
*/
|
|
719
|
-
const StepSchema = z.union([ActionStepSchema, IncludeStepSchema]);
|
|
720
|
-
/** Top-level spec schema. `.strict()` rejects any unknown key. */
|
|
721
|
-
const TestSpecSchema = z.object({
|
|
722
|
-
title: z.string().min(1),
|
|
723
|
-
relatedPaths: z.array(z.string().min(1)).optional(),
|
|
724
|
-
steps: z.array(StepSchema).min(1)
|
|
725
|
-
}).strict();
|
|
726
|
-
/**
|
|
727
|
-
* A block param declaration. `required` defaults to true; only explicit
|
|
728
|
-
* `required: false` makes it optional. `secret: true` flags the value as
|
|
729
|
-
* sensitive — codegen renders such values as `process.env.<NAME> ?? ""`
|
|
730
|
-
* template literals so the secret never ends up baked into test.spec.ts.
|
|
731
|
-
* `dummy` is a placeholder value surfaced by the draft / drift prompts
|
|
732
|
-
* (which see the block in isolation, before any include site exists);
|
|
733
|
-
* `description` is the param's semantic role, also consumed by those
|
|
734
|
-
* prompts and by spec authors browsing the block.
|
|
735
|
-
*/
|
|
736
|
-
const BlockParamSchema = z.object({
|
|
737
|
-
name: z.string().min(1),
|
|
738
|
-
required: z.boolean().optional(),
|
|
739
|
-
secret: z.boolean().optional(),
|
|
740
|
-
dummy: z.string().optional(),
|
|
741
|
-
description: z.string().optional()
|
|
742
|
-
}).strict();
|
|
743
|
-
/**
|
|
744
|
-
* Block schema. Block steps are restricted to ActionStep — nested blocks are
|
|
745
|
-
* forbidden. Including a block from inside another block fails parsing here
|
|
746
|
-
* (the store layer maps the cryptic "Unrecognized key: 'include'" error into
|
|
747
|
-
* a targeted nested-block message).
|
|
748
|
-
*/
|
|
749
|
-
const BlockSpecSchema = z.object({
|
|
750
|
-
title: z.string().min(1),
|
|
751
|
-
params: z.array(BlockParamSchema).optional(),
|
|
752
|
-
steps: z.array(ActionStepSchema).min(1)
|
|
753
|
-
}).strict();
|
|
754
|
-
/** Runtime predicate for the StepSchema union. */
|
|
755
|
-
function isIncludeStep(step) {
|
|
756
|
-
return "include" in step;
|
|
757
|
-
}
|
|
758
|
-
/** Returns true if a block param is required (default: true). */
|
|
759
|
-
function isParamRequired(param) {
|
|
760
|
-
return param.required !== false;
|
|
761
|
-
}
|
|
762
|
-
//#endregion
|
|
763
1101
|
//#region src/spec/parser.ts
|
|
764
1102
|
/** Parse a spec.yaml. Schema rejections are rewritten with actionable messages. */
|
|
765
1103
|
function parseTestSpec(content, source = "spec.yaml") {
|
|
@@ -994,6 +1332,32 @@ async function loadAvailableBlocks(cwd) {
|
|
|
994
1332
|
}))
|
|
995
1333
|
}));
|
|
996
1334
|
}
|
|
1335
|
+
const TRACE_USER_PROMPT_PATH = ".ccqa/prompts/trace.user.md";
|
|
1336
|
+
const TRACE_USER_PROMPT_MAX_BYTES = 32768;
|
|
1337
|
+
/**
 * Load project-specific guidance to append to the trace system prompt.
 *
 * Returns the file's contents (trimmed) when `.ccqa/prompts/trace.user.md`
 * exists and is non-empty. Missing file, empty file, or read error all
 * resolve to `null` so callers can treat the override as strictly optional.
 *
 * The file is meant for organisation-specific rules that don't belong in
 * the OSS-default prompt — naming conventions, staging URL hints, repeated
 * UI quirks that recur across specs. Anything that genuinely belongs in
 * one spec should go in that spec's instruction, not here.
 *
 * Size-capped at `TRACE_USER_PROMPT_MAX_BYTES` of UTF-8 to keep accidental
 * commits of huge files from blowing up the system prompt. The cap is
 * measured in encoded bytes (not UTF-16 code units) and the cut is moved
 * back to a character boundary so no multi-byte character is split. The cap
 * is observable to callers as a truncation warning suffix.
 *
 * @param {string} [cwd] Project root; defaults to `process.cwd()`.
 * @returns {Promise<string | null>} Prompt text, or null when absent/empty.
 */
async function loadTraceUserPrompt(cwd) {
	// Deliberate best-effort read: any failure means "no override".
	const content = await readFile(join(cwd ?? process.cwd(), TRACE_USER_PROMPT_PATH), "utf-8").catch(() => null);
	if (content === null) return null;
	const trimmed = content.trim();
	if (trimmed.length === 0) return null;
	const encoded = Buffer.from(trimmed, "utf-8");
	if (encoded.length <= TRACE_USER_PROMPT_MAX_BYTES) return trimmed;
	// Step back over UTF-8 continuation bytes (0b10xxxxxx) so the slice ends
	// on a whole character.
	let end = TRACE_USER_PROMPT_MAX_BYTES;
	while (end > 0 && (encoded[end] & 192) === 128) end--;
	return `${encoded.subarray(0, end).toString("utf-8")}\n\n[ccqa] (trace.user.md truncated at ${TRACE_USER_PROMPT_MAX_BYTES} bytes)`;
}
|
|
997
1361
|
/**
|
|
998
1362
|
* Probe for orphaned files left over from earlier ccqa versions inside
|
|
999
1363
|
* `.ccqa/blocks/<name>/`. Both pre-v0.4 `test.spec.ts` (function-export
|
|
@@ -1246,6 +1610,33 @@ function formatAgentBrowserUnavailableMessage() {
|
|
|
1246
1610
|
}
|
|
1247
1611
|
//#endregion
|
|
1248
1612
|
//#region src/runtime/replay-validate.ts
|
|
1613
|
+
function isPollCheck(x) {
|
|
1614
|
+
return x !== null && !Array.isArray(x) && x.kind === "poll-present";
|
|
1615
|
+
}
|
|
1616
|
+
const SELECTOR_POLL_INTERVAL_MS = 500;
|
|
1617
|
+
/** Poll `get count <selector>` until it matches (>=1) or the timeout elapses. */
|
|
1618
|
+
function runPollCheck(check, sessionName) {
|
|
1619
|
+
const deadline = Date.now() + check.timeoutMs;
|
|
1620
|
+
for (;;) {
|
|
1621
|
+
const r = spawnAB([
|
|
1622
|
+
"--session",
|
|
1623
|
+
sessionName,
|
|
1624
|
+
"get",
|
|
1625
|
+
"count",
|
|
1626
|
+
check.selector
|
|
1627
|
+
]);
|
|
1628
|
+
const count = r.status === 0 ? Number.parseInt(r.stdout.trim(), 10) : NaN;
|
|
1629
|
+
if (!Number.isNaN(count) && count > 0) return {
|
|
1630
|
+
ok: true,
|
|
1631
|
+
reason: ""
|
|
1632
|
+
};
|
|
1633
|
+
if (Date.now() >= deadline) return {
|
|
1634
|
+
ok: false,
|
|
1635
|
+
reason: `selector not present within ${check.timeoutMs}ms (get count returned ${Number.isNaN(count) ? "error" : count})`
|
|
1636
|
+
};
|
|
1637
|
+
sleepSync(SELECTOR_POLL_INTERVAL_MS);
|
|
1638
|
+
}
|
|
1639
|
+
}
|
|
1249
1640
|
const SHORT_TIMEOUT_MS = 5e3;
|
|
1250
1641
|
const ASSERT_TIMEOUT_MS = 1e4;
|
|
1251
1642
|
/**
|
|
@@ -1330,6 +1721,7 @@ function actionToAbArgs(action, sessionName) {
|
|
|
1330
1721
|
const raw = sub(action.selector);
|
|
1331
1722
|
if (!raw) return null;
|
|
1332
1723
|
if (/^\d+$/.test(raw)) return null;
|
|
1724
|
+
if (raw.startsWith("--")) return null;
|
|
1333
1725
|
if (raw.startsWith("text=")) return [
|
|
1334
1726
|
...base,
|
|
1335
1727
|
"wait",
|
|
@@ -1338,18 +1730,47 @@ function actionToAbArgs(action, sessionName) {
|
|
|
1338
1730
|
"--timeout",
|
|
1339
1731
|
String(SHORT_TIMEOUT_MS)
|
|
1340
1732
|
];
|
|
1341
|
-
return
|
|
1342
|
-
|
|
1343
|
-
|
|
1344
|
-
|
|
1345
|
-
|
|
1346
|
-
String(SHORT_TIMEOUT_MS)
|
|
1347
|
-
];
|
|
1733
|
+
return {
|
|
1734
|
+
kind: "poll-present",
|
|
1735
|
+
selector: raw,
|
|
1736
|
+
timeoutMs: SHORT_TIMEOUT_MS
|
|
1737
|
+
};
|
|
1348
1738
|
}
|
|
1349
1739
|
case "snapshot": return null;
|
|
1350
1740
|
case "assert": return assertToAbArgs(action, sub, sessionName);
|
|
1741
|
+
case "find_click":
|
|
1742
|
+
case "find_dblclick":
|
|
1743
|
+
case "find_hover":
|
|
1744
|
+
case "find_focus":
|
|
1745
|
+
case "find_check":
|
|
1746
|
+
case "find_uncheck": return buildFindArgs$1(action, void 0, sub, base);
|
|
1747
|
+
case "find_fill":
|
|
1748
|
+
case "find_type": return buildFindArgs$1(action, sub(action.value), sub, base);
|
|
1351
1749
|
}
|
|
1352
1750
|
}
|
|
1751
|
+
/**
|
|
1752
|
+
* Build the agent-browser argv for a recorded `find_*` action. Mirrors the
|
|
1753
|
+
* codegen shape in `actions-to-script.ts:buildFindArgs` but emits a plain
|
|
1754
|
+
* string array. Env refs in `findValue` / `findName` resolve through `sub`
|
|
1755
|
+
* so the validator hits the same DOM the generated test will.
|
|
1756
|
+
*/
|
|
1757
|
+
function buildFindArgs$1(action, fillValue, sub, base) {
|
|
1758
|
+
const locator = action.findLocator;
|
|
1759
|
+
if (!locator || !action.findValue) return null;
|
|
1760
|
+
const innerAction = action.command.slice(5).replace("type", "fill");
|
|
1761
|
+
const out = [
|
|
1762
|
+
...base,
|
|
1763
|
+
"find",
|
|
1764
|
+
locator
|
|
1765
|
+
];
|
|
1766
|
+
if (locator === "nth") out.push(String(action.findIndex ?? 0));
|
|
1767
|
+
out.push(sub(action.findValue));
|
|
1768
|
+
out.push(innerAction);
|
|
1769
|
+
if (fillValue !== void 0) out.push(fillValue);
|
|
1770
|
+
if (locator === "role" && action.findName) out.push("--name", sub(action.findName));
|
|
1771
|
+
if (action.findExact) out.push("--exact");
|
|
1772
|
+
return out;
|
|
1773
|
+
}
|
|
1353
1774
|
function assertToAbArgs(action, sub, sessionName) {
|
|
1354
1775
|
const base = ["--session", sessionName];
|
|
1355
1776
|
const val = sub(action.value ?? action.observation);
|
|
@@ -1368,13 +1789,11 @@ function assertToAbArgs(action, sub, sessionName) {
|
|
|
1368
1789
|
case "text_not_visible": return null;
|
|
1369
1790
|
case "element_visible":
|
|
1370
1791
|
if (!sel) return null;
|
|
1371
|
-
return
|
|
1372
|
-
|
|
1373
|
-
|
|
1374
|
-
|
|
1375
|
-
|
|
1376
|
-
String(ASSERT_TIMEOUT_MS)
|
|
1377
|
-
];
|
|
1792
|
+
return {
|
|
1793
|
+
kind: "poll-present",
|
|
1794
|
+
selector: sel,
|
|
1795
|
+
timeoutMs: ASSERT_TIMEOUT_MS
|
|
1796
|
+
};
|
|
1378
1797
|
case "element_not_visible": return null;
|
|
1379
1798
|
case "url_contains": return null;
|
|
1380
1799
|
case "element_enabled":
|
|
@@ -1382,23 +1801,59 @@ function assertToAbArgs(action, sub, sessionName) {
|
|
|
1382
1801
|
case "element_checked":
|
|
1383
1802
|
case "element_unchecked":
|
|
1384
1803
|
if (!sel || sel.startsWith("text=") || sel.startsWith("[aria-label=")) return null;
|
|
1385
|
-
return
|
|
1386
|
-
|
|
1387
|
-
|
|
1388
|
-
|
|
1389
|
-
|
|
1390
|
-
String(ASSERT_TIMEOUT_MS)
|
|
1391
|
-
];
|
|
1804
|
+
return {
|
|
1805
|
+
kind: "poll-present",
|
|
1806
|
+
selector: sel,
|
|
1807
|
+
timeoutMs: ASSERT_TIMEOUT_MS
|
|
1808
|
+
};
|
|
1392
1809
|
default: return null;
|
|
1393
1810
|
}
|
|
1394
1811
|
}
|
|
1812
|
+
const NO_STEP_ID = "__no_step__";
|
|
1813
|
+
/**
|
|
1814
|
+
* Replay one recorded action against the validation session. Element-presence
|
|
1815
|
+
* checks go through `runPollCheck` (which uses `get count`, never the blocking
|
|
1816
|
+
* `wait <selector>`); everything else spawns the agent-browser argv. A single
|
|
1817
|
+
* hard-timeout (SIGTERM) retry covers the daemon's occasional under-load drop.
|
|
1818
|
+
*/
|
|
1819
|
+
function runValidationAction(action, sessionName) {
|
|
1820
|
+
const built = actionToAbArgs(action, sessionName);
|
|
1821
|
+
if (built === null) return {
|
|
1822
|
+
skipped: true,
|
|
1823
|
+
ok: false,
|
|
1824
|
+
reason: ""
|
|
1825
|
+
};
|
|
1826
|
+
if (isPollCheck(built)) {
|
|
1827
|
+
const { ok, reason } = runPollCheck(built, sessionName);
|
|
1828
|
+
return {
|
|
1829
|
+
skipped: false,
|
|
1830
|
+
ok,
|
|
1831
|
+
reason
|
|
1832
|
+
};
|
|
1833
|
+
}
|
|
1834
|
+
let result = spawnAB(built);
|
|
1835
|
+
if (result.status !== 0 && looksLikeHardTimeout(result)) result = spawnAB(built);
|
|
1836
|
+
if (result.status === 0) return {
|
|
1837
|
+
skipped: false,
|
|
1838
|
+
ok: true,
|
|
1839
|
+
reason: ""
|
|
1840
|
+
};
|
|
1841
|
+
return {
|
|
1842
|
+
skipped: false,
|
|
1843
|
+
ok: false,
|
|
1844
|
+
reason: (result.stderr.trim() || result.stdout.trim() || `agent-browser exit ${result.status ?? "?"}`).slice(0, 200)
|
|
1845
|
+
};
|
|
1846
|
+
}
|
|
1395
1847
|
function validateActions(actions, opts) {
|
|
1396
1848
|
const kept = [];
|
|
1397
1849
|
const dropped = [];
|
|
1398
|
-
let
|
|
1850
|
+
let skipFromStepId = null;
|
|
1399
1851
|
for (let i = 0; i < actions.length; i++) {
|
|
1400
1852
|
const action = actions[i];
|
|
1401
|
-
|
|
1853
|
+
opts.onProgress?.(i, actions.length, action);
|
|
1854
|
+
const stepId = action.stepId ?? NO_STEP_ID;
|
|
1855
|
+
if (skipFromStepId !== null && skipFromStepId !== stepId) skipFromStepId = null;
|
|
1856
|
+
if (skipFromStepId !== null && isPassiveCommand(action.command)) {
|
|
1402
1857
|
dropped.push({
|
|
1403
1858
|
index: i,
|
|
1404
1859
|
action,
|
|
@@ -1406,28 +1861,111 @@ function validateActions(actions, opts) {
|
|
|
1406
1861
|
});
|
|
1407
1862
|
continue;
|
|
1408
1863
|
}
|
|
1409
|
-
|
|
1410
|
-
|
|
1411
|
-
if (args === null) {
|
|
1864
|
+
const outcome = runValidationAction(action, opts.sessionName);
|
|
1865
|
+
if (outcome.skipped) {
|
|
1412
1866
|
kept.push(action);
|
|
1413
1867
|
continue;
|
|
1414
1868
|
}
|
|
1415
|
-
|
|
1416
|
-
if (result.status === 0) {
|
|
1869
|
+
if (outcome.ok) {
|
|
1417
1870
|
kept.push(action);
|
|
1871
|
+
if (skipFromStepId !== null && !isPassiveCommand(action.command)) skipFromStepId = null;
|
|
1418
1872
|
continue;
|
|
1419
1873
|
}
|
|
1420
1874
|
dropped.push({
|
|
1421
1875
|
index: i,
|
|
1422
1876
|
action,
|
|
1423
|
-
reason:
|
|
1877
|
+
reason: outcome.reason
|
|
1424
1878
|
});
|
|
1425
|
-
|
|
1879
|
+
if (!isPassiveCommand(action.command)) skipFromStepId = stepId;
|
|
1880
|
+
}
|
|
1881
|
+
return splitByMode(actions, rescueLostSteps(actions, kept, dropped, opts), opts.mode ?? "lenient");
|
|
1882
|
+
}
|
|
1883
|
+
/**
|
|
1884
|
+
* Translate the internal `{ kept, dropped }` result of the rescue pass
|
|
1885
|
+
* into the public-facing shape. In strict mode the caller sees the same
|
|
1886
|
+
* shape as before (kept/dropped); in lenient mode the still-failed
|
|
1887
|
+
* actions move to `unstable` with `replayUnstable: true` tagged on, so
|
|
1888
|
+
* codegen can warn about them while still emitting the line.
|
|
1889
|
+
*/
|
|
1890
|
+
function splitByMode(originalActions, result, mode) {
|
|
1891
|
+
if (mode === "strict") return {
|
|
1892
|
+
kept: result.kept,
|
|
1893
|
+
unstable: [],
|
|
1894
|
+
dropped: result.dropped,
|
|
1895
|
+
rescuedSteps: result.rescuedSteps
|
|
1896
|
+
};
|
|
1897
|
+
const droppedByIndex = new Map(result.dropped.map((d) => [d.index, d]));
|
|
1898
|
+
const keptSet = new Set(result.kept);
|
|
1899
|
+
const finalKept = [];
|
|
1900
|
+
const unstable = [];
|
|
1901
|
+
for (let i = 0; i < originalActions.length; i++) {
|
|
1902
|
+
const action = originalActions[i];
|
|
1903
|
+
if (keptSet.has(action)) {
|
|
1904
|
+
finalKept.push(action);
|
|
1905
|
+
continue;
|
|
1906
|
+
}
|
|
1907
|
+
const drop = droppedByIndex.get(i);
|
|
1908
|
+
if (drop) {
|
|
1909
|
+
action.replayUnstable = true;
|
|
1910
|
+
action.replayReason = drop.reason;
|
|
1911
|
+
unstable.push(action);
|
|
1912
|
+
}
|
|
1426
1913
|
}
|
|
1427
1914
|
return {
|
|
1915
|
+
kept: finalKept,
|
|
1916
|
+
unstable,
|
|
1917
|
+
dropped: [],
|
|
1918
|
+
rescuedSteps: result.rescuedSteps
|
|
1919
|
+
};
|
|
1920
|
+
}
|
|
1921
|
+
/**
 * Second validation pass for steps that lost every one of their actions.
 *
 * A step is "lost" when it has at least one dropped action and no action in
 * `kept` shares its `stepId`. Each dropped action of a lost step is replayed
 * once more through `runValidationAction`; those that now succeed are moved
 * back into the kept list (preserving the original order of `actions`) and
 * removed from the dropped list. Actions without a `stepId` are never
 * retried.
 *
 * @param {object[]} actions Full recorded action list, in original order.
 * @param {object[]} kept Actions that survived the first pass.
 * @param {{index: number, action: object, reason: string}[]} dropped
 *   Actions rejected by the first pass, with their index into `actions`.
 * @param {{sessionName: string}} opts Validation options; only `sessionName` is read here.
 * @returns {{kept: object[], dropped: object[], rescuedSteps: string[]}}
 *   Updated lists plus the ids of steps that had at least one action
 *   rescued. `rescuedSteps` is always an array (empty when nothing was
 *   rescued) so callers get a uniform shape on every path.
 */
function rescueLostSteps(actions, kept, dropped, opts) {
	const stepsWithSurvivors = /* @__PURE__ */ new Set();
	for (const a of kept) if (a.stepId) stepsWithSurvivors.add(a.stepId);
	// Group the dropped actions of each lost step by step id.
	const lostStepDrops = /* @__PURE__ */ new Map();
	for (const d of dropped) {
		const id = d.action.stepId;
		if (!id || stepsWithSurvivors.has(id)) continue;
		const list = lostStepDrops.get(id) ?? [];
		list.push(d);
		lostStepDrops.set(id, list);
	}
	if (lostStepDrops.size === 0) return {
		kept,
		dropped,
		rescuedSteps: []
	};
	const rescuedIndices = /* @__PURE__ */ new Set();
	const rescuedSteps = [];
	for (const [stepId, drops] of lostStepDrops.entries()) {
		let anyForThisStep = false;
		for (const d of drops) {
			const outcome = runValidationAction(d.action, opts.sessionName);
			if (outcome.skipped) continue;
			if (outcome.ok) {
				rescuedIndices.add(d.index);
				anyForThisStep = true;
			}
		}
		if (anyForThisStep) rescuedSteps.push(stepId);
	}
	if (rescuedIndices.size === 0) return {
		kept,
		dropped,
		rescuedSteps: []
	};
	// Rebuild `kept` by walking the original list so rescued actions land
	// back in their recorded position.
	const keptSet = new Set(kept);
	const newKept = [];
	for (let i = 0; i < actions.length; i++) {
		const action = actions[i];
		if (rescuedIndices.has(i) || keptSet.has(action)) newKept.push(action);
	}
	return {
		kept: newKept,
		dropped: dropped.filter((d) => !rescuedIndices.has(d.index)),
		rescuedSteps
	};
}
|
|
1966
|
+
/**
 * Report whether `result.stderr` carries the hard-timeout marker text.
 * Per the original note, that marker indicates the agent-browser invocation
 * was SIGTERM'd by the ccqa hard-timeout watchdog.
 *
 * @param {{stderr: string}} result
 * @returns {boolean}
 */
function looksLikeHardTimeout(result) {
	const hardTimeoutMarker = "agent-browser killed after hard timeout";
	const { stderr } = result;
	return stderr.includes(hardTimeoutMarker);
}
|
|
1432
1970
|
/**
|
|
1433
1971
|
* Passive (read-only) commands whose only effect is observation. When a
|
|
@@ -1438,12 +1976,228 @@ function isPassiveCommand(cmd) {
|
|
|
1438
1976
|
return cmd === "snapshot" || cmd === "wait" || cmd === "assert";
|
|
1439
1977
|
}
|
|
1440
1978
|
//#endregion
|
|
1979
|
+
//#region src/runtime/env-scrub.ts
|
|
1980
|
+
/**
 * Build the `[envValue, "${VAR}"]` scrub pairs for every `${VAR}` reference
 * found in the spec or in its expanded (block-inlined) steps. The trace
 * command uses the result to rewrite recorded action lines so that values
 * threaded through `process.env` are stored as `${VAR}` rather than as the
 * concrete trace-time value.
 *
 * Both inputs are walked:
 *   - `spec.steps`: include steps contribute the values of their `params`;
 *     all other steps contribute `instruction` and `expected`.
 *   - `expanded`: every step contributes `instruction` and `expected`.
 *
 * Only names whose env value is a non-empty string land in `map`; the rest
 * are reported in `unresolved` so the caller can warn. `map` is sorted
 * longest-value-first so a short value that is a substring of a longer one
 * cannot clobber it during scrubbing.
 *
 * `title` and `relatedPaths` are not scanned.
 *
 * @param {{steps: object[]}} spec
 * @param {object[]} expanded
 * @returns {{map: [string, string][], unresolved: string[]}}
 */
function buildSpecEnvScrub(spec, expanded) {
	const refNames = new Set();
	for (const step of spec.steps) {
		if (isIncludeStep(step)) {
			for (const paramValue of Object.values(step.params ?? {})) {
				collect(paramValue, refNames);
			}
		} else {
			collect(step.instruction, refNames);
			collect(step.expected, refNames);
		}
	}
	for (const step of expanded) {
		collect(step.instruction, refNames);
		collect(step.expected, refNames);
	}
	const map = [];
	const unresolved = [];
	refNames.forEach((name) => {
		const value = process.env[name];
		const hasValue = typeof value === "string" && value.length > 0;
		if (hasValue) map.push([value, "${" + name + "}"]);
		else unresolved.push(name);
	});
	map.sort(([left], [right]) => right.length - left.length);
	return { map, unresolved };
}
|
|
2030
|
+
/**
 * Add every name produced by `iterEnvRefNames(value)` to the `into` set.
 *
 * @param {*} value Passed straight through to `iterEnvRefNames`.
 * @param {Set<string>} into Accumulator, mutated in place.
 */
function collect(value, into) {
	for (const refName of iterEnvRefNames(value)) {
		into.add(refName);
	}
}
|
|
2033
|
+
/**
 * Replace every occurrence of an env value with its `${VAR}` placeholder in
 * `text`.
 *
 * **Caller invariant**: the map must be sorted longest-value-first so a
 * shorter value doesn't shadow a longer one that contains it as a
 * substring. `buildSpecEnvScrub` upholds this; hand-built maps should too.
 *
 * Substitution only ever looks at text that came from the original input.
 * Placeholders inserted for an earlier entry are never re-scanned, so a
 * later value that happens to be a substring of a placeholder (e.g. value
 * `ID` vs. placeholder `${USER_ID}`) cannot corrupt it. Placeholders are
 * inserted verbatim, and entries with an empty value are ignored.
 *
 * @param {string} text Raw recorded text.
 * @param {[string, string][]} scrubMap `[value, placeholder]` pairs.
 * @returns {string} `text` with env values replaced by their placeholders.
 */
function scrubEnvValues(text, scrubMap) {
	if (scrubMap.length === 0) return text;
	// `literal` pieces are still-untouched input; the rest are placeholders.
	let pieces = [{ literal: true, text }];
	for (const [value, placeholder] of scrubMap) {
		// An empty needle would match between every character.
		if (value.length === 0) continue;
		const next = [];
		for (const piece of pieces) {
			if (!piece.literal || !piece.text.includes(value)) {
				next.push(piece);
				continue;
			}
			const chunks = piece.text.split(value);
			for (let i = 0; i < chunks.length; i++) {
				if (i > 0) next.push({ literal: false, text: placeholder });
				if (chunks[i].length > 0) next.push({ literal: true, text: chunks[i] });
			}
		}
		pieces = next;
	}
	return pieces.map((piece) => piece.text).join("");
}
|
|
2045
|
+
//#endregion
|
|
2046
|
+
//#region src/runtime/literal-scrub.ts
|
|
2047
|
+
/**
 * Table of "unstable literal" detectors: each entry pairs a stable `id`
 * with a regex `pattern` and a human-readable `label`.
 *
 * Patterns are listed in roughly descending confidence — a hit on `clock-hms`
 * is almost certainly bad; a hit on `unix-epoch-sec` (`1[0-9]{9}`) gates on
 * the value starting with `1`, which empirically rules out most SKU / order-id
 * false positives while still catching epoch seconds in the 2001-2033 window.
 *
 * Relative-time labels ("just now", "N minutes ago", "N分前") are the same
 * class of problem as wall-clock literals: the page shows them, Claude
 * captures them, and they're stale before the test ever replays. We only
 * catch the unambiguous variants — bare "now" or "minute" would false-fire
 * on routine UI copy.
 *
 * None of the patterns carries the `g` flag, so matching them keeps no
 * `lastIndex` state between calls.
 */
const UNSTABLE_PATTERNS = [
	// Wall-clock and calendar literals.
	{
		id: "clock-hms",
		pattern: /\b\d{2}:\d{2}:\d{2}\b/,
		label: "clock time HH:MM:SS"
	},
	{
		id: "iso-datetime",
		pattern: /\b\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}/,
		label: "ISO datetime"
	},
	{
		id: "iso-date",
		pattern: /\b\d{4}-\d{2}-\d{2}\b/,
		label: "ISO date YYYY-MM-DD"
	},
	// Epoch timestamps: exactly 10 or 13 digits with a leading `1`.
	{
		id: "unix-epoch-sec",
		pattern: /\b1[0-9]{9}\b/,
		label: "Unix epoch seconds"
	},
	{
		id: "unix-epoch-ms",
		pattern: /\b1[0-9]{12}\b/,
		label: "Unix epoch milliseconds"
	},
	// Relative-time labels (case-insensitive for English).
	{
		id: "relative-time-en",
		pattern: /\b\d+\s+(second|minute|hour|day|week|month|year)s?\s+ago\b/i,
		label: "English relative time (`N <unit> ago`)"
	},
	{
		id: "relative-now-en",
		pattern: /\bjust\s+now\b/i,
		label: "English `just now`"
	},
	{
		id: "relative-time-ja",
		pattern: /\d+\s*(秒|分|時間|日|週間|か月|ヶ月|年)前/,
		label: "Japanese relative time (`N<unit>前`)"
	},
	{
		id: "relative-now-ja",
		pattern: /たった今/,
		label: "Japanese `たった今`"
	},
	// Japanese calendar dates.
	{
		id: "ja-date-full",
		pattern: /\d{4}年\d{1,2}月\d{1,2}日/,
		label: "Japanese date YYYY年M月D日"
	},
	{
		id: "ja-date-md",
		// Lookbehinds reject a match preceded by `年` or a digit, so the M月D日
		// tail of a full date is not reported a second time. The lookahead
		// rejects `日` followed by `間` or `目` — presumably to skip
		// duration / ordinal phrases; confirm against the test fixtures.
		pattern: /(?<!年)(?<!\d)\d{1,2}月\d{1,2}日(?![間目])/,
		label: "Japanese date M月D日"
	}
];
|
|
2116
|
+
/**
 * Names of the action properties that `detectUnstableLiterals` inspects.
 * Only properties holding a non-empty string are actually matched against
 * the unstable-literal patterns.
 */
const SCANNABLE_FIELDS = [
	"selector",
	"value",
	"label",
	"target",
	"observation",
	"findValue",
	"findName"
];
|
|
2125
|
+
/**
 * Check one action against every unstable-literal pattern.
 *
 * Each scannable field holding a non-empty string is tested against every
 * entry of `UNSTABLE_PATTERNS`; one hit is reported per (field, pattern)
 * pair that matches, carrying the first matched substring.
 *
 * @param {object} action Recorded action.
 * @returns {{field: string, patternId: string, match: string}[]}
 *   Hits in field order, then pattern order. Empty means nothing fired.
 */
function detectUnstableLiterals(action) {
	return SCANNABLE_FIELDS.flatMap((field) => {
		const text = action[field];
		if (typeof text !== "string" || text.length === 0) return [];
		return UNSTABLE_PATTERNS.flatMap((candidate) => {
			const found = text.match(candidate.pattern);
			if (!found) return [];
			return [{
				field,
				patternId: candidate.id,
				match: found[0]
			}];
		});
	});
}
|
|
2145
|
+
/**
 * Partition recorded actions into `kept` and `dropped` according to
 * `detectUnstableLiterals`.
 *
 * An action with no hits is kept. A `snapshot` action whose hits are all on
 * its `observation` field is also kept: per the original note that field is
 * only a comment in the generated script, so the comment may be stale but
 * the script still runs. Every other action with a hit is dropped, recorded
 * together with its position in `actions` and the hits that caused it.
 *
 * @param {object[]} actions Recorded actions in order.
 * @returns {{kept: object[], dropped: {index: number, action: object, hits: object[]}[]}}
 */
function scrubUnstableActions(actions) {
	const kept = [];
	const dropped = [];
	for (const [index, action] of actions.entries()) {
		const hits = detectUnstableLiterals(action);
		const harmless = hits.length === 0 || (action.command === "snapshot" && hits.every((hit) => hit.field === "observation"));
		if (harmless) kept.push(action);
		else dropped.push({ index, action, hits });
	}
	return { kept, dropped };
}
|
|
2179
|
+
/**
 * Render one literal-scrub drop as a single human-readable line for
 * `log.warn`: the command (plus assert type when present), the distinct
 * pattern ids that fired, and a `field="match"` sample for every hit.
 *
 * @param {{action: object, hits: {field: string, patternId: string, match: string}[]}} drop
 * @returns {string}
 */
function formatUnstableDrop(drop) {
	const { action, hits } = drop;
	const distinctIds = new Set();
	const sampleParts = [];
	for (const hit of hits) {
		distinctIds.add(hit.patternId);
		sampleParts.push(`${hit.field}="${hit.match}"`);
	}
	const subject = action.assertType ? `${action.command} ${action.assertType}` : `${action.command}`;
	return `${subject}: contains unstable literal (${[...distinctIds].join(", ")}) — ${sampleParts.join(", ")}`;
}
|
|
2190
|
+
//#endregion
|
|
1441
2191
|
//#region src/cli/trace.ts
|
|
1442
|
-
const
|
|
2192
|
+
const VALIDATION_MODES = ["lenient", "strict"];
|
|
2193
|
+
const traceCommand = new Command("trace").argument("<feature/spec>", "Spec id in '<feature>/<spec>' form (resolves to .ccqa/features/<feature>/test-cases/<spec>/)").description("Run agent-browser, verify assertions, and record structured actions").option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID. Overrides CCQA_MODEL.").option("--validation-mode <mode>", "Post-trace validation behaviour: 'lenient' (default) tags failing actions with a warning but keeps them; 'strict' drops them from actions.json.", (raw) => {
|
|
2194
|
+
if (VALIDATION_MODES.includes(raw)) return raw;
|
|
2195
|
+
throw new Error(`--validation-mode must be one of ${VALIDATION_MODES.join(" | ")}`);
|
|
2196
|
+
}, "lenient").action(async (specPath, opts) => {
|
|
1443
2197
|
const { featureName, specName } = parseSpecPath(specPath);
|
|
1444
|
-
await runTrace(featureName, specName, opts.model);
|
|
2198
|
+
await runTrace(featureName, specName, opts.model, opts.validationMode ?? "lenient");
|
|
1445
2199
|
});
|
|
1446
|
-
async function runTrace(featureName, specName, model) {
|
|
2200
|
+
async function runTrace(featureName, specName, model, validationMode = "lenient") {
|
|
1447
2201
|
header("trace", `${featureName}/${specName}`);
|
|
1448
2202
|
try {
|
|
1449
2203
|
meta("agent-browser", assertAgentBrowserAvailable());
|
|
@@ -1458,17 +2212,23 @@ async function runTrace(featureName, specName, model) {
|
|
|
1458
2212
|
await warnStaleBlockArtifacts();
|
|
1459
2213
|
const spec = parseTestSpec(await readSpecFile(featureName, specName));
|
|
1460
2214
|
const expanded = expandSpec(spec, { blocks: await loadAllBlocks() });
|
|
2215
|
+
const envScrub = buildSpecEnvScrub(spec, expanded);
|
|
2216
|
+
const envScrubMap = envScrub.map;
|
|
2217
|
+
if (envScrub.unresolved.length > 0) warn(`spec references env var(s) with empty/unset values: ${envScrub.unresolved.join(", ")} — their literal trace-time values will be baked into actions.json`);
|
|
1461
2218
|
meta("spec", spec.title);
|
|
1462
2219
|
meta("steps", expanded.length);
|
|
1463
2220
|
const includes = collectIncludedBlockNames(spec);
|
|
1464
2221
|
if (includes.length > 0) meta("blocks", includes.join(", "));
|
|
1465
2222
|
blank();
|
|
1466
2223
|
const sessionName = generateSessionName();
|
|
1467
|
-
const
|
|
2224
|
+
const baseSystemPrompt = buildTraceSystemPrompt({
|
|
1468
2225
|
title: spec.title,
|
|
1469
2226
|
steps: expanded,
|
|
1470
2227
|
sessionName
|
|
1471
2228
|
});
|
|
2229
|
+
const userPrompt = await loadTraceUserPrompt();
|
|
2230
|
+
if (userPrompt !== null) meta("user-prompt", ".ccqa/prompts/trace.user.md");
|
|
2231
|
+
const systemPrompt = userPrompt === null ? baseSystemPrompt : `${baseSystemPrompt}\n## Project-specific guidance\n\n${userPrompt}\n`;
|
|
1472
2232
|
const prompt = buildTracePrompt(spec.title);
|
|
1473
2233
|
info("Running agent-browser session...");
|
|
1474
2234
|
blank();
|
|
@@ -1499,7 +2259,7 @@ async function runTrace(featureName, specName, model) {
|
|
|
1499
2259
|
},
|
|
1500
2260
|
model,
|
|
1501
2261
|
onAbAction: (abAction) => {
|
|
1502
|
-
const action = withStepId(parseAbAction(abAction));
|
|
2262
|
+
const action = withStepId(parseAbAction(scrubEnvValues(abAction, envScrubMap)));
|
|
1503
2263
|
if (action) traceActions.push(action);
|
|
1504
2264
|
},
|
|
1505
2265
|
onAbActionFailed: () => {
|
|
@@ -1530,14 +2290,14 @@ async function runTrace(featureName, specName, model) {
|
|
|
1530
2290
|
if (routeStep.status === "FAILED") overallStatus = "failed";
|
|
1531
2291
|
}
|
|
1532
2292
|
} else if (trimmed.startsWith("AB_ACTION|snapshot|") || trimmed.startsWith("AB_ACTION|assert|")) {
|
|
1533
|
-
const action = withStepId(parseAbAction(trimmed));
|
|
2293
|
+
const action = withStepId(parseAbAction(scrubEnvValues(trimmed, envScrubMap)));
|
|
1534
2294
|
if (action) traceActions.push(action);
|
|
1535
2295
|
}
|
|
1536
2296
|
}
|
|
1537
2297
|
}
|
|
1538
2298
|
});
|
|
1539
2299
|
if (isError) overallStatus = "failed";
|
|
1540
|
-
const validatedActions = validateAndReport(traceActions);
|
|
2300
|
+
const validatedActions = validateAndReport(dedupAndReport(scrubAndReport(traceActions)), validationMode);
|
|
1541
2301
|
const route = {
|
|
1542
2302
|
specName,
|
|
1543
2303
|
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
@@ -1558,24 +2318,194 @@ async function runTrace(featureName, specName, model) {
|
|
|
1558
2318
|
hint(`run 'ccqa generate ${featureName}/${specName}' to generate a test script`);
|
|
1559
2319
|
}
|
|
1560
2320
|
/**
 * Remove actions whose recorded fields contain "unstable literal" values
 * (clock readings, ISO datetimes, Unix-epoch IDs, ...) and log what was
 * removed. Such values would otherwise pin the generated test to a single
 * run.
 *
 * Nothing is logged when no action is dropped. Each drop is reported with
 * its 1-based position, followed by a `scrubbed` summary line.
 *
 * @param {object[]} actions Recorded actions.
 * @returns {object[]} The input array itself when it is empty, otherwise
 *   the surviving actions.
 */
function scrubAndReport(actions) {
	if (actions.length === 0) return actions;
	const outcome = scrubUnstableActions(actions);
	const survivors = outcome.kept;
	const removed = outcome.dropped;
	if (removed.length > 0) {
		blank();
		info("post-trace literal scrub (removing run-specific values)...");
		removed.forEach((entry) => {
			warn(`dropped action #${entry.index + 1} (${formatUnstableDrop(entry)})`);
		});
		meta("scrubbed", `${survivors.length}/${actions.length} kept (${removed.length} dropped)`);
	}
	return survivors;
}
|
|
2337
|
+
/**
 * Collapse *immediately adjacent* duplicate actions.
 *
 * Per the original note, Claude occasionally records the same action twice
 * in a row when retrying after a snapshot. Only the first of a run of
 * structurally identical neighbours (as judged by `isAdjacentDuplicate`)
 * is kept. The pass is deliberately conservative: retries with different
 * selectors or locators are left alone so a legitimate "click the
 * neighbouring button" sequence is never merged.
 *
 * A `deduped` summary line is logged only when something was dropped.
 *
 * @param {object[]} actions Recorded actions.
 * @returns {object[]} The input array itself when it is empty, otherwise a
 *   new array without the adjacent duplicates.
 */
function dedupAndReport(actions) {
	if (actions.length === 0) return actions;
	const kept = [];
	for (const candidate of actions) {
		const previous = kept.length > 0 ? kept[kept.length - 1] : void 0;
		const isRepeat = Boolean(previous) && isAdjacentDuplicate(previous, candidate);
		if (!isRepeat) kept.push(candidate);
	}
	// Every action is either kept or dropped, so the difference is the drop count.
	const droppedCount = actions.length - kept.length;
	if (droppedCount > 0) {
		meta("deduped", `${kept.length}/${actions.length} kept (${droppedCount} adjacent duplicate(s) dropped)`);
	}
	return kept;
}
|
|
2368
|
+
/**
 * Decide whether two actions are structurally identical: same `command`,
 * same `stepId`, and the same value for every compared field. The `stepId`
 * check keeps two distinct steps that happen to start identically from
 * being merged.
 *
 * A missing (null/undefined) field compares equal to its fallback: `""`
 * for string fields, `-1` for `findIndex`, `false` for `findExact`.
 *
 * @param {object} a
 * @param {object} b
 * @returns {boolean}
 */
function isAdjacentDuplicate(a, b) {
	if (a.command !== b.command) return false;
	const comparedFields = [
		["stepId", ""],
		["selector", ""],
		["value", ""],
		["target", ""],
		["label", ""],
		["assertType", ""],
		["findLocator", ""],
		["findValue", ""],
		["findName", ""],
		["findIndex", -1],
		["findExact", false]
	];
	return comparedFields.every(([field, fallback]) => (a[field] ?? fallback) === (b[field] ?? fallback));
}
|
|
2379
|
+
/**
 * Run the post-trace replay validation and emit user-visible reports.
 * Splitting this out keeps `runTrace` readable.
 *
 * In lenient mode failing actions are NOT removed — the actions returned in
 * `unstable` are merged back with `kept` in their original order, each one
 * logged as `replay-unstable`, so codegen can still emit them (with a
 * `// [warn]` comment).
 *
 * In any other mode (strict) only `kept` is returned. Drops are logged one
 * per line, except that consecutive "skipped after ..." drops sharing a
 * step id are folded into a single "cascade dropped N action(s)" warning.
 *
 * Both modes finish with a per-step kept/total breakdown.
 *
 * @param {object[]} actions Recorded actions to validate.
 * @param {string} mode `"lenient"` or `"strict"`.
 * @returns {object[]} The input array itself when it is empty, otherwise
 *   the actions that should be written out.
 */
function validateAndReport(actions, mode) {
	if (actions.length === 0) return actions;
	// Validation replays in its own session, derived from a fresh session name.
	const sessionName = `${generateSessionName()}-validate`;
	blank();
	info(`post-trace validation in ${mode} mode (replaying ${actions.length} recorded action(s))...`);
	// `rescuedSteps` may be absent from the result, hence the default.
	const { kept, unstable, dropped, rescuedSteps = [] } = validateActions(actions, {
		sessionName,
		mode,
		onProgress: (i, total, action) => {
			progress(i, total, validationProgressLabel(action));
		}
	});
	progressEnd();
	if (rescuedSteps.length > 0) info(`rescued ${rescuedSteps.length} step(s) that had lost every action: ${rescuedSteps.join(", ")}`);
	if (mode === "lenient") {
		if (unstable.length === 0) meta("validated", `${kept.length}/${actions.length} kept`);
		else {
			for (const u of unstable) warn(`replay-unstable: ${`${u.command}${u.selector ? " " + u.selector : ""}${u.findValue ? " " + u.findValue : ""}`} — ${u.replayReason ?? "(no reason)"} (kept in actions.json with warning)`);
			meta("validated", `${kept.length}/${actions.length} kept, ${unstable.length} flagged replay-unstable (kept with warning)`);
		}
		// Re-interleave kept + unstable so the output follows recording order.
		const merged = mergeKeptAndUnstableInOriginalOrder(actions, kept, unstable);
		reportPerStepBreakdown(actions, merged);
		return merged;
	}
	if (dropped.length === 0) {
		meta("validated", `${kept.length}/${actions.length} kept`);
		reportPerStepBreakdown(actions, kept);
		return kept;
	}
	// State of the cascade run currently being accumulated (null = none open).
	let cascadeStart = null;
	let cascadeCount = 0;
	let cascadeStepId;
	// Emit the pending cascade summary, if any, and reset the run state.
	const flushCascade = () => {
		if (cascadeStart === null || cascadeCount === 0) return;
		const stepTag = cascadeStepId ? ` in ${cascadeStepId}` : "";
		// `cascadeStart` is the 0-based index of the first cascaded action, so
		// printed unadjusted it names — in the 1-based numbering of the other
		// warnings — the action just before the run (presumably the failure
		// that triggered the cascade; confirm against `validateActions`).
		warn(`cascade dropped ${cascadeCount} action(s)${stepTag} after action #${cascadeStart}`);
		cascadeStart = null;
		cascadeCount = 0;
		cascadeStepId = void 0;
	};
	for (const d of dropped) {
		const isCascade = d.reason.startsWith("skipped after");
		// Same step as the open run: just extend it.
		if (isCascade && cascadeStart !== null && cascadeStepId === d.action.stepId) {
			cascadeCount += 1;
			continue;
		}
		// Anything else closes the open run first.
		flushCascade();
		if (isCascade) {
			cascadeStart = d.index;
			cascadeCount = 1;
			cascadeStepId = d.action.stepId;
			continue;
		}
		warn(`dropped action #${d.index + 1} (${d.action.command}${d.action.selector ? " " + d.action.selector : ""}): ${d.reason}`);
	}
	// Flush a run that extends to the end of the drop list.
	flushCascade();
	meta("validated", `${kept.length}/${actions.length} kept (${dropped.length} dropped)`);
	reportPerStepBreakdown(actions, kept);
	return kept;
}
|
|
2449
|
+
/**
 * Lenient-mode helper: return the actions of `originalActions` that appear
 * in either `kept` or `unstable`, in original recording order. Membership
 * is decided by object identity, so both lists must hold the same object
 * references as `originalActions`.
 *
 * @param {object[]} originalActions
 * @param {object[]} kept
 * @param {object[]} unstable
 * @returns {object[]} A new array.
 */
function mergeKeptAndUnstableInOriginalOrder(originalActions, kept, unstable) {
	const retained = new Set(kept);
	for (const flagged of unstable) retained.add(flagged);
	return originalActions.filter((action) => retained.has(action));
}
|
|
2460
|
+
/**
 * Build the compact one-line progress label shown while validation replays
 * an action. The detail part is the first that applies of: `find <locator>
 * <value>`, `<command> <selector>`, `<command> <value>`, or the bare
 * command. It is clipped to 80 characters (77 + "...") and prefixed with
 * the step id when there is one.
 *
 * @param {object} action
 * @returns {string}
 */
function validationProgressLabel(action) {
	let detail;
	if (action.findLocator) {
		detail = `find ${action.findLocator} ${action.findValue ?? ""}`.trim();
	} else if (action.selector) {
		detail = `${action.command} ${action.selector}`;
	} else if (action.value) {
		detail = `${action.command} ${action.value}`;
	} else {
		detail = action.command;
	}
	const clipped = detail.length > 80 ? detail.slice(0, 77) + "..." : detail;
	const prefix = action.stepId ? `${action.stepId} ` : "";
	return `${prefix}${clipped}`;
}
|
|
2470
|
+
/**
 * Log one `kept/total` line per step, in order of first appearance in
 * `beforeValidation`, so a step that lost ALL of its actions stands out.
 * Steps with zero survivors are additionally collected into a dedicated
 * warning, because the generated test will not exercise them. Actions
 * without a step id are grouped under "<no step>" and never count as lost.
 *
 * @param {object[]} beforeValidation Actions before validation.
 * @param {object[]} afterValidation Actions that remain afterwards.
 */
function reportPerStepBreakdown(beforeValidation, afterValidation) {
	const totals = groupCountByStep(beforeValidation);
	const survivors = groupCountByStep(afterValidation);
	// A Set keeps first-insertion order, which gives the first-seen step order.
	const stepOrder = [...new Set(beforeValidation.map((action) => action.stepId ?? "<no step>"))];
	const lostSteps = [];
	for (const id of stepOrder) {
		const total = totals.get(id) ?? 0;
		const kept = survivors.get(id) ?? 0;
		const dropped = total - kept;
		const isLost = kept === 0 && total > 0 && id !== "<no step>";
		if (isLost) lostSteps.push(id);
		const droppedNote = dropped > 0 ? `, ${dropped} dropped` : "";
		const lostNote = isLost ? " ⚠ entire step removed" : "";
		meta(` ${id}`, `${kept}/${total} kept${droppedNote}${lostNote}`);
	}
	if (lostSteps.length > 0) {
		warn(`${lostSteps.length} spec step(s) lost every recorded action: ${lostSteps.join(", ")} — the generated test will NOT exercise these steps.`);
	}
}
|
|
2501
|
+
/**
 * Count actions per step id. Actions whose `stepId` is null or undefined
 * are counted under the key "<no step>".
 *
 * @param {object[]} actions
 * @returns {Map<string, number>} Counts keyed by step id, in first-seen order.
 */
function groupCountByStep(actions) {
	const counts = new Map();
	for (const { stepId } of actions) {
		const key = stepId ?? "<no step>";
		const seenSoFar = counts.has(key) ? counts.get(key) : 0;
		counts.set(key, seenSoFar + 1);
	}
	return counts;
}
|
|
1579
2509
|
function parseStatusLine(text) {
|
|
1580
2510
|
for (const line of text.split("\n")) {
|
|
1581
2511
|
const match = line.match(/^(STEP_START|STEP_DONE|ASSERTION_FAILED|STEP_SKIPPED|RUN_COMPLETED)\|([^|]*)\|(.*)$/);
|
|
@@ -1666,13 +2596,50 @@ function parseAbAction(line) {
|
|
|
1666
2596
|
target: parts[3],
|
|
1667
2597
|
label: parts[4]
|
|
1668
2598
|
};
|
|
2599
|
+
case "find_click":
|
|
2600
|
+
case "find_dblclick":
|
|
2601
|
+
case "find_hover":
|
|
2602
|
+
case "find_focus":
|
|
2603
|
+
case "find_check":
|
|
2604
|
+
case "find_uncheck": return parseFindAction(command, parts, false);
|
|
2605
|
+
case "find_fill":
|
|
2606
|
+
case "find_type": return parseFindAction(command, parts, true);
|
|
1669
2607
|
default: return null;
|
|
1670
2608
|
}
|
|
1671
2609
|
}
|
|
2610
|
+
/**
 * Shared parser for the `find_*` action family.
 *
 * Positional layout of `parts`: `[2]` locator, `[3]` locator value, `[4]`
 * `<extra>`, `[5]` the literal token `exact` (or empty), then the trailing
 * fields — `[6]` value + `[7]` label for fill-type commands, otherwise
 * `[6]` label. `<extra>` is the accessible name for the `role` locator and
 * the integer index for `nth`; it is ignored for every other locator, so a
 * placeholder-empty `<extra>` is accepted.
 *
 * @param {string} command The `find_*` command name.
 * @param {string[]} parts The `|`-separated fields of the action line.
 * @param {boolean} hasFillValue Whether the command carries a typed value.
 * @returns {object|null} The parsed action, or null when the locator is
 *   missing or unknown, the locator value is missing, or an `nth` locator
 *   has no parsable index.
 */
function parseFindAction(command, parts, hasFillValue) {
	const locator = parts[2];
	const findValue = parts[3];
	if (!locator || !FIND_LOCATORS.includes(locator) || !findValue) return null;
	const extra = parts[4] ?? "";
	const exactToken = parts[5] ?? "";
	const parsed = {
		command,
		findLocator: locator,
		findValue
	};
	// Optional keys are only added when they carry information.
	if (exactToken === "exact") parsed.findExact = true;
	if (locator === "role" && extra) parsed.findName = extra;
	if (locator === "nth") {
		const index = extra ? Number.parseInt(extra, 10) : void 0;
		if (index === void 0 || Number.isNaN(index)) return null;
		parsed.findIndex = index;
	}
	if (hasFillValue) {
		parsed.value = parts[6];
		parsed.label = parts[7];
	} else {
		parsed.label = parts[6];
	}
	return parsed;
}
|
|
1672
2639
|
//#endregion
|
|
1673
2640
|
//#region src/codegen/actions-to-script.ts
|
|
1674
2641
|
function actionsToScript(input) {
|
|
1675
|
-
const { actions, testName, stepMarkers = [] } = input;
|
|
2642
|
+
const { actions, testName, stepMarkers = [], emptySteps = [] } = input;
|
|
1676
2643
|
const parts = [...[
|
|
1677
2644
|
`import { test } from "vitest";`,
|
|
1678
2645
|
`import { spawnSync } from "node:child_process";`,
|
|
@@ -1695,7 +2662,7 @@ function actionsToScript(input) {
|
|
|
1695
2662
|
`process.env.AGENT_BROWSER_SESSION ||= \`ccqa-run-\${Date.now()}\`;`,
|
|
1696
2663
|
""
|
|
1697
2664
|
]];
|
|
1698
|
-
const body = actionsToLines(actions, stepMarkers).map((l) => ` ${l}`).join("\n");
|
|
2665
|
+
const body = actionsToLines(actions, stepMarkers, emptySteps).map((l) => ` ${l}`).join("\n");
|
|
1699
2666
|
parts.push(`test(${JSON.stringify(testName)}, () => {`, body, "}, 5 * 60 * 1000);", "");
|
|
1700
2667
|
return parts.join("\n");
|
|
1701
2668
|
}
|
|
@@ -1709,13 +2676,31 @@ const ELEMENT_COMMANDS = new Set([
|
|
|
1709
2676
|
"uncheck",
|
|
1710
2677
|
"select",
|
|
1711
2678
|
"hover",
|
|
1712
|
-
"drag"
|
|
2679
|
+
"drag",
|
|
2680
|
+
"find_click",
|
|
2681
|
+
"find_dblclick",
|
|
2682
|
+
"find_fill",
|
|
2683
|
+
"find_type",
|
|
2684
|
+
"find_hover",
|
|
2685
|
+
"find_focus",
|
|
2686
|
+
"find_check",
|
|
2687
|
+
"find_uncheck"
|
|
1713
2688
|
]);
|
|
1714
|
-
function actionsToLines(actions, stepMarkers) {
|
|
2689
|
+
function actionsToLines(actions, stepMarkers, emptySteps) {
|
|
1715
2690
|
const lines = [];
|
|
1716
2691
|
let prevLine = null;
|
|
1717
|
-
let
|
|
2692
|
+
let pendingOpenSettle = false;
|
|
1718
2693
|
const markerByIndex = new Map(stepMarkers.map((m) => [m.actionIndex, m]));
|
|
2694
|
+
const emptyByInsertAfter = /* @__PURE__ */ new Map();
|
|
2695
|
+
for (const e of emptySteps) {
|
|
2696
|
+
const list = emptyByInsertAfter.get(e.insertAfterIndex) ?? [];
|
|
2697
|
+
list.push(e);
|
|
2698
|
+
emptyByInsertAfter.set(e.insertAfterIndex, list);
|
|
2699
|
+
}
|
|
2700
|
+
const leadingNotices = emptyByInsertAfter.get(-1) ?? [];
|
|
2701
|
+
for (const n of leadingNotices) appendEmptyStepNotice(lines, n);
|
|
2702
|
+
let currentStepId;
|
|
2703
|
+
let filledValuesThisStep = /* @__PURE__ */ new Set();
|
|
1719
2704
|
for (let i = 0; i < actions.length; i++) {
|
|
1720
2705
|
const marker = markerByIndex.get(i);
|
|
1721
2706
|
if (marker) {
|
|
@@ -1723,22 +2708,86 @@ function actionsToLines(actions, stepMarkers) {
|
|
|
1723
2708
|
lines.push(`// step: ${marker.stepId} [${marker.source}]`);
|
|
1724
2709
|
}
|
|
1725
2710
|
const action = actions[i];
|
|
2711
|
+
if (action.stepId !== currentStepId) {
|
|
2712
|
+
currentStepId = action.stepId;
|
|
2713
|
+
filledValuesThisStep = /* @__PURE__ */ new Set();
|
|
2714
|
+
}
|
|
2715
|
+
const filled = fillValueOf(action);
|
|
2716
|
+
if (filled) filledValuesThisStep.add(filled);
|
|
2717
|
+
if (action.command === "assert" && action.assertType === "text_visible" && typeof action.value === "string" && filledValuesThisStep.has(action.value)) {
|
|
2718
|
+
lines.push(`// [warn] replay-unstable: dropped input-value assert (text_visible ${action.value}) — typed values aren't visible text nodes`);
|
|
2719
|
+
continue;
|
|
2720
|
+
}
|
|
1726
2721
|
const line = actionToLine(action);
|
|
1727
2722
|
if (line === null) continue;
|
|
1728
2723
|
if (line === prevLine) continue;
|
|
1729
|
-
if (
|
|
2724
|
+
if (action.command === "open") pendingOpenSettle = true;
|
|
2725
|
+
if (pendingOpenSettle && ELEMENT_COMMANDS.has(action.command)) {
|
|
2726
|
+
lines.push(`spawnSync("sleep", ["3"], { stdio: "inherit" });`);
|
|
2727
|
+
pendingOpenSettle = false;
|
|
2728
|
+
}
|
|
2729
|
+
if (action.replayUnstable) lines.push(`// [warn] replay-unstable: ${action.replayReason ?? "(no reason recorded)"}`);
|
|
1730
2730
|
lines.push(line);
|
|
1731
2731
|
prevLine = line;
|
|
1732
|
-
|
|
2732
|
+
const followups = emptyByInsertAfter.get(i);
|
|
2733
|
+
if (followups) for (const n of followups) appendEmptyStepNotice(lines, n);
|
|
1733
2734
|
}
|
|
1734
2735
|
return lines;
|
|
1735
2736
|
}
|
|
1736
|
-
/**
|
|
2737
|
+
/**
 * Extract the text that a fill-style action enters into a field.
 *
 * Recognised commands are `fill`, `type`, `find_fill` and `find_type`; all
 * four carry the typed text in `action.value`.
 *
 * @param {object} action - Trace action to inspect.
 * @returns {string|null} The typed text when the command is fill-style and
 *   `action.value` is a non-empty string; otherwise null.
 */
function fillValueOf(action) {
	const isFillCommand = [
		"fill",
		"type",
		"find_fill",
		"find_type"
	].includes(action.command);
	if (!isFillCommand) return null;
	const typed = action.value;
	// Empty or non-string values count as "nothing typed".
	if (typeof typed !== "string" || typed.length === 0) return null;
	return typed;
}
|
|
2745
|
+
/**
 * Append the comment block that flags a spec step whose actions were all
 * dropped, so the generated test visibly records that the step is not
 * exercised.
 *
 * Mutates `lines` in place. A blank separator line is added first unless
 * `lines` is still empty.
 *
 * @param {string[]} lines - Script lines accumulated so far.
 * @param {{stepId: string, source: string}} notice - The step that lost its actions.
 * @returns {void}
 */
function appendEmptyStepNotice(lines, notice) {
	const { stepId, source } = notice;
	const block = [
		`// step: ${stepId} [${source}]`,
		`// [warn] all actions for this step were dropped during post-trace validation.`,
		`// [warn] the generated test does NOT exercise step ${stepId}. Re-run`,
		`// [warn] \`ccqa trace\` or add manual assertions if this step is load-bearing.`
	];
	if (lines.length !== 0) block.unshift("");
	lines.push(...block);
}
|
|
2752
|
+
/**
 * Detect selectors that are session-specific agent-browser refs and therefore
 * cannot be replayed. Two spellings are recognised:
 *   - `@e14`: the snapshot ref syntax (anything starting with `@` after trimming)
 *   - `button[ref='e4']` / `[ref=e4]`: a `ref` attribute that leaked into a
 *     CSS selector
 * Refs are re-numbered on every snapshot, so neither form survives a fresh run.
 *
 * @param {unknown} selector - Candidate selector; non-strings are never refs.
 * @returns {boolean} True when the selector is one of the ref forms above.
 */
function isRefSelector(selector) {
	if (typeof selector !== "string") return false;
	const trimmed = selector.trim();
	if (trimmed.startsWith("@")) return true;
	// Attribute form: optional quotes around `e<digits>`, optional spaces around `=`.
	return /\[ref\s*=\s*['"]?e\d+['"]?\]/.test(trimmed);
}
|
|
2765
|
+
/**
 * Detect selectors that pick elements *by the enabled/disabled state itself*.
 *
 * Asserting `element_disabled` / `element_enabled` through such a selector is
 * a tautology: the selector only matches elements already in that state, so
 * the check says nothing about the element the spec actually cares about.
 * Codegen replaces those asserts with a breadcrumb comment instead of emitting
 * a check that can never fail.
 *
 * Matched anywhere in the selector, case-insensitively:
 *   - the `:disabled` / `:enabled` pseudo-classes
 *   - the `[disabled]` / `[aria-disabled=…]` attribute selectors
 *
 * @param {unknown} selector - Candidate selector; non-strings never match.
 * @returns {boolean} True when the selector targets by enabled/disabled state.
 */
function isStateSelector(selector) {
	const statePattern = /:disabled\b|:enabled\b|\[\s*disabled[\s\]=]|\[\s*aria-disabled[\s\]=]/i;
	return typeof selector === "string" && statePattern.test(selector);
}
|
|
1740
2785
|
function actionToLine(action) {
|
|
1741
2786
|
if ("selector" in action && isRefSelector(action.selector)) return null;
|
|
2787
|
+
if (action.command === "assert" && action.replayUnstable && typeof action.replayReason === "string" && action.replayReason.includes("selector not present")) {
|
|
2788
|
+
const sel = action.selector ?? action.observation ?? "(unknown)";
|
|
2789
|
+
return `// [warn] replay-unstable: dropped over-assertion (${action.assertType ?? "assert"} ${sel}) — selector not present on replay`;
|
|
2790
|
+
}
|
|
1742
2791
|
switch (action.command) {
|
|
1743
2792
|
case "cookies_clear": return `ab("cookies", "clear");`;
|
|
1744
2793
|
case "open": return `ab("open", ${jExpr((action.value ?? "").replace(/^["']|["']$/g, ""))});`;
|
|
@@ -1757,7 +2806,22 @@ function actionToLine(action) {
|
|
|
1757
2806
|
case "wait": {
|
|
1758
2807
|
const sel = action.selector;
|
|
1759
2808
|
if (/^\d+$/.test(sel)) return `spawnSync("sleep", [${j(sel)}], { stdio: "inherit" });`;
|
|
1760
|
-
|
|
2809
|
+
if (sel.startsWith("--")) return null;
|
|
2810
|
+
return `abWait(${jExpr(sel)});`;
|
|
2811
|
+
}
|
|
2812
|
+
case "find_click":
|
|
2813
|
+
case "find_dblclick":
|
|
2814
|
+
case "find_hover":
|
|
2815
|
+
case "find_focus":
|
|
2816
|
+
case "find_check":
|
|
2817
|
+
case "find_uncheck": {
|
|
2818
|
+
const args = buildFindArgs(action, void 0);
|
|
2819
|
+
return args === null ? droppedFindMarker(action) : `ab(${args.join(", ")});`;
|
|
2820
|
+
}
|
|
2821
|
+
case "find_fill":
|
|
2822
|
+
case "find_type": {
|
|
2823
|
+
const args = buildFindArgs(action, action.value ?? "");
|
|
2824
|
+
return args === null ? droppedFindMarker(action) : `ab(${args.join(", ")});`;
|
|
1761
2825
|
}
|
|
1762
2826
|
case "assert": {
|
|
1763
2827
|
const val = action.value ?? action.observation;
|
|
@@ -1781,9 +2845,11 @@ function actionToLine(action) {
|
|
|
1781
2845
|
if (val) assertLine = `abAssertUrl(${jExpr(val)});`;
|
|
1782
2846
|
break;
|
|
1783
2847
|
case "element_enabled":
|
|
2848
|
+
if (isStateSelector(sel)) return tautologicalStateAssertMarker(action, sel);
|
|
1784
2849
|
if (sel && !sel.startsWith("text=") && !sel.startsWith("[aria-label=")) assertLine = `abAssertEnabled(${j(sel)});`;
|
|
1785
2850
|
break;
|
|
1786
2851
|
case "element_disabled":
|
|
2852
|
+
if (isStateSelector(sel)) return tautologicalStateAssertMarker(action, sel);
|
|
1787
2853
|
if (sel && !sel.startsWith("text=") && !sel.startsWith("[aria-label=")) assertLine = `abAssertDisabled(${j(sel)});`;
|
|
1788
2854
|
break;
|
|
1789
2855
|
case "element_checked":
|
|
@@ -1799,6 +2865,51 @@ function actionToLine(action) {
|
|
|
1799
2865
|
default: return null;
|
|
1800
2866
|
}
|
|
1801
2867
|
}
|
|
2868
|
+
/**
 * Build the already-quoted argument list for an `ab("find", ...)` codegen line.
 *
 * Emitted order:
 *   "find", <locator>, [<index> for `nth`], <value>, <action>, [<fillValue>],
 *   ["--name", <name> for `role`], ["--exact"]
 *
 * The inner action is the command with its `find_` prefix removed, and
 * `type` is emitted as `fill`. `findValue`, `findName` and the fill value go
 * through `jExpr`; the selector used with `first` / `last` / `nth` goes
 * through `j` and stays a plain string literal.
 *
 * @param {object} action - A `find_*` trace action.
 * @param {string|undefined} fillValue - Text for fill-style actions; pass
 *   `undefined` for actions that take no input.
 * @returns {string[]|null} Source-ready argument expressions, or null when
 *   `findLocator` or `findValue` is missing.
 */
function buildFindArgs(action, fillValue) {
	const locator = action.findLocator;
	const target = action.findValue;
	if (!locator || !target) return null;
	// "find_".length === 5; find_type is emitted as the `fill` inner action.
	const innerAction = action.command.slice(5).replace("type", "fill");
	const args = [JSON.stringify("find"), JSON.stringify(locator)];
	switch (locator) {
		case "nth":
			args.push(JSON.stringify(String(action.findIndex ?? 0)), j(target));
			break;
		case "first":
		case "last":
			args.push(j(target));
			break;
		default:
			args.push(jExpr(target));
	}
	args.push(JSON.stringify(innerAction));
	if (fillValue !== void 0) args.push(jExpr(fillValue));
	if (locator === "role" && action.findName) args.push(JSON.stringify("--name"), jExpr(action.findName));
	if (action.findExact) args.push(JSON.stringify("--exact"));
	return args;
}
|
|
2892
|
+
/**
 * Produce the breadcrumb comment emitted in place of a `find_*` action whose
 * locator/value fields are missing, so no runnable `ab(...)` line can be
 * generated for it.
 *
 * The result is a single `//` comment line, so the generated file still
 * parses, and the fixed "find_* dropped" wording makes the omission easy to
 * grep for.
 *
 * @param {object} action - The `find_*` action that could not be emitted.
 * @returns {string} A one-line warning comment naming the command and, when
 *   present, its step id.
 */
function droppedFindMarker(action) {
	let ctx = "";
	if (action.stepId) ctx = ` (stepId=${action.stepId})`;
	return `// [warn] find_* dropped: ${action.command}${ctx} — actions.json is missing findLocator/findValue. Re-run \`ccqa trace\` to regenerate.`;
}
|
|
2903
|
+
/**
 * Produce the breadcrumb comment that replaces an `element_enabled` /
 * `element_disabled` assert whose selector already selects by that state
 * (see `isStateSelector`). The assert is left out of the runnable script;
 * the comment lets a reviewer see which check was discarded and re-assert it
 * against a state-independent selector if the state matters.
 *
 * @param {object} action - The discarded assert action.
 * @param {string|null|undefined} sel - The offending selector.
 * @returns {string} A one-line warning comment.
 */
function tautologicalStateAssertMarker(action, sel) {
	const assertKind = action.assertType ?? "assert";
	const shownSelector = sel ?? "(unknown)";
	return `// [warn] dropped tautological assert (${assertKind} ${shownSelector}) — selector matches by the asserted state; target the element by a state-independent selector instead`;
}
|
|
1802
2913
|
/**
 * Quote a value with JSON.stringify so the result can be embedded verbatim
 * as a string literal in generated TS source.
 */
const j = (text) => JSON.stringify(text);
|
|
1804
2915
|
/**
|
|
@@ -1818,13 +2929,19 @@ The trace contains noise: failed attempts, redundant retries, and duplicate oper
|
|
|
1818
2929
|
Your task: return a **cleaned-up JSON array** of TraceAction objects that represents the minimal, correct sequence of actions needed to reproduce the test.
|
|
1819
2930
|
|
|
1820
2931
|
Each TraceAction object has the following shape (use EXACTLY these field names):
|
|
1821
|
-
{ "command": "...", "assertType": "...", "selector": "...", "value": "...", "label": "...", "observation": "..."
|
|
2932
|
+
{ "command": "...", "assertType": "...", "selector": "...", "value": "...", "label": "...", "observation": "...",
|
|
2933
|
+
"findLocator": "...", "findValue": "...", "findName": "...", "findIndex": 0, "findExact": true }
|
|
2934
|
+
|
|
1822
2935
|
Only include fields that are present in the original action. The "command" field is required. For assert actions, "assertType" is also required.
|
|
1823
2936
|
|
|
2937
|
+
**\`find_*\` actions (find_click / find_dblclick / find_fill / find_type / find_hover / find_focus / find_check / find_uncheck) are special:**
|
|
2938
|
+
They do NOT use \`selector\`. They use \`findLocator\` + \`findValue\` (and optionally \`findName\` / \`findIndex\` / \`findExact\`). When you keep a \`find_*\` action, you MUST copy **every** \`find*\` field from the original verbatim — dropping any of them silently corrupts the recorded selector and the generated test will be broken. Treat the \`find*\` cluster as one atomic unit: keep all or drop all.
|
|
2939
|
+
|
|
1824
2940
|
Rules:
|
|
1825
2941
|
- Remove actions that were failed attempts superseded by a later successful action (e.g., if \`fill selector="text=Foo"\` was followed by \`fill selector="[placeholder='Foo']"\`, keep only the latter)
|
|
1826
2942
|
- Remove duplicate fill operations on the same field (keep only the last successful fill for each field)
|
|
1827
2943
|
- For \`click\` and \`fill\` actions: if the selector starts with \`text=\`, it is a failed attempt — remove it (text= selectors only work with the wait command, not click/fill)
|
|
2944
|
+
- For \`find_*\` actions: if multiple \`find_*\` of the same command were emitted within the same logical step (Claude tried several locators), keep ONLY the last one — that is the one that finally succeeded
|
|
1828
2945
|
- Keep all snapshot actions — they serve as comments/observations in the generated test
|
|
1829
2946
|
- Keep all assert actions — they are the test's verification points and must not be removed
|
|
1830
2947
|
- Do NOT invent new actions or change values
|
|
@@ -1836,6 +2953,11 @@ ${actions.map((a, i) => {
|
|
|
1836
2953
|
if (a.assertType) parts.push(`assertType="${a.assertType}"`);
|
|
1837
2954
|
if (a.selector) parts.push(`selector="${a.selector}"`);
|
|
1838
2955
|
if (a.value) parts.push(`value="${a.value}"`);
|
|
2956
|
+
if (a.findLocator) parts.push(`findLocator="${a.findLocator}"`);
|
|
2957
|
+
if (a.findValue) parts.push(`findValue="${a.findValue}"`);
|
|
2958
|
+
if (a.findName) parts.push(`findName="${a.findName}"`);
|
|
2959
|
+
if (a.findIndex !== void 0) parts.push(`findIndex=${a.findIndex}`);
|
|
2960
|
+
if (a.findExact) parts.push(`findExact=true`);
|
|
1839
2961
|
if (a.observation) parts.push(`→ ${a.observation}`);
|
|
1840
2962
|
return parts.join(" ");
|
|
1841
2963
|
}).join("\n")}`;
|
|
@@ -2033,7 +3155,7 @@ function applySelectorDrift(script, line, oldSelector, newSelector) {
|
|
|
2033
3155
|
applied: false,
|
|
2034
3156
|
reason: `oldSelector not found on line ${line}`
|
|
2035
3157
|
};
|
|
2036
|
-
lines[idx] = content
|
|
3158
|
+
lines[idx] = replaceSelectorLiteral(content, oldSelector, newSelector);
|
|
2037
3159
|
return {
|
|
2038
3160
|
applied: true,
|
|
2039
3161
|
script: lines.join("\n"),
|
|
@@ -2041,6 +3163,44 @@ function applySelectorDrift(script, line, oldSelector, newSelector) {
|
|
|
2041
3163
|
};
|
|
2042
3164
|
}
|
|
2043
3165
|
/**
 * Rewrite a selector inside whatever string literal encloses it on the line.
 *
 * Plain case: when `newSelector` has no `${NAME}` reference, every occurrence
 * of `oldSelector` on the line is swapped for `newSelector`.
 *
 * Env-reference case: when `newSelector` contains `${...}`, it only works
 * inside a template literal. Leaving it in a plain `"..."` / `'...'` literal
 * would keep `${...}` as literal text. So the line is scanned left to right
 * for string literals of all three kinds, and each one containing
 * `oldSelector` is rewritten:
 *   - a template literal keeps its delimiters and has the selector swapped;
 *   - a quoted literal is promoted to a template literal. Backticks and
 *     `${` that were literal text in the quoted string are escaped so they
 *     stay literal after promotion.
 * If no literal contains `oldSelector`, this falls back to the plain swap.
 *
 * The selector is always inserted verbatim: replacement patterns such as
 * `$&` or `$'` inside `newSelector` are never expanded.
 *
 * @param {string} content - One line of generated script source.
 * @param {string} oldSelector - Selector text currently on the line.
 * @param {string} newSelector - Selector text to put in its place.
 * @returns {string} The rewritten line; `content` unchanged when
 *   `oldSelector` is empty.
 */
function replaceSelectorLiteral(content, oldSelector, newSelector) {
	// An empty needle would splice newSelector between every character.
	if (oldSelector === "") return content;
	// Function replacer: a string replacement would expand `$&`, `$'`, `$$`
	// that happen to occur in the selector (e.g. `input[value='$']`).
	const swapVerbatim = (text) => text.replaceAll(oldSelector, () => newSelector);
	if (!/\$\{[A-Za-z_]/.test(newSelector)) return swapVerbatim(content);

	// Inside a template literal a raw backtick in the selector would end the literal.
	const selectorForTemplate = newSelector.replace(/`/g, "\\`");
	// Escape unescaped backticks / `${` in text that used to live in a quoted
	// literal; existing escape sequences (`\x`) are passed through untouched.
	const keepLiteralInTemplate = (text) => text.replace(/\\[\s\S]|`|\$\{/g, (m) => m[0] === "\\" ? m : "\\" + m);

	// One left-to-right pass over all literal kinds, so the closing backtick of
	// one template literal is never paired with the opening backtick of the next.
	const literalRe = /`(?:[^`\\]|\\.)*`|"(?:[^"\\]|\\.)*"|'(?:[^'\\]|\\.)*'/g;
	let rewroteLiteral = false;
	const rewritten = content.replace(literalRe, (literal) => {
		const inner = literal.slice(1, -1);
		if (!inner.includes(oldSelector)) return literal;
		rewroteLiteral = true;
		const pieces = inner.split(oldSelector);
		const safePieces = literal[0] === "`" ? pieces : pieces.map(keepLiteralInTemplate);
		return "`" + safePieces.join(selectorForTemplate) + "`";
	});
	return rewroteLiteral ? rewritten : swapVerbatim(content);
}
|
|
3200
|
+
/**
 * Escape every regular-expression metacharacter in `s` with a backslash so
 * the result matches `s` literally when embedded in a RegExp source string.
 *
 * @param {string} s - Raw text.
 * @returns {string} `s` with `. * + ? ^ $ { } ( ) | [ ] \` backslash-escaped.
 */
function escapeForRegex(s) {
	return s.replace(/[.*+?^${}()|[\]\\]/g, (metaChar) => `\\${metaChar}`);
}
|
|
3203
|
+
/**
|
|
2044
3204
|
* Build a unified-style diff snippet for showing the user what would change.
|
|
2045
3205
|
* Just the changed lines with -/+ prefixes; not a real patch.
|
|
2046
3206
|
*/
|
|
@@ -2771,10 +3931,13 @@ async function runGenerate(featureName, specName, maxRetries, mode, force, useSn
|
|
|
2771
3931
|
const cleanedActions = await cleanupActions(actions, model);
|
|
2772
3932
|
if (cleanedActions.length !== actions.length) meta("cleaned", cleanedActions.length);
|
|
2773
3933
|
const markers = buildStepMarkers(expanded, cleanedActions);
|
|
3934
|
+
const emptySteps = findEmptySteps(expanded, cleanedActions);
|
|
3935
|
+
if (emptySteps.length > 0) for (const e of emptySteps) warn(`step ${e.stepId} has no kept actions — generated test will skip it (notice comment inserted).`);
|
|
2774
3936
|
const scriptPath = await saveTestScript(featureName, specName, actionsToScript({
|
|
2775
3937
|
actions: cleanedActions,
|
|
2776
3938
|
testName: spec.title,
|
|
2777
|
-
stepMarkers: markers
|
|
3939
|
+
stepMarkers: markers,
|
|
3940
|
+
emptySteps
|
|
2778
3941
|
}));
|
|
2779
3942
|
meta("saved", scriptPath);
|
|
2780
3943
|
blank();
|
|
@@ -2844,6 +4007,42 @@ function buildStepMarkers(steps, actions) {
|
|
|
2844
4007
|
}
|
|
2845
4008
|
return markers;
|
|
2846
4009
|
}
|
|
4010
|
+
/**
 * List the spec steps that have no action left in `cleanedActions`, each
 * with the position where a "step was dropped" notice belongs in the
 * generated script.
 *
 * Steps are walked in spec order. `insertAfterIndex` is the index, within
 * `cleanedActions`, of the last action belonging to the most recently visited
 * step that still has actions. It is -1 when no such step has been visited
 * yet, i.e. the lost step precedes every surviving one.
 *
 * @param {Array<{id: string, source: string}>} steps - Spec steps in spec order.
 * @param {Array<{stepId?: string}>} cleanedActions - Actions that survived cleanup.
 * @returns {Array<{stepId: string, source: string, insertAfterIndex: number}>}
 *   One notice per step without surviving actions, in spec order.
 */
function findEmptySteps(steps, cleanedActions) {
	// stepId -> index of that step's last surviving action.
	const lastIndexByStep = new Map();
	cleanedActions.forEach((action, index) => {
		if (action.stepId) lastIndexByStep.set(action.stepId, index);
	});
	const notices = [];
	let anchorIndex = -1;
	for (const step of steps) {
		if (lastIndexByStep.has(step.id)) {
			anchorIndex = lastIndexByStep.get(step.id);
			continue;
		}
		notices.push({
			stepId: step.id,
			source: step.source,
			insertAfterIndex: anchorIndex
		});
	}
	return notices;
}
|
|
2847
4046
|
async function confirmOverwrite(path) {
|
|
2848
4047
|
if (!process.stdin.isTTY) {
|
|
2849
4048
|
warn(`${path} exists and stdin is not a TTY; refusing to overwrite. Pass --force to allow.`);
|
|
@@ -2912,16 +4111,39 @@ function reattachStepIds(cleaned, original) {
|
|
|
2912
4111
|
cursor = i + 1;
|
|
2913
4112
|
break;
|
|
2914
4113
|
}
|
|
2915
|
-
|
|
2916
|
-
...c,
|
|
2917
|
-
stepId: matched.stepId
|
|
2918
|
-
});
|
|
2919
|
-
else out.push(c);
|
|
4114
|
+
out.push(matched ? mergeFromOriginal(c, matched) : c);
|
|
2920
4115
|
}
|
|
2921
4116
|
return out;
|
|
2922
4117
|
}
|
|
4118
|
+
/**
 * Merge a cleaned action back with the original action it was matched to.
 *
 * The cleaned copy comes from the cleanup model, which is not shown the
 * bookkeeping fields, so they are restored from the original:
 *   - `stepId`: the original's value always wins when it has one. A `stepId`
 *     on the cleaned copy can only have been invented by the model.
 *   - the `find*` cluster (`findLocator`, `findValue`, `findName`,
 *     `findIndex`, `findExact`), for `find_*` commands only: each field the
 *     cleaned copy lacks is filled in from the original. Without them codegen
 *     cannot build the `find` call and emits a "dropped" marker instead.
 *   - `replayUnstable` / `replayReason`: the flag is restored when missing.
 *     The reason is restored whenever the flag was restored or the cleaned
 *     copy has no reason of its own, because codegen inspects the reason text.
 *
 * Neither argument is mutated.
 *
 * @param {object} cleaned - Action as returned by the cleanup pass.
 * @param {object} original - Matching action from the recorded trace.
 * @returns {object} A new action object combining both.
 */
function mergeFromOriginal(cleaned, original) {
	const merged = { ...cleaned };
	// The recorded stepId is authoritative; never keep one the model made up.
	if (original.stepId) merged.stepId = original.stepId;
	if (cleaned.command.startsWith("find_")) {
		if (!merged.findLocator && original.findLocator) merged.findLocator = original.findLocator;
		if (!merged.findValue && original.findValue) merged.findValue = original.findValue;
		if (!merged.findName && original.findName) merged.findName = original.findName;
		// 0 is a valid index, so test for absence rather than truthiness.
		if (merged.findIndex === void 0 && original.findIndex !== void 0) merged.findIndex = original.findIndex;
		if (!merged.findExact && original.findExact) merged.findExact = original.findExact;
	}
	if (original.replayUnstable) {
		const alreadyFlagged = Boolean(merged.replayUnstable);
		if (!alreadyFlagged) merged.replayUnstable = original.replayUnstable;
		if (original.replayReason && (!alreadyFlagged || !merged.replayReason)) merged.replayReason = original.replayReason;
	}
	return merged;
}
|
|
2923
4142
|
/**
 * Decide whether two trace actions describe the same operation, ignoring
 * bookkeeping fields.
 *
 * - Different commands never match.
 * - `find_*` actions: when both carry a `findLocator`, the locator and
 *   `findValue` must agree. When either side lacks a locator, the command
 *   match alone is accepted.
 * - Everything else: `selector`, `value` and `assertType` must agree, with a
 *   missing field treated the same as an empty string.
 *
 * @param {object} a - First action.
 * @param {object} b - Second action.
 * @returns {boolean} True when the two actions have the same shape.
 */
function sameShape(a, b) {
	if (a.command !== b.command) return false;
	if (a.command.startsWith("find_")) {
		// Lenient on purpose when a locator is missing on either side.
		if (!a.findLocator || !b.findLocator) return true;
		return a.findLocator === b.findLocator && (a.findValue ?? "") === (b.findValue ?? "");
	}
	const agreesOn = (field) => (a[field] ?? "") === (b[field] ?? "");
	return agreesOn("selector") && agreesOn("value") && agreesOn("assertType");
}
|
|
2926
4148
|
//#endregion
|
|
2927
4149
|
//#region src/claude/extract-json.ts
|
|
@@ -3164,56 +4386,6 @@ function buildDriftUserPrompt(existing) {
|
|
|
3164
4386
|
});
|
|
3165
4387
|
}
|
|
3166
4388
|
//#endregion
|
|
3167
|
-
//#region src/types.ts
|
|
3168
|
-
const RouteStepSchema = z.object({
|
|
3169
|
-
title: z.string(),
|
|
3170
|
-
action: z.string(),
|
|
3171
|
-
observation: z.string(),
|
|
3172
|
-
status: z.enum([
|
|
3173
|
-
"PASSED",
|
|
3174
|
-
"FAILED",
|
|
3175
|
-
"SKIPPED"
|
|
3176
|
-
]),
|
|
3177
|
-
reason: z.string().optional()
|
|
3178
|
-
});
|
|
3179
|
-
z.object({
|
|
3180
|
-
specName: z.string(),
|
|
3181
|
-
timestamp: z.string(),
|
|
3182
|
-
status: z.enum(["passed", "failed"]),
|
|
3183
|
-
steps: z.array(RouteStepSchema)
|
|
3184
|
-
});
|
|
3185
|
-
const DraftIssueSchema = z.object({
|
|
3186
|
-
severity: z.enum([
|
|
3187
|
-
"OK",
|
|
3188
|
-
"WARN",
|
|
3189
|
-
"ERROR"
|
|
3190
|
-
]),
|
|
3191
|
-
category: z.enum([
|
|
3192
|
-
"assertable",
|
|
3193
|
-
"blocks",
|
|
3194
|
-
"granularity",
|
|
3195
|
-
"unimplemented"
|
|
3196
|
-
]),
|
|
3197
|
-
stepId: z.string().nullable(),
|
|
3198
|
-
message: z.string(),
|
|
3199
|
-
detail: z.string().optional()
|
|
3200
|
-
});
|
|
3201
|
-
const DraftReportSchema = z.object({
|
|
3202
|
-
issues: z.array(DraftIssueSchema),
|
|
3203
|
-
patch: z.string()
|
|
3204
|
-
});
|
|
3205
|
-
const DRAFT_CATEGORY_LABEL = {
|
|
3206
|
-
assertable: "Assertability",
|
|
3207
|
-
blocks: "Block references",
|
|
3208
|
-
granularity: "Step granularity",
|
|
3209
|
-
unimplemented: "Unimplemented checks"
|
|
3210
|
-
};
|
|
3211
|
-
const DraftNamingSchema = z.object({
|
|
3212
|
-
featureName: z.string().min(1),
|
|
3213
|
-
specName: z.string().min(1),
|
|
3214
|
-
reason: z.string().optional()
|
|
3215
|
-
});
|
|
3216
|
-
//#endregion
|
|
3217
4389
|
//#region src/drift/analyze.ts
|
|
3218
4390
|
const DEFAULT_CONCURRENCY$1 = 3;
|
|
3219
4391
|
/**
|