@metasession.co/devaudit-cli 0.1.26 → 0.1.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@metasession.co/devaudit-cli",
3
- "version": "0.1.26",
3
+ "version": "0.1.28",
4
4
  "description": "DevAudit CLI — installs, syncs, and operates the Metasession SDLC across consumer projects.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -33,7 +33,7 @@
33
33
  },
34
34
  "dependencies": {
35
35
  "@clack/prompts": "^0.8.2",
36
- "@metasession.co/devaudit-plugin-sdk": "^0.1.26",
36
+ "@metasession.co/devaudit-plugin-sdk": "^0.1.28",
37
37
  "commander": "^12.1.0",
38
38
  "consola": "^3.2.3",
39
39
  "env-paths": "^3.0.0",
@@ -6,11 +6,22 @@
6
6
  # VERSION=$(./scripts/derive-release-version.sh)
7
7
  #
8
8
  # Priority:
9
- # 1. REQ tag in commit subject: "[REQ-037] feat(kitchen): ..." -> REQ-037
10
- # 2. Ref in commit body: "Ref: REQ-037" -> REQ-037
11
- # 3. Bracketed tag in commit body: merge commit whose body is the PR title
12
- # "... [REQ-037] ..." -> REQ-037
13
- # 4. Fallback: bare date -> v2026.05.17
9
+ # 1. REQ tag in commit subject: "[REQ-037] feat(kitchen): ..." -> REQ-037
10
+ # 2. Ref in commit body: "Ref: REQ-037" -> REQ-037
11
+ # 3. Bracketed tag in commit body: merge commit whose body is the PR title
12
+ # "... [REQ-037] ..." -> REQ-037
13
+ # 4. Pending release ticket on disk: exactly one
14
+ # compliance/pending-releases/RELEASE-TICKET-REQ-XXX.md
15
+ # -> REQ-XXX
16
+ # 5. Fallback: bare date -> v2026.05.17
17
+ #
18
+ # Step 4 (DevAudit-Installer#92) handles `chore:` / `docs:` / `ci:`
19
+ # commits (e.g. a `devaudit update` sync) landing on the integration
20
+ # branch between feature merge and release-PR open. Such a commit has
21
+ # no REQ tag in its message → steps 1-3 fall through. The release
22
+ # ticket on disk is a stronger explicit-operator-state signal than the
23
+ # bare date — when exactly one ticket is open, attribute to it.
24
+ # Multiple open tickets stays ambiguous → bare-date fallback.
14
25
  #
15
26
  # The id is taken from a bracketed [REQ-XXX] tag (subject or body) or the
16
27
  # `Ref:` line — NOT from unbracketed prose (e.g. "target close: REQ-002" must
@@ -53,5 +64,24 @@ if echo "$BODY" | grep -qE '\[REQ-[0-9]+\]'; then
53
64
  exit 0
54
65
  fi
55
66
 
56
- # 4. Fallback: bare date in UTC
67
+ # 4. Pending release ticket on disk: when exactly one
68
+ # `compliance/pending-releases/RELEASE-TICKET-REQ-*.md` is present, the
69
+ # operator's explicit state says THIS is the in-flight release. Use it.
70
+ # Zero or multiple → ambiguous, fall through to the bare date.
71
+ # DevAudit-Installer#92.
72
+ if [ -d compliance/pending-releases ]; then
73
+ # NUL-delimited count so filenames with spaces don't trip us up.
74
+ TICKET_COUNT=$(find compliance/pending-releases -maxdepth 1 -type f \
75
+ -name 'RELEASE-TICKET-REQ-*.md' -print0 2>/dev/null \
76
+ | tr -cd '\0' | wc -c)
77
+ if [ "$TICKET_COUNT" = "1" ]; then
78
+ find compliance/pending-releases -maxdepth 1 -type f \
79
+ -name 'RELEASE-TICKET-REQ-*.md' -print 2>/dev/null \
80
+ | head -1 | xargs -n1 basename \
81
+ | sed -E 's/^RELEASE-TICKET-(REQ-[0-9]+)\.md$/\1/'
82
+ exit 0
83
+ fi
84
+ fi
85
+
86
+ # 5. Fallback: bare date in UTC
57
87
  echo "v$(date -u +%Y.%m.%d)"
@@ -111,6 +111,49 @@ make_fixture "$WORK/c8" "Merge pull request #7 from metasession-dev/feat/req-002
111
111
  chore(deps): [REQ-002] dependency hardening — close R-001"
112
112
  assert_eq "merge-commit body [REQ-002] -> REQ-002" "REQ-002" "$(run_helper)"
113
113
 
114
+ # Case 9 (DevAudit-Installer#92): a chore: sync commit has no REQ tag in
115
+ # its message but a pending release ticket exists on disk. Must attribute
116
+ # to the REQ from that ticket instead of falling through to the bare date.
117
+ # Regression for REQ-051 / REQ-052 gate evidence landing on phantom date
118
+ # releases when a `chore: devaudit update to 0.1.x` commit landed between
119
+ # the feature merge and the release-PR open on wawagardenbar-app.
120
+ make_fixture "$WORK/c9" "chore: devaudit update to 0.1.27"
121
+ mkdir -p compliance/pending-releases
122
+ cat > compliance/pending-releases/RELEASE-TICKET-REQ-051.md <<'TICKET'
123
+ # Release Ticket: REQ-051
124
+
125
+ **Status:** TESTED - PENDING SIGN-OFF
126
+ TICKET
127
+ assert_eq "chore: sync + single pending ticket REQ-051 -> REQ-051" "REQ-051" "$(run_helper)"
128
+
129
+ # Case 10: a chore: sync commit with MULTIPLE pending tickets — ambiguous,
130
+ # stays at the bare-date fallback (don't guess between them).
131
+ make_fixture "$WORK/c10" "chore: devaudit update to 0.1.27"
132
+ mkdir -p compliance/pending-releases
133
+ cat > compliance/pending-releases/RELEASE-TICKET-REQ-051.md <<'TICKET'
134
+ # Release Ticket: REQ-051
135
+ TICKET
136
+ cat > compliance/pending-releases/RELEASE-TICKET-REQ-052.md <<'TICKET'
137
+ # Release Ticket: REQ-052
138
+ TICKET
139
+ assert_eq "chore: sync + two pending tickets -> bare date $TODAY" "$TODAY" "$(run_helper)"
140
+
141
+ # Case 11: a chore: sync commit + no compliance/pending-releases dir at
142
+ # all — still falls back to the bare date (the directory may be missing
143
+ # for projects that haven't started their first tracked release yet).
144
+ make_fixture "$WORK/c11" "chore: devaudit update to 0.1.27"
145
+ assert_eq "chore: sync + no pending dir -> bare date $TODAY" "$TODAY" "$(run_helper)"
146
+
147
+ # Case 12: a feat: commit with a [REQ-XXX] tag in the subject AND a
148
+ # pending ticket for a DIFFERENT REQ. Subject wins (step 1 returns
149
+ # before step 4 fires).
150
+ make_fixture "$WORK/c12" "[REQ-099] feat: in-flight feature for a different REQ"
151
+ mkdir -p compliance/pending-releases
152
+ cat > compliance/pending-releases/RELEASE-TICKET-REQ-051.md <<'TICKET'
153
+ # Release Ticket: REQ-051
154
+ TICKET
155
+ assert_eq "subject [REQ-099] beats pending REQ-051 -> REQ-099" "REQ-099" "$(run_helper)"
156
+
114
157
  echo ""
115
158
  echo "=== Summary: $PASS pass / $FAIL fail ==="
116
159
 
@@ -87,6 +87,8 @@ The bootstrap workflow:
87
87
 
88
88
  10. **Offer a CI job** — write the YAML (or equivalent) for the project's CI system, but **do not commit it without confirmation**. Show it inline first. On a **DevAudit** project, `.github/workflows/ci.yml` is generated and marked do-not-edit-manually — don't hand-edit it; instead drive the E2E gate from `sdlc-config.json`. If the suite must run against a **disposable local database** (the rule on any project with no separate test instance — never test against prod), set `e2e_setup_command` (e.g. `supabase start` + load schema + seed) and `e2e_env` (e.g. `E2E_LOCAL=1`, local coords, a dummy email key) so the gate severs production. See [Local-database E2E in CI](https://github.com/metasession-dev/DevAudit-Installer/blob/main/docs/e2e-local-db-ci.md), then `devaudit update` to regenerate.
89
89
 
90
+ **Upload both artefact shapes.** Playwright writes per-test artefacts to *two* places: `test-results/<spec>-<title>[-retryN]/{trace.zip, video.webm, *.png, error-context.md}` — **spec-named**, human-mappable — and `playwright-report/data/<content-hash>.zip` — **hash-named**, indexed by the HTML report. Ensure the project's CI uploads **both** `playwright-report/` (for the HTML viewer) and `test-results/` (for spec-named traces / videos / error-context). If only one is uploaded, propose a small follow-up PR to add the other — it costs ~80 MB of artefact storage and saves the operator from walking the HTML report's hash index to find a specific trace.
91
+
90
92
  11. **Write a short README** in the test directory explaining structure, how to run, how to add new tests, and how to update visual baselines. Future contributors (and the skill itself, on next invocation) will thank you.
91
93
 
92
94
  After bootstrap, if there's a change to test, continue to Phase 2 as normal. If the user only wanted the suite set up with no specific change in mind, stop here with a final summary.
@@ -157,6 +159,7 @@ Write the tests in the project's existing style.
157
159
  - **Reuse existing helpers.** Page Object Models, fixtures, custom commands, test-data factories — use them. Don't invent parallel infrastructure.
158
160
  - **Match the assertion style.** If the codebase uses `expect(locator).toBeVisible()`, don't switch to `assert.isTrue(...)`.
159
161
  - **Read 2–3 nearby tests before writing.** Fastest way to absorb conventions you wouldn't have noticed otherwise.
162
+ - **Check `references/common-patterns.md` before writing role-based locators** for component-library UI (shadcn/ui, Radix, MUI, etc.). A short appendix of known framework × library gotchas — `CardTitle` is a `<div>` not a heading; Radix `<Select>` renders two `role="combobox"` nodes; Next.js `<Link>` clicks don't fire network requests — saves a round-trip through a failing selector each time.
160
163
 
161
164
  For **visual regression** specifically:
162
165
  - New tests need baseline images. Generate them, but **do not auto-approve** — surface them for the user to verify before they're committed.
@@ -168,15 +171,23 @@ Do additions, updates, and (approved) deletions in the same change so the suite
168
171
 
169
172
  Run the suite. Strategy:
170
173
 
171
- 1. **Run the new and updated tests first** in isolation if the framework supports filtering. Fast feedback on whether your tests themselves work.
172
- 2. **Then run the full suite** to catch regressions outside the changed area.
173
- 3. **For visual regression**, run the project's normal comparison mode against existing baselines.
174
+ 1. **Iterate focused.** During fix-and-verify, run only the failing specs (`--grep`, spec-path args, or a CI input that scopes to a subset). Cycle time is what makes the loop tractable — full regression for every iteration burns CI budget and operator patience. Expect to loop: fix → focused run → fix → focused run, many times.
175
+ 2. **Run full regression once, at the end.** Once the focused set is green, run the full suite to catch unintended side effects in untouched areas.
176
+ 3. **For CI-driven verification, ensure the workflow accepts a subset input.** A `workflow_dispatch.inputs.specs` (or equivalent) lets a developer fire a scoped run without local infrastructure. Recommend setting this up if the project doesn't have it — the speed-up (~5–10 min vs ~30–60 min) is the difference between a tractable loop and a hated one.
177
+ 4. **For visual regression**, run the project's normal comparison mode against existing baselines.
174
178
 
175
179
  Triage every failure into one of these buckets *before* taking any action:
176
180
 
181
+ **0. Read the page snapshot first.** Modern Playwright writes `test-results/<spec>-<title>[-retryN]/error-context.md` — a markdown accessibility-tree snapshot of the page at failure time. It's enough to triage selector / role / wait-condition failures without extracting the trace zip. Reach for the trace only when the snapshot is ambiguous (e.g. the failure depends on a transition or a network race the snapshot can't show).
182
+
183
+ **Filter for all terminal failure statuses.** `failed` and `timedOut` are distinct in Playwright's JSON reporter; `interrupted` is also possible. When summarising failures from `reporter=json` output, use `select(.status == "failed" or .status == "timedOut" or .status == "interrupted")` — `select(.status == "failed")` alone hides hung tests.
184
+
185
+ Then bucket each failure:
186
+
177
187
  - **Flake** — non-deterministic; passes on rerun. Rerun once. If it passes, note it. If it keeps flaking, flag it but don't file a noisy bug.
178
188
  - **Test bug** — your test is wrong (bad selector, wrong assertion, timing). Fix the test; don't file anything.
179
189
  - **Application defect** — the app does the wrong thing. File it.
190
+ - **Seed-data gap** — the page works, the test's assertion is correct, but the seeded fixture doesn't satisfy the assertion (empty table, no transactions for the day, missing user role). Fix the seed script (or the test's own setup), not the test logic or the product.
180
191
  - **Visual diff — intended** — the snapshot changed because the change intentionally changed the UI. Update the baseline and surface it for user approval.
181
192
  - **Visual diff — unintended** — a snapshot changed somewhere the change shouldn't have affected. File it as a regression.
182
193
 
@@ -0,0 +1,98 @@
1
+ # Common framework × component-library patterns
2
+
3
+ Known gotchas that bite Playwright role-based and text-based locators when the
4
+ SUT uses shadcn/ui, Radix UI, Next.js, or similar component libraries. Each
5
+ entry: the **symptom** an operator hits, the **cause** (why the obvious locator
6
+ misses), the **locator that works**.
7
+
8
+ These are the patterns the e2e-test-engineer skill has tripped over in real
9
+ release-suite triage cycles. The list is intentionally short — only patterns
10
+ that have produced an actual failed-locator triage at least once. Add to this
11
+ file when a new framework × library combination produces a recurrent miss.
12
+
13
+ ## shadcn `CardTitle` is a `<div>`, not a heading
14
+
15
+ **Symptom.** `await page.getByRole('heading', { name: /Units of Measurement/i })`
16
+ returns zero matches even though the card visibly shows that title.
17
+
18
+ **Cause.** shadcn/ui's `CardTitle` component renders as a styled `<div>`, not
19
+ `<h1..h6>`. There is no heading role to match.
20
+
21
+ **Locator that works.**
22
+
23
+ ```ts
24
+ // Prefer a data-testid on the card.
25
+ await page.getByTestId('card-units-of-measurement').getByText('Units of Measurement');
26
+ // Or scope by exact text inside the card's title slot.
27
+ await page.locator('[data-slot="card-title"]', { hasText: 'Units of Measurement' });
28
+ ```
29
+
30
+ If the project owns the card markup, the cheap fix is to wrap the title text
31
+ in a real `<h2>` (or pass `as="h2"`) so role-based locators keep working.
32
+
33
+ ## Radix `<Select>` renders two `role="combobox"` nodes
34
+
35
+ **Symptom.** `page.getByRole('combobox').nth(2).click()` collapses onto the
36
+ wrong target when a sibling field is added or reordered.
37
+
38
+ **Cause.** Radix's `<Select>` renders the visible trigger **and** a hidden
39
+ accessibility companion — both report `role="combobox"`. A form with N selects
40
+ has 2N matching nodes, and positional `.nth()` indices shift unpredictably as
41
+ fields are added.
42
+
43
+ **Locator that works.**
44
+
45
+ ```ts
46
+ // Anchor by the associated label, not by position.
47
+ await page.getByLabel('Payment method').click();
48
+ // Or by a data-testid on the trigger:
49
+ await page.getByTestId('select-payment-method').click();
50
+ ```
51
+
52
+ Avoid `.nth(N)` on any role that a Radix primitive renders twice (`combobox`,
53
+ `listbox`, `dialog` in some variants).
54
+
55
+ ## Next.js `<Link>` clicks don't trigger network requests
56
+
57
+ **Symptom.** `await page.waitForLoadState('networkidle')` returns immediately
58
+ after `await link.click()`, before the URL has actually changed; the next
59
+ assertion runs against the previous page's DOM.
60
+
61
+ **Cause.** Next.js's `<Link>` performs client-side route transitions via the
62
+ App Router — no network round-trip, so `networkidle` was already idle.
63
+
64
+ **Locator that works.**
65
+
66
+ ```ts
67
+ await Promise.all([
68
+ page.waitForURL(/\/inventory\/snapshots/),
69
+ page.getByRole('link', { name: 'View snapshots' }).click(),
70
+ ]);
71
+ // Or, after the click:
72
+ await page.waitForURL(/\/inventory\/snapshots/);
73
+ ```
74
+
75
+ `waitForURL` is the right primitive for any client-side navigation (Next.js,
76
+ React Router, Vue Router, SvelteKit). `networkidle` is for full-page loads.
77
+
78
+ ## Button-with-Badge in `CardTitle` breaks `getByText(…, { exact: true })`
79
+
80
+ **Symptom.** `await page.getByText('Filter Tabs', { exact: true })` returns
81
+ zero matches on a card whose title visibly reads "Filter Tabs".
82
+
83
+ **Cause.** The title slot contains `<icon> Filter Tabs <Badge>1</Badge>` — the
84
+ badge's text content concatenates into the parent's text, so the exact match
85
+ is against `"Filter Tabs1"`, not `"Filter Tabs"`.
86
+
87
+ **Locator that works.**
88
+
89
+ ```ts
90
+ // Drop exact: when the title has decorative siblings.
91
+ await page.getByText('Filter Tabs');
92
+ // Or scope by the specific text node.
93
+ await page.locator('[data-slot="card-title"]').filter({ hasText: 'Filter Tabs' });
94
+ ```
95
+
96
+ The same pattern bites any title slot that mixes a text label with a
97
+ count-badge, status pill, or icon-with-tooltip sibling. Default to `exact: false`
98
+ for component-library titles, and use `getByTestId` when ambiguity is real.
@@ -91,6 +91,41 @@ Runs **first**, before any `REQ-XXX` is assigned. It decides which of the six ch
91
91
 
92
92
  Only the **tracked** route continues into Phase 1; the others run the Lightweight path below. The off-ramps are deliberate — dragging housekeeping through tracked-change machinery it doesn't need is exactly the failure mode this step exists to prevent — but they are still **driven to completion**, never dumped as a checklist for the operator to run alone.
93
93
 
94
+ **Worked examples** (one per change-type the skill keeps mis-routing without one):
95
+
96
+ *Tracked feature — REQ-XXX assigned*
97
+
98
+ > - **Change type:** Feature
99
+ > - **Commit type:** feat
100
+ > - **Requirement:** REQ-XXX (new)
101
+ > - **Risk:** MEDIUM
102
+ > - **Path:** Full SDLC Stages 1–5
103
+ > - **Gates/evidence:** plan + RTM row + unit/integration/e2e evidence + UAT four-eyes + Production approval
104
+ > - **Your approvals:** UAT four-eyes + Production approval
105
+ > - **Skipped:** none
106
+
107
+ *Test fix surfaced by suite drift*
108
+
109
+ > - **Change type:** Housekeeping (test maintenance)
110
+ > - **Commit type:** test
111
+ > - **Requirement:** none
112
+ > - **Risk:** LOW
113
+ > - **Path:** Lightweight (gates → PR review → merge)
114
+ > - **Gates/evidence:** quality-gates smoke (default CI Gate 4); no full regression on PR (run via `workflow_dispatch` while iterating, full regression on next nightly)
115
+ > - **Your approvals:** PR review only
116
+ > - **Skipped:** RTM, evidence pack, UAT four-eyes, Production approval
117
+
118
+ *Workflow tweak (CI artifact upload, gate timeout bump, etc.)*
119
+
120
+ > - **Change type:** Housekeeping (CI maintenance)
121
+ > - **Commit type:** ci
122
+ > - **Requirement:** none
123
+ > - **Risk:** LOW
124
+ > - **Path:** Lightweight (gates → verify-via-dispatch → PR review → merge)
125
+ > - **Gates/evidence:** quality-gates smoke + a `gh workflow run <file> --ref <branch>` on the modified workflow before merge (silent CI regressions are the failure mode this catches)
126
+ > - **Your approvals:** PR review only
127
+ > - **Skipped:** RTM, evidence pack, UAT four-eyes, Production approval
128
+
94
129
  ### Lightweight path (housekeeping / trivial / compliance-doc-only)
95
130
 
96
131
  Reached from Phase 0 for non-tracked change-types. The skill drives this end-to-end; the only difference from the tracked cycle is the absence of *ceremony*, not the absence of *guidance*. It pauses only where a human is genuinely required (PR review, merge).
@@ -100,9 +135,10 @@ Reached from Phase 0 for non-tracked change-types. The skill drives this end-to-
100
135
  3. **Run all gates locally** (`npm run lint`, `npx tsc --noEmit`, the test suite, `semgrep`, `npm audit` — or the stack-adapter equivalents). Trivial ≠ unverified; never `--no-verify`.
101
136
  4. **Commit** with a housekeeping type and **no** `REQ-XXX` — `docs:` / `chore:` / `ci:` / `build:` / `test:` / `revert:` are exempt from the `[REQ-XXX]` rule; a `compliance:` doc-only change references the existing REQ. `Co-Authored-By: Claude` if AI-assisted.
102
137
  5. **Push and open the PR** into `$INTEGRATION_BRANCH` (`gh pr create --base "$INTEGRATION_BRANCH" --head <branch>`). CI runs the same quality gates; `compliance-validation.yml` finds no `REQ-XXX` and skips artifact validation.
103
- 6. **Report honest status** wait for CI, name any failing check, fix and re-push. Never announce "ready" while a required check is red.
104
- 7. **Guide review merge.** A human still reviews the PR (separation of duties). There is **no** portal release approval, no UAT four-eyes, no Production gate, and no close-out. Merge once CI is green and the reviewer approves.
105
- 8. **Done.** A housekeeping push produces at most a bare-date release (`vYYYY.MM.DD`) with no approval gate; a doc-only push attaches its docs to the existing `REQ-XXX` release. No further action required report completion and stop.
138
+ 6. **For `ci:` changes, verify-via-dispatch before merging.** `gh workflow run <workflow.yml> --ref <branch>` fires the modified workflow against the PR branch. If the change broke a step, the dispatch run fails loudly and you fix-forward *before* the merge ships the broken gate to `$INTEGRATION_BRANCH`. This is the cheapest insurance against silent CI regressions — a `ci:` change that breaks a gate is most damaging *after* it lands.
139
+ 7. **Report honest status** — wait for CI, name any failing check, fix and re-push. Never announce "ready" while a required check is red.
140
+ 8. **Guide review → merge.** A human still reviews the PR (separation of duties). There is **no** portal release approval, no UAT four-eyes, no Production gate, and no close-out. Merge once CI is green and the reviewer approves.
141
+ 9. **Done.** A housekeeping push produces at most a bare-date release (`vYYYY.MM.DD`) with no approval gate; a doc-only push attaches its docs to the existing `REQ-XXX` release. No further action required — report completion and stop.
106
142
 
107
143
  ### Phase 1 — Plan (SDLC stage 1)
108
144
 
@@ -127,13 +163,17 @@ Reached only on the **tracked** route from Phase 0 (the issue is already fetched
127
163
  - CRITICAL — HIGH plus targeted security tests (authz bypass attempts, input fuzzing where applicable).
128
164
  3. **For any e2e or visual-regression test work in this step, invoke `e2e-test-engineer`** — do not author e2e tests directly. The orchestrator passes the implementation plan + the diff so far to the e2e-test-engineer skill, which derives scenarios, reconciles with the existing pack, and runs the suite.
129
165
  4. **Implement against the plan.** Reference `compliance/plans/REQ-XXX/implementation-plan.md` as you go. Any deviation from the plan must be noted in the plan itself under a `## Plan deviation` section — never silently diverge.
130
- 5. **Run all gates locally** before pushing:
166
+ 5. **Run gates locally, cheap-first.** The gates are not equivalent-cost — `npm run lint` is seconds, `npx playwright test` is 30–60 minutes. Iterate on the fast gates; spend the e2e cost once.
167
+
168
+ **Fast gates** (run on every change, ideally pre-commit):
131
169
  - `npm run lint` (or stack-adapter equivalent)
132
170
  - `npx tsc --noEmit` (or stack-adapter equivalent)
133
171
  - `npx vitest run` (unit/integration)
134
- - `npx playwright test` (e2e — delegated to `e2e-test-engineer`)
135
172
  - `semgrep scan --config auto`
136
173
  - `npm audit --audit-level=high` (or stack-adapter equivalent)
174
+
175
+ **E2E gate** — run *once*, after the fast gates are clean:
176
+ - `npx playwright test` (delegated to `e2e-test-engineer`, which has its own focused-iteration discipline for within-e2e fix-and-verify loops)
137
177
  6. **On gate failure**, iterate up to N=3 attempts. Each iteration: read the failure output, propose a fix, apply, re-run. On exhausted attempts, halt with the full failure output and surface to the human — never use `--no-verify`, `eslint-disable`, `@ts-expect-error`, `xfail`, or any other bypass.
138
178
  7. **Commit** using Conventional Commits with `Ref: REQ-XXX` trailer and `Co-Authored-By: Claude` trailer. One commit per logical step; never amend a commit that's already been pushed.
139
179
  8. **Land the work on `$INTEGRATION_BRANCH`.** Push the feature branch, then:
@@ -150,9 +190,12 @@ Reached only on the **tracked** route from Phase 0 (the issue is already fetched
150
190
  compliance/evidence/REQ-XXX/
151
191
  ├── YYYY-MM-DD_e2e-results.json
152
192
  ├── YYYY-MM-DD_playwright-report/
193
+ ├── YYYY-MM-DD_traces/ ← per-test trace.zip + error-context.md
153
194
  ├── YYYY-MM-DD_unit-coverage/
154
195
  └── YYYY-MM-DD_screenshots/*.png
155
196
  ```
197
+
198
+ Copy Playwright's `test-results/` folder verbatim into `YYYY-MM-DD_traces/` so trace-by-test-name is available for audit without walking the HTML report's hash-name index. For HIGH/CRITICAL releases the traces are part of the audit trail — *"what state was the page in when test X failed and was overridden?"* is answered by one `ls` instead of an HTML-report walk.
156
199
  3. **Upload each artefact to the portal**:
157
200
  ```bash
158
201
  devaudit push <project-slug> REQ-XXX <evidence-type> <file> \
@@ -181,10 +224,19 @@ Reached only on the **tracked** route from Phase 0 (the issue is already fetched
181
224
  - Test plan
182
225
  - SDLC checklist
183
226
  2. **Verify the UAT reviewer ≠ skill-trigger user** for HIGH/CRITICAL. If they match, halt with a configuration error: "HIGH/CRITICAL risk requires an independent UAT reviewer; the configured reviewer matches the trigger user — fix the four-eyes attestation slot in the implementation plan and re-run."
227
+
228
+ **Solo-operator teams.** On a one-person team, the literal "reviewer ≠ submitter" check is structurally unsatisfiable. The supported interpretation is *actor type, not human identity* — AI tooling (the skill-trigger) and the human operator (the portal-approver) are distinct actors. Document this on the release ticket under `## Sign-off (dual-actor)` with the explicit interpretation, and ensure the human operator has independently reviewed the diff before clicking *Approve Production* in the portal. Without this attestation the four-eyes claim is performative.
184
229
  3. **Apply labels** — `awaiting-uat-review`, `risk:<class>`.
185
230
  4. **Comment on the issue**: "Implementation complete. PR #M opened. Evidence on portal: <link>. UAT review requested. Resume with `resume REQ-XXX` once UAT approval is granted on the portal."
186
231
  5. **Hard stop.** Phase 4 ends here. Do not proceed to merge; the human's next action is reviewing on the portal.
187
232
 
233
+ **When an external gate hangs or fails for unrelated reasons.** A required gate may fail for reasons outside the change's scope — flaky infra, an unrelated regression test that hangs at hour-plus runtime with no log activity, a known-failing suite. When this happens:
234
+
235
+ 1. **Verify it's actually unrelated.** Read the failure (or the lack of one). If it's the change's fault, fix it; this section does not apply.
236
+ 2. **Document the rationale on the PR.** A sticky comment naming: which gate, what the failure was, why it's unrelated to the change, what the safety net is (nightly run on `$INTEGRATION_BRANCH`, post-deploy verification, etc.).
237
+ 3. **Cancel-and-admin-merge is allowed** when **all three** hold: (a) ≥3 other required gates are green, (b) the change has no scope-overlap with the failing gate (e.g. service-layer fix vs hung UI e2e, or an `E2E: N/A by scope` test-plan), and (c) a fallback verification exists (nightly e2e on `$INTEGRATION_BRANCH`, post-deploy smoke, etc.). If any of the three fail, hold the merge and surface the blocker to the operator.
238
+ 4. **Record the decision in the release ticket.** The release ticket's `## Verification` section must mention the cancelled gate by run-ID and the fallback that justifies bypassing it. Auditors look here first.
239
+
188
240
  ### Phase 5 — Finalise or change-request loop (SDLC stage 5)
189
241
 
190
242
  Invoked separately by the user after UAT activity on the portal. Trigger: "resume REQ-XXX", "REQ-XXX UAT done", or just re-firing the skill on the same issue.
@@ -372,10 +372,16 @@ jobs:
372
372
  --category test_report ${FLAGS}
373
373
  fi
374
374
 
375
- # Upload test summary report (test_report category)
375
+ # Upload test summary report — precise evidence_type=test_report
376
+ # (was compliance_document). The portal's Compliance Gates panel
377
+ # filters by evidence_type, so the markdown summary belongs in the
378
+ # Test Reports gate alongside playwright-report.zip + coverage
379
+ # summary. Markdown renders inline (MarkdownRenderer); auditor
380
+ # reads pass/fail counts + narrative without downloading the zip.
381
+ # devaudit#370 follow-up.
376
382
  if [ -f "compliance/test-summary-report.md" ]; then
377
383
  upload test-summary-report.md \
378
- {{PROJECT_SLUG}} _compliance-docs compliance_document compliance/test-summary-report.md \
384
+ {{PROJECT_SLUG}} _compliance-docs test_report compliance/test-summary-report.md \
379
385
  --category test_report ${FLAGS}
380
386
  fi
381
387
 
@@ -125,8 +125,9 @@ jobs:
125
125
  DERIVED_META=()
126
126
  [ -n "$DERIVED_CT" ] && DERIVED_META+=(--change-type "$DERIVED_CT")
127
127
 
128
- # Upload compliance docs (planning category)
129
- for DOC in compliance/RTM.md compliance/test-plan.md compliance/test-cases.md compliance/test-summary-report.md; do
128
+ # Upload planning docs (RTM / Test Plan / Test Cases) as
129
+ # compliance_document — they surface under the Documents tab.
130
+ for DOC in compliance/RTM.md compliance/test-plan.md compliance/test-cases.md; do
130
131
  if [ -f "$DOC" ]; then
131
132
  echo "Uploading: $(basename "$DOC")"
132
133
  bash scripts/upload-evidence.sh \
@@ -137,6 +138,19 @@ jobs:
137
138
  fi
138
139
  done
139
140
 
141
+ # Test summary report — precise evidence_type=test_report so it
142
+ # lands in the portal's Test Reports gate (rendered inline by the
143
+ # MarkdownRenderer). devaudit#370 follow-up; same change applied
144
+ # in ci.yml's gate-evidence upload step.
145
+ if [ -f "compliance/test-summary-report.md" ]; then
146
+ echo "Uploading: test-summary-report.md (test_report type)"
147
+ bash scripts/upload-evidence.sh \
148
+ {{PROJECT_SLUG}} _compliance-docs test_report compliance/test-summary-report.md \
149
+ --category test_report ${FLAGS} --release "${DERIVED_RELEASE}" \
150
+ "${DERIVED_META[@]}" \
151
+ || echo "Warning: Failed to upload test-summary-report.md"
152
+ fi
153
+
140
154
  # Project-level governance docs (devaudit#370 Phase 3a). When the
141
155
  # operator commits any of these markdown files, upload with the
142
156
  # precise evidence_type so the portal's framework-coverage matrix