npm - @metasession.co/devaudit-cli - Versions diffs - 0.1.26 → 0.1.28 - Mend

@metasession.co/devaudit-cli 0.1.26 → 0.1.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@metasession.co/devaudit-cli",
-  "version": "0.1.26",
+  "version": "0.1.28",
   "description": "DevAudit CLI — installs, syncs, and operates the Metasession SDLC across consumer projects.",
   "type": "module",
   "bin": {
@@ -33,7 +33,7 @@
   },
   "dependencies": {
     "@clack/prompts": "^0.8.2",
-    "@metasession.co/devaudit-plugin-sdk": "^0.1.26",
+    "@metasession.co/devaudit-plugin-sdk": "^0.1.28",
     "commander": "^12.1.0",
     "consola": "^3.2.3",
     "env-paths": "^3.0.0",

package/sdlc/files/_common/scripts/derive-release-version.sh CHANGED Viewed

@@ -6,11 +6,22 @@
 #   VERSION=$(./scripts/derive-release-version.sh)
 #
 # Priority:
-#   1. REQ tag in commit subject:   "[REQ-037] feat(kitchen): ..." -> REQ-037
-#   2. Ref in commit body:          "Ref: REQ-037"                 -> REQ-037
-#   3. Bracketed tag in commit body: merge commit whose body is the PR title
-#                                    "... [REQ-037] ..."           -> REQ-037
-#   4. Fallback:                    bare date                      -> v2026.05.17
+#   1. REQ tag in commit subject:     "[REQ-037] feat(kitchen): ..." -> REQ-037
+#   2. Ref in commit body:            "Ref: REQ-037"                 -> REQ-037
+#   3. Bracketed tag in commit body:   merge commit whose body is the PR title
+#                                      "... [REQ-037] ..."           -> REQ-037
+#   4. Pending release ticket on disk: exactly one
+#                                      compliance/pending-releases/RELEASE-TICKET-REQ-XXX.md
+#                                                                    -> REQ-XXX
+#   5. Fallback:                      bare date                      -> v2026.05.17
+#
+# Step 4 (DevAudit-Installer#92) handles `chore:` / `docs:` / `ci:`
+# commits (e.g. a `devaudit update` sync) landing on the integration
+# branch between feature merge and release-PR open. Such a commit has
+# no REQ tag in its message → steps 1-3 fall through. The release
+# ticket on disk is a stronger explicit-operator-state signal than the
+# bare date — when exactly one ticket is open, attribute to it.
+# Multiple open tickets stay ambiguous → bare-date fallback.
 #
 # The id is taken from a bracketed [REQ-XXX] tag (subject or body) or the
 # `Ref:` line — NOT from unbracketed prose (e.g. "target close: REQ-002" must
@@ -53,5 +64,24 @@ if echo "$BODY" | grep -qE '\[REQ-[0-9]+\]'; then
   exit 0
 fi
-# 4. Fallback: bare date in UTC
+# 4. Pending release ticket on disk: when exactly one
+# `compliance/pending-releases/RELEASE-TICKET-REQ-*.md` is present, the
+# operator's explicit state says THIS is the in-flight release. Use it.
+# Zero or multiple → ambiguous, fall through to the bare date. A single
+# file whose name does not parse as RELEASE-TICKET-REQ-<digits>.md (the
+# glob is looser than the id pattern) also falls through, rather than
+# echoing the raw filename as the release version.
+# DevAudit-Installer#92.
+if [ -d compliance/pending-releases ]; then
+  # NUL-delimited count so filenames with spaces don't trip us up.
+  # BSD/macOS `wc -c` left-pads its output with spaces, so strip
+  # whitespace before the string comparison below.
+  TICKET_COUNT=$(find compliance/pending-releases -maxdepth 1 -type f \
+    -name 'RELEASE-TICKET-REQ-*.md' -print0 2>/dev/null \
+    | tr -cd '\0' | wc -c | tr -d '[:space:]')
+  if [ "$TICKET_COUNT" = "1" ]; then
+    # Pull the id straight out of the path with sed instead of piping
+    # through `xargs basename` (which word-splits on spaces). `-n` + `p`
+    # prints only when the whole filename matches the id pattern.
+    TICKET_ID=$(find compliance/pending-releases -maxdepth 1 -type f \
+      -name 'RELEASE-TICKET-REQ-*.md' -print 2>/dev/null \
+      | sed -nE 's#^.*/RELEASE-TICKET-(REQ-[0-9]+)\.md$#\1#p' | head -1)
+    # Empty id → the lone file is not a well-formed ticket; keep falling
+    # through to the bare-date fallback.
+    if [ -n "$TICKET_ID" ]; then
+      echo "$TICKET_ID"
+      exit 0
+    fi
+  fi
+fi
+# 5. Fallback: bare date in UTC
 echo "v$(date -u +%Y.%m.%d)"

package/sdlc/files/_common/scripts/derive-release-version.test.sh CHANGED Viewed

@@ -111,6 +111,49 @@ make_fixture "$WORK/c8" "Merge pull request #7 from metasession-dev/feat/req-002
 chore(deps): [REQ-002] dependency hardening — close R-001"
 assert_eq "merge-commit body [REQ-002] -> REQ-002" "REQ-002" "$(run_helper)"
+# Case 9 (DevAudit-Installer#92): a chore: sync commit has no REQ tag in
+# its message but a pending release ticket exists on disk. Must attribute
+# to the REQ from that ticket instead of falling through to the bare date.
+# Regression for REQ-051 / REQ-052 gate evidence landing on phantom date
+# releases when a `chore: devaudit update to 0.1.x` commit landed between
+# the feature merge and the release-PR open on wawagardenbar-app.
+make_fixture "$WORK/c9" "chore: devaudit update to 0.1.27"
+mkdir -p compliance/pending-releases
+cat > compliance/pending-releases/RELEASE-TICKET-REQ-051.md <<'TICKET'
+# Release Ticket: REQ-051
+**Status:** TESTED - PENDING SIGN-OFF
+TICKET
+assert_eq "chore: sync + single pending ticket REQ-051 -> REQ-051" "REQ-051" "$(run_helper)"
+# Case 10: a chore: sync commit with MULTIPLE pending tickets — ambiguous,
+# stays at the bare-date fallback (don't guess between them).
+make_fixture "$WORK/c10" "chore: devaudit update to 0.1.27"
+mkdir -p compliance/pending-releases
+cat > compliance/pending-releases/RELEASE-TICKET-REQ-051.md <<'TICKET'
+# Release Ticket: REQ-051
+TICKET
+cat > compliance/pending-releases/RELEASE-TICKET-REQ-052.md <<'TICKET'
+# Release Ticket: REQ-052
+TICKET
+assert_eq "chore: sync + two pending tickets -> bare date $TODAY" "$TODAY" "$(run_helper)"
+# Case 11: a chore: sync commit + no compliance/pending-releases dir at
+# all — still falls back to the bare date (the directory may be missing
+# for projects that haven't started their first tracked release yet).
+make_fixture "$WORK/c11" "chore: devaudit update to 0.1.27"
+assert_eq "chore: sync + no pending dir -> bare date $TODAY" "$TODAY" "$(run_helper)"
+# Case 12: a feat: commit with a [REQ-XXX] tag in the subject AND a
+# pending ticket for a DIFFERENT REQ. Subject wins (step 1 returns
+# before step 4 fires).
+make_fixture "$WORK/c12" "[REQ-099] feat: in-flight feature for a different REQ"
+mkdir -p compliance/pending-releases
+cat > compliance/pending-releases/RELEASE-TICKET-REQ-051.md <<'TICKET'
+# Release Ticket: REQ-051
+TICKET
+assert_eq "subject [REQ-099] beats pending REQ-051 -> REQ-099" "REQ-099" "$(run_helper)"
 echo ""
 echo "=== Summary: $PASS pass / $FAIL fail ==="

package/sdlc/files/_common/skills/e2e-test-engineer/SKILL.md CHANGED Viewed

@@ -87,6 +87,8 @@ The bootstrap workflow:
 10. **Offer a CI job** — write the YAML (or equivalent) for the project's CI system, but **do not commit it without confirmation**. Show it inline first. On a **DevAudit** project, `.github/workflows/ci.yml` is generated and marked do-not-edit-manually — don't hand-edit it; instead drive the E2E gate from `sdlc-config.json`. If the suite must run against a **disposable local database** (the rule on any project with no separate test instance — never test against prod), set `e2e_setup_command` (e.g. `supabase start` + load schema + seed) and `e2e_env` (e.g. `E2E_LOCAL=1`, local coords, a dummy email key) so the gate severs production. See [Local-database E2E in CI](https://github.com/metasession-dev/DevAudit-Installer/blob/main/docs/e2e-local-db-ci.md), then `devaudit update` to regenerate.
+    **Upload both artefact shapes.** Playwright writes per-test artefacts to *two* places: `test-results/<spec>-<title>[-retryN]/{trace.zip, video.webm, *.png, error-context.md}` — **spec-named**, human-mappable — and `playwright-report/data/<content-hash>.zip` — **hash-named**, indexed by the HTML report. Ensure the project's CI uploads **both** `playwright-report/` (for the HTML viewer) and `test-results/` (for spec-named traces / videos / error-context). If only one is uploaded, propose a small follow-up PR to add the other — it costs ~80 MB of artefact storage and saves the operator from walking the HTML report's hash index to find a specific trace.
 11. **Write a short README** in the test directory explaining structure, how to run, how to add new tests, and how to update visual baselines. Future contributors (and the skill itself, on next invocation) will thank you.
 After bootstrap, if there's a change to test, continue to Phase 2 as normal. If the user only wanted the suite set up with no specific change in mind, stop here with a final summary.
@@ -157,6 +159,7 @@ Write the tests in the project's existing style.
 - **Reuse existing helpers.** Page Object Models, fixtures, custom commands, test-data factories — use them. Don't invent parallel infrastructure.
 - **Match the assertion style.** If the codebase uses `expect(locator).toBeVisible()`, don't switch to `assert.isTrue(...)`.
 - **Read 2–3 nearby tests before writing.** Fastest way to absorb conventions you wouldn't have noticed otherwise.
+- **Check `references/common-patterns.md` before writing role-based locators** for component-library UI (shadcn/ui, Radix, MUI, etc.). A short appendix of known framework × library gotchas — `CardTitle` is a `<div>` not a heading; Radix `<Select>` renders two `role="combobox"` nodes; Next.js `<Link>` clicks don't fire network requests — saves a round-trip through a failing selector each time.
 For **visual regression** specifically:
 - New tests need baseline images. Generate them, but **do not auto-approve** — surface them for the user to verify before they're committed.
@@ -168,15 +171,23 @@ Do additions, updates, and (approved) deletions in the same change so the suite
 Run the suite. Strategy:
-1. **Run the new and updated tests first** in isolation if the framework supports filtering. Fast feedback on whether your tests themselves work.
-2. **Then run the full suite** to catch regressions outside the changed area.
-3. **For visual regression**, run the project's normal comparison mode against existing baselines.
+1. **Iterate focused.** During fix-and-verify, run only the failing specs (`--grep`, spec-path args, or a CI input that scopes to a subset). Cycle time is what makes the loop tractable — full regression for every iteration burns CI budget and operator patience. Expect to loop: fix → focused run → fix → focused run, many times.
+2. **Run full regression once, at the end.** Once the focused set is green, run the full suite to catch unintended side effects in untouched areas.
+3. **For CI-driven verification, ensure the workflow accepts a subset input.** A `workflow_dispatch.inputs.specs` (or equivalent) lets a developer fire a scoped run without local infrastructure. Recommend setting this up if the project doesn't have it — the speed-up (~5–10 min vs ~30–60 min) is the difference between a tractable loop and a hated one.
+4. **For visual regression**, run the project's normal comparison mode against existing baselines.
 Triage every failure into one of these buckets *before* taking any action:
+**0. Read the page snapshot first.** Modern Playwright writes `test-results/<spec>-<title>[-retryN]/error-context.md` — a markdown accessibility-tree snapshot of the page at failure time. It's enough to triage selector / role / wait-condition failures without extracting the trace zip. Reach for the trace only when the snapshot is ambiguous (e.g. the failure depends on a transition or a network race the snapshot can't show).
+**Filter for all terminal failure statuses.** `failed` and `timedOut` are distinct in Playwright's JSON reporter; `interrupted` is also possible. When summarising failures from `reporter=json` output, use `select(.status == "failed" or .status == "timedOut" or .status == "interrupted")` — `select(.status == "failed")` alone hides hung tests.
+Then bucket each failure:
 - **Flake** — non-deterministic; passes on rerun. Rerun once. If it passes, note it. If it keeps flaking, flag it but don't file a noisy bug.
 - **Test bug** — your test is wrong (bad selector, wrong assertion, timing). Fix the test; don't file anything.
 - **Application defect** — the app does the wrong thing. File it.
+- **Seed-data gap** — the page works, the test's assertion is correct, but the seeded fixture doesn't satisfy the assertion (empty table, no transactions for the day, missing user role). Fix the seed script (or the test's own setup), not the test logic or the product.
 - **Visual diff — intended** — the snapshot changed because the change intentionally changed the UI. Update the baseline and surface it for user approval.
 - **Visual diff — unintended** — a snapshot changed somewhere the change shouldn't have affected. File it as a regression.

package/sdlc/files/_common/skills/e2e-test-engineer/references/common-patterns.md ADDED Viewed

@@ -0,0 +1,98 @@
+# Common framework × component-library patterns
+Known gotchas that bite Playwright role-based and text-based locators when the
+SUT uses shadcn/ui, Radix UI, Next.js, or similar component libraries. Each
+entry: the **symptom** an operator hits, the **cause** (why the obvious locator
+misses), the **locator that works**.
+These are the patterns the e2e-test-engineer skill has tripped over in real
+release-suite triage cycles. The list is intentionally short — only patterns
+that have produced an actual failed-locator triage at least once. Add to this
+file when a new framework × library combination produces a recurrent miss.
+## shadcn `CardTitle` is a `<div>`, not a heading
+**Symptom.** `await page.getByRole('heading', { name: /Units of Measurement/i })`
+returns zero matches even though the card visibly shows that title.
+**Cause.** shadcn/ui's `CardTitle` component renders as a styled `<div>`, not
+`<h1..h6>`. There is no heading role to match.
+**Locator that works.**
+```ts
+// Prefer a data-testid on the card.
+await page.getByTestId('card-units-of-measurement').getByText('Units of Measurement');
+// Or scope by text inside the card's title slot (`hasText` is a substring match, not exact).
+await page.locator('[data-slot="card-title"]', { hasText: 'Units of Measurement' });
+```
+If the project owns the card markup, the cheap fix is to wrap the title text
+in a real `<h2>` (or pass `as="h2"`) so role-based locators keep working.
+## Radix `<Select>` renders two `role="combobox"` nodes
+**Symptom.** `page.getByRole('combobox').nth(2).click()` collapses onto the
+wrong target when a sibling field is added or reordered.
+**Cause.** Radix's `<Select>` renders the visible trigger **and** a hidden
+accessibility companion — both report `role="combobox"`. A form with N selects
+has 2N matching nodes, and positional `.nth()` indices shift unpredictably as
+fields are added.
+**Locator that works.**
+```ts
+// Anchor by the associated label, not by position.
+await page.getByLabel('Payment method').click();
+// Or by a data-testid on the trigger:
+await page.getByTestId('select-payment-method').click();
+```
+Avoid `.nth(N)` on any role that a Radix primitive renders twice (`combobox`,
+`listbox`, `dialog` in some variants).
+## Next.js `<Link>` clicks don't trigger network requests
+**Symptom.** `await page.waitForLoadState('networkidle')` returns immediately
+after `await link.click()`, before the URL has actually changed; the next
+assertion runs against the previous page's DOM.
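+**Cause.** Next.js's `<Link>` performs client-side route transitions via the
+App Router — no new document load, so the page's load state never resets and
+`networkidle` resolves immediately (any route data fetched in the background
+does not start a new navigation).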
+**Locator that works.**
+```ts
+await Promise.all([
+  page.waitForURL(/\/inventory\/snapshots/),
+  page.getByRole('link', { name: 'View snapshots' }).click(),
+]);
+// Or, after the click:
+await page.waitForURL(/\/inventory\/snapshots/);
+```
+`waitForURL` is the right primitive for any client-side navigation (Next.js,
+React Router, Vue Router, SvelteKit). `networkidle` is for full-page loads.
+## Button-with-Badge in `CardTitle` breaks `getByText(…, { exact: true })`
+**Symptom.** `await page.getByText('Filter Tabs', { exact: true })` returns
+zero matches on a card whose title visibly reads "Filter Tabs".
+**Cause.** The title slot contains `<icon> Filter Tabs <Badge>1</Badge>` — the
+badge's text content concatenates into the parent's text, so the exact match
+is against `"Filter Tabs1"`, not `"Filter Tabs"`.
+**Locator that works.**
+```ts
+// Drop `exact: true` when the title has decorative siblings.
+await page.getByText('Filter Tabs');
+// Or scope by the specific text node.
+await page.locator('[data-slot="card-title"]').filter({ hasText: 'Filter Tabs' });
+```
+The same pattern bites any title slot that mixes a text label with a
+count-badge, status pill, or icon-with-tooltip sibling. Default to `exact: false`
+for component-library titles, and use `getByTestId` when ambiguity is real.

package/sdlc/files/_common/skills/sdlc-implementer/SKILL.md CHANGED Viewed

@@ -91,6 +91,41 @@ Runs **first**, before any `REQ-XXX` is assigned. It decides which of the six ch
 Only the **tracked** route continues into Phase 1; the others run the Lightweight path below. The off-ramps are deliberate — dragging housekeeping through tracked-change machinery it doesn't need is exactly the failure mode this step exists to prevent — but they are still **driven to completion**, never dumped as a checklist for the operator to run alone.
+**Worked examples** (one per change-type the skill keeps mis-routing without one):
+*Tracked feature — REQ-XXX assigned*
+> - **Change type:** Feature
+> - **Commit type:** feat
+> - **Requirement:** REQ-XXX (new)
+> - **Risk:** MEDIUM
+> - **Path:** Full SDLC Stages 1–5
+> - **Gates/evidence:** plan + RTM row + unit/integration/e2e evidence + UAT four-eyes + Production approval
+> - **Your approvals:** UAT four-eyes + Production approval
+> - **Skipped:** none
+*Test fix surfaced by suite drift*
+> - **Change type:** Housekeeping (test maintenance)
+> - **Commit type:** test
+> - **Requirement:** none
+> - **Risk:** LOW
+> - **Path:** Lightweight (gates → PR review → merge)
+> - **Gates/evidence:** quality-gates smoke (default CI Gate 4); no full regression on PR (run via `workflow_dispatch` while iterating, full regression on next nightly)
+> - **Your approvals:** PR review only
+> - **Skipped:** RTM, evidence pack, UAT four-eyes, Production approval
+*Workflow tweak (CI artifact upload, gate timeout bump, etc.)*
+> - **Change type:** Housekeeping (CI maintenance)
+> - **Commit type:** ci
+> - **Requirement:** none
+> - **Risk:** LOW
+> - **Path:** Lightweight (gates → verify-via-dispatch → PR review → merge)
+> - **Gates/evidence:** quality-gates smoke + a `gh workflow run <file> --ref <branch>` on the modified workflow before merge (silent CI regressions are the failure mode this catches)
+> - **Your approvals:** PR review only
+> - **Skipped:** RTM, evidence pack, UAT four-eyes, Production approval
 ### Lightweight path (housekeeping / trivial / compliance-doc-only)
 Reached from Phase 0 for non-tracked change-types. The skill drives this end-to-end; the only difference from the tracked cycle is the absence of *ceremony*, not the absence of *guidance*. It pauses only where a human is genuinely required (PR review, merge).
@@ -100,9 +135,10 @@ Reached from Phase 0 for non-tracked change-types. The skill drives this end-to-
 3. **Run all gates locally** (`npm run lint`, `npx tsc --noEmit`, the test suite, `semgrep`, `npm audit` — or the stack-adapter equivalents). Trivial ≠ unverified; never `--no-verify`.
 4. **Commit** with a housekeeping type and **no** `REQ-XXX` — `docs:` / `chore:` / `ci:` / `build:` / `test:` / `revert:` are exempt from the `[REQ-XXX]` rule; a `compliance:` doc-only change references the existing REQ. `Co-Authored-By: Claude` if AI-assisted.
 5. **Push and open the PR** into `$INTEGRATION_BRANCH` (`gh pr create --base "$INTEGRATION_BRANCH" --head <branch>`). CI runs the same quality gates; `compliance-validation.yml` finds no `REQ-XXX` and skips artifact validation.
-6. **Report honest status** — wait for CI, name any failing check, fix and re-push. Never announce "ready" while a required check is red.
-7. **Guide review → merge.** A human still reviews the PR (separation of duties). There is **no** portal release approval, no UAT four-eyes, no Production gate, and no close-out. Merge once CI is green and the reviewer approves.
-8. **Done.** A housekeeping push produces at most a bare-date release (`vYYYY.MM.DD`) with no approval gate; a doc-only push attaches its docs to the existing `REQ-XXX` release. No further action required — report completion and stop.
+6. **For `ci:` changes, verify-via-dispatch before merging.** `gh workflow run <workflow.yml> --ref <branch>` fires the modified workflow against the PR branch. If the change broke a step, the dispatch run fails loudly and you fix-forward *before* the merge ships the broken gate to `$INTEGRATION_BRANCH`. This is the cheapest insurance against silent CI regressions — a `ci:` change that breaks a gate is most damaging *after* it lands.
+7. **Report honest status** — wait for CI, name any failing check, fix and re-push. Never announce "ready" while a required check is red.
+8. **Guide review → merge.** A human still reviews the PR (separation of duties). There is **no** portal release approval, no UAT four-eyes, no Production gate, and no close-out. Merge once CI is green and the reviewer approves.
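+9. **Done.** A housekeeping push produces at most a bare-date release (`vYYYY.MM.DD`) with no approval gate; a doc-only push attaches its docs to the existing `REQ-XXX` release. Exception: if exactly one `compliance/pending-releases/RELEASE-TICKET-REQ-XXX.md` is on disk, `derive-release-version.sh` attributes the push to that in-flight `REQ-XXX` instead of a bare date. No further action required — report completion and stop.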
 ### Phase 1 — Plan (SDLC stage 1)
@@ -127,13 +163,17 @@ Reached only on the **tracked** route from Phase 0 (the issue is already fetched
    - CRITICAL — HIGH plus targeted security tests (authz bypass attempts, input fuzzing where applicable).
 3. **For any e2e or visual-regression test work in this step, invoke `e2e-test-engineer`** — do not author e2e tests directly. The orchestrator passes the implementation plan + the diff so far to the e2e-test-engineer skill, which derives scenarios, reconciles with the existing pack, and runs the suite.
 4. **Implement against the plan.** Reference `compliance/plans/REQ-XXX/implementation-plan.md` as you go. Any deviation from the plan must be noted in the plan itself under a `## Plan deviation` section — never silently diverge.
-5. **Run all gates locally** before pushing:
+5. **Run gates locally, cheap-first.** The gates are not equivalent-cost — `npm run lint` is seconds, `npx playwright test` is 30–60 minutes. Iterate on the fast gates; spend the e2e cost once.
+   **Fast gates** (run on every change, ideally pre-commit):
    - `npm run lint` (or stack-adapter equivalent)
    - `npx tsc --noEmit` (or stack-adapter equivalent)
    - `npx vitest run` (unit/integration)
-   - `npx playwright test` (e2e — delegated to `e2e-test-engineer`)
    - `semgrep scan --config auto`
    - `npm audit --audit-level=high` (or stack-adapter equivalent)
+   **E2E gate** — run *once*, after the fast gates are clean:
+   - `npx playwright test` (delegated to `e2e-test-engineer`, which has its own focused-iteration discipline for within-e2e fix-and-verify loops)
 6. **On gate failure**, iterate up to N=3 attempts. Each iteration: read the failure output, propose a fix, apply, re-run. On exhausted attempts, halt with the full failure output and surface to the human — never use `--no-verify`, `eslint-disable`, `@ts-expect-error`, `xfail`, or any other bypass.
 7. **Commit** using Conventional Commits with `Ref: REQ-XXX` trailer and `Co-Authored-By: Claude` trailer. One commit per logical step; never amend a commit that's already been pushed.
 8. **Land the work on `$INTEGRATION_BRANCH`.** Push the feature branch, then:
@@ -150,9 +190,12 @@ Reached only on the **tracked** route from Phase 0 (the issue is already fetched
    compliance/evidence/REQ-XXX/
    ├── YYYY-MM-DD_e2e-results.json
    ├── YYYY-MM-DD_playwright-report/
+   ├── YYYY-MM-DD_traces/                ← per-test trace.zip + error-context.md
    ├── YYYY-MM-DD_unit-coverage/
    └── YYYY-MM-DD_screenshots/*.png
    ```
+   Copy Playwright's `test-results/` folder verbatim into `YYYY-MM-DD_traces/` so trace-by-test-name is available for audit without walking the HTML report's hash-name index. For HIGH/CRITICAL releases the traces are part of the audit trail — *"what state was the page in when test X failed and was overridden?"* can be answered with one `ls` instead of an HTML-report walk.
 3. **Upload each artefact to the portal**:
    ```bash
    devaudit push <project-slug> REQ-XXX <evidence-type> <file> \
@@ -181,10 +224,19 @@ Reached only on the **tracked** route from Phase 0 (the issue is already fetched
    - Test plan
    - SDLC checklist
 2. **Verify the UAT reviewer ≠ skill-trigger user** for HIGH/CRITICAL. If they match, halt with a configuration error: "HIGH/CRITICAL risk requires an independent UAT reviewer; the configured reviewer matches the trigger user — fix the four-eyes attestation slot in the implementation plan and re-run."
+   **Solo-operator teams.** On a one-person team, the literal "reviewer ≠ submitter" check is structurally unsatisfiable. The supported interpretation is *actor type, not human identity* — AI tooling (the skill-trigger) and the human operator (the portal-approver) are distinct actors. Document this on the release ticket under `## Sign-off (dual-actor)` with the explicit interpretation, and ensure the human operator has independently reviewed the diff before clicking *Approve Production* in the portal. Without this attestation the four-eyes claim is performative.
 3. **Apply labels** — `awaiting-uat-review`, `risk:<class>`.
 4. **Comment on the issue**: "Implementation complete. PR #M opened. Evidence on portal: <link>. UAT review requested. Resume with `resume REQ-XXX` once UAT approval is granted on the portal."
 5. **Hard stop.** Phase 4 ends here. Do not proceed to merge; the human's next action is reviewing on the portal.
+**When an external gate hangs or fails for unrelated reasons.** A required gate may fail for reasons outside the change's scope — flaky infra, an unrelated regression test that hangs at hour-plus runtime with no log activity, a known-failing suite. When this happens:
+1. **Verify it's actually unrelated.** Read the failure (or the lack of one). If it's the change's fault, fix it; this section does not apply.
+2. **Document the rationale on the PR.** A sticky comment naming: which gate, what the failure was, why it's unrelated to the change, what the safety net is (nightly run on `$INTEGRATION_BRANCH`, post-deploy verification, etc.).
+3. **Cancel-and-admin-merge is allowed** when **all three** hold: (a) ≥3 other required gates are green, (b) the change has no scope-overlap with the failing gate (e.g. service-layer fix vs hung UI e2e, or an `E2E: N/A by scope` test-plan), and (c) a fallback verification exists (nightly e2e on `$INTEGRATION_BRANCH`, post-deploy smoke, etc.). If any of the three fail, hold the merge and surface the blocker to the operator.
+4. **Record the decision in the release ticket.** The release ticket's `## Verification` section must mention the cancelled gate by run-ID and the fallback that justifies bypassing it. Auditors look here first.
 ### Phase 5 — Finalise or change-request loop (SDLC stage 5)
 Invoked separately by the user after UAT activity on the portal. Trigger: "resume REQ-XXX", "REQ-XXX UAT done", or just re-firing the skill on the same issue.

package/sdlc/files/ci/ci.yml.template CHANGED Viewed

@@ -372,10 +372,16 @@ jobs:
               --category test_report ${FLAGS}
           fi
-          # Upload test summary report (test_report category)
+          # Upload test summary report — precise evidence_type=test_report
+          # (was compliance_document). The portal's Compliance Gates panel
+          # filters by evidence_type, so the markdown summary belongs in the
+          # Test Reports gate alongside playwright-report.zip + coverage
+          # summary. Markdown renders inline (MarkdownRenderer); auditor
+          # reads pass/fail counts + narrative without downloading the zip.
+          # devaudit#370 follow-up.
           if [ -f "compliance/test-summary-report.md" ]; then
             upload test-summary-report.md \
-              {{PROJECT_SLUG}} _compliance-docs compliance_document compliance/test-summary-report.md \
+              {{PROJECT_SLUG}} _compliance-docs test_report compliance/test-summary-report.md \
               --category test_report ${FLAGS}
           fi

package/sdlc/files/ci/compliance-evidence.yml.template CHANGED Viewed

@@ -125,8 +125,9 @@ jobs:
           DERIVED_META=()
           [ -n "$DERIVED_CT" ] && DERIVED_META+=(--change-type "$DERIVED_CT")
-          # Upload compliance docs (planning category)
-          for DOC in compliance/RTM.md compliance/test-plan.md compliance/test-cases.md compliance/test-summary-report.md; do
+          # Upload planning docs (RTM / Test Plan / Test Cases) as
+          # compliance_document — they surface under the Documents tab.
+          for DOC in compliance/RTM.md compliance/test-plan.md compliance/test-cases.md; do
             if [ -f "$DOC" ]; then
               echo "Uploading: $(basename "$DOC")"
               bash scripts/upload-evidence.sh \
@@ -137,6 +138,19 @@ jobs:
             fi
           done
+          # Test summary report — precise evidence_type=test_report so it
+          # lands in the portal's Test Reports gate (rendered inline by the
+          # MarkdownRenderer). devaudit#370 follow-up; same change applied
+          # in ci.yml's gate-evidence upload step.
+          if [ -f "compliance/test-summary-report.md" ]; then
+            echo "Uploading: test-summary-report.md (test_report type)"
+            bash scripts/upload-evidence.sh \
+              {{PROJECT_SLUG}} _compliance-docs test_report compliance/test-summary-report.md \
+              --category test_report ${FLAGS} --release "${DERIVED_RELEASE}" \
+              "${DERIVED_META[@]}" \
+              || echo "Warning: Failed to upload test-summary-report.md"
+          fi
           # Project-level governance docs (devaudit#370 Phase 3a). When the
           # operator commits any of these markdown files, upload with the
           # precise evidence_type so the portal's framework-coverage matrix