@metasession.co/devaudit-cli 0.1.27 → 0.1.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +1 -1
- package/dist/index.js.map +1 -1
- package/package.json +2 -2
- package/sdlc/files/_common/scripts/derive-release-version.sh +36 -6
- package/sdlc/files/_common/scripts/derive-release-version.test.sh +43 -0
- package/sdlc/files/_common/skills/e2e-test-engineer/SKILL.md +14 -3
- package/sdlc/files/_common/skills/e2e-test-engineer/references/common-patterns.md +98 -0
- package/sdlc/files/_common/skills/sdlc-implementer/SKILL.md +57 -5
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@metasession.co/devaudit-cli",
|
|
3
|
-
"version": "0.1.27",
|
|
3
|
+
"version": "0.1.28",
|
|
4
4
|
"description": "DevAudit CLI — installs, syncs, and operates the Metasession SDLC across consumer projects.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -33,7 +33,7 @@
|
|
|
33
33
|
},
|
|
34
34
|
"dependencies": {
|
|
35
35
|
"@clack/prompts": "^0.8.2",
|
|
36
|
-
"@metasession.co/devaudit-plugin-sdk": "^0.1.
|
|
36
|
+
"@metasession.co/devaudit-plugin-sdk": "^0.1.28",
|
|
37
37
|
"commander": "^12.1.0",
|
|
38
38
|
"consola": "^3.2.3",
|
|
39
39
|
"env-paths": "^3.0.0",
|
|
@@ -6,11 +6,22 @@
|
|
|
6
6
|
# VERSION=$(./scripts/derive-release-version.sh)
|
|
7
7
|
#
|
|
8
8
|
# Priority:
|
|
9
|
-
# 1. REQ tag in commit subject:
|
|
10
|
-
# 2. Ref in commit body:
|
|
11
|
-
# 3. Bracketed tag in commit body:
|
|
12
|
-
#
|
|
13
|
-
# 4.
|
|
9
|
+
# 1. REQ tag in commit subject: "[REQ-037] feat(kitchen): ..." -> REQ-037
|
|
10
|
+
# 2. Ref in commit body: "Ref: REQ-037" -> REQ-037
|
|
11
|
+
# 3. Bracketed tag in commit body: merge commit whose body is the PR title
|
|
12
|
+
# "... [REQ-037] ..." -> REQ-037
|
|
13
|
+
# 4. Pending release ticket on disk: exactly one
|
|
14
|
+
# compliance/pending-releases/RELEASE-TICKET-REQ-XXX.md
|
|
15
|
+
# -> REQ-XXX
|
|
16
|
+
# 5. Fallback: bare date -> v2026.05.17
|
|
17
|
+
#
|
|
18
|
+
# Step 4 (DevAudit-Installer#92) handles `chore:` / `docs:` / `ci:`
|
|
19
|
+
# commits (e.g. a `devaudit update` sync) landing on the integration
|
|
20
|
+
# branch between feature merge and release-PR open. Such a commit has
|
|
21
|
+
# no REQ tag in its message → steps 1-3 fall through. The release
|
|
22
|
+
# ticket on disk is a stronger explicit-operator-state signal than the
|
|
23
|
+
# bare date — when exactly one ticket is open, attribute to it.
|
|
24
|
+
# Multiple open tickets stay ambiguous → bare-date fallback.
|
|
14
25
|
#
|
|
15
26
|
# The id is taken from a bracketed [REQ-XXX] tag (subject or body) or the
|
|
16
27
|
# `Ref:` line — NOT from unbracketed prose (e.g. "target close: REQ-002" must
|
|
@@ -53,5 +64,24 @@ if echo "$BODY" | grep -qE '\[REQ-[0-9]+\]'; then
|
|
|
53
64
|
exit 0
|
|
54
65
|
fi
|
|
55
66
|
|
|
56
|
-
# 4.
|
|
67
|
+
# 4. Pending release ticket on disk: when exactly one
|
|
68
|
+
# `compliance/pending-releases/RELEASE-TICKET-REQ-*.md` is present, the
|
|
69
|
+
# operator's explicit state says THIS is the in-flight release. Use it.
|
|
70
|
+
# Zero or multiple → ambiguous, fall through to the bare date.
|
|
71
|
+
# DevAudit-Installer#92.
|
|
72
|
+
if [ -d compliance/pending-releases ]; then
|
|
73
|
+
# Count NUL terminators rather than lines so a filename containing a newline can't skew the count.
|
|
74
|
+
TICKET_COUNT=$(find compliance/pending-releases -maxdepth 1 -type f \
|
|
75
|
+
-name 'RELEASE-TICKET-REQ-*.md' -print0 2>/dev/null \
|
|
76
|
+
| tr -cd '\0' | wc -c)
|
|
77
|
+
if [ "$TICKET_COUNT" = "1" ]; then
|
|
78
|
+
find compliance/pending-releases -maxdepth 1 -type f \
|
|
79
|
+
-name 'RELEASE-TICKET-REQ-*.md' -print 2>/dev/null \
|
|
80
|
+
| head -1 | xargs -n1 basename \
|
|
81
|
+
| sed -E 's/^RELEASE-TICKET-(REQ-[0-9]+)\.md$/\1/'
|
|
82
|
+
exit 0
|
|
83
|
+
fi
|
|
84
|
+
fi
|
|
85
|
+
|
|
86
|
+
# 5. Fallback: bare date in UTC
|
|
57
87
|
echo "v$(date -u +%Y.%m.%d)"
|
|
@@ -111,6 +111,49 @@ make_fixture "$WORK/c8" "Merge pull request #7 from metasession-dev/feat/req-002
|
|
|
111
111
|
chore(deps): [REQ-002] dependency hardening — close R-001"
|
|
112
112
|
assert_eq "merge-commit body [REQ-002] -> REQ-002" "REQ-002" "$(run_helper)"
|
|
113
113
|
|
|
114
|
+
# Case 9 (DevAudit-Installer#92): a chore: sync commit has no REQ tag in
|
|
115
|
+
# its message but a pending release ticket exists on disk. Must attribute
|
|
116
|
+
# to the REQ from that ticket instead of falling through to the bare date.
|
|
117
|
+
# Regression for REQ-051 / REQ-052 gate evidence landing on phantom date
|
|
118
|
+
# releases when a `chore: devaudit update to 0.1.x` commit landed between
|
|
119
|
+
# the feature merge and the release-PR open on wawagardenbar-app.
|
|
120
|
+
make_fixture "$WORK/c9" "chore: devaudit update to 0.1.27"
|
|
121
|
+
mkdir -p compliance/pending-releases
|
|
122
|
+
cat > compliance/pending-releases/RELEASE-TICKET-REQ-051.md <<'TICKET'
|
|
123
|
+
# Release Ticket: REQ-051
|
|
124
|
+
|
|
125
|
+
**Status:** TESTED - PENDING SIGN-OFF
|
|
126
|
+
TICKET
|
|
127
|
+
assert_eq "chore: sync + single pending ticket REQ-051 -> REQ-051" "REQ-051" "$(run_helper)"
|
|
128
|
+
|
|
129
|
+
# Case 10: a chore: sync commit with MULTIPLE pending tickets — ambiguous,
|
|
130
|
+
# stays at the bare-date fallback (don't guess between them).
|
|
131
|
+
make_fixture "$WORK/c10" "chore: devaudit update to 0.1.27"
|
|
132
|
+
mkdir -p compliance/pending-releases
|
|
133
|
+
cat > compliance/pending-releases/RELEASE-TICKET-REQ-051.md <<'TICKET'
|
|
134
|
+
# Release Ticket: REQ-051
|
|
135
|
+
TICKET
|
|
136
|
+
cat > compliance/pending-releases/RELEASE-TICKET-REQ-052.md <<'TICKET'
|
|
137
|
+
# Release Ticket: REQ-052
|
|
138
|
+
TICKET
|
|
139
|
+
assert_eq "chore: sync + two pending tickets -> bare date $TODAY" "$TODAY" "$(run_helper)"
|
|
140
|
+
|
|
141
|
+
# Case 11: a chore: sync commit + no compliance/pending-releases dir at
|
|
142
|
+
# all — still falls back to the bare date (the directory may be missing
|
|
143
|
+
# for projects that haven't started their first tracked release yet).
|
|
144
|
+
make_fixture "$WORK/c11" "chore: devaudit update to 0.1.27"
|
|
145
|
+
assert_eq "chore: sync + no pending dir -> bare date $TODAY" "$TODAY" "$(run_helper)"
|
|
146
|
+
|
|
147
|
+
# Case 12: a feat: commit with a [REQ-XXX] tag in the subject AND a
|
|
148
|
+
# pending ticket for a DIFFERENT REQ. Subject wins (step 1 returns
|
|
149
|
+
# before step 4 fires).
|
|
150
|
+
make_fixture "$WORK/c12" "[REQ-099] feat: in-flight feature for a different REQ"
|
|
151
|
+
mkdir -p compliance/pending-releases
|
|
152
|
+
cat > compliance/pending-releases/RELEASE-TICKET-REQ-051.md <<'TICKET'
|
|
153
|
+
# Release Ticket: REQ-051
|
|
154
|
+
TICKET
|
|
155
|
+
assert_eq "subject [REQ-099] beats pending REQ-051 -> REQ-099" "REQ-099" "$(run_helper)"
|
|
156
|
+
|
|
114
157
|
echo ""
|
|
115
158
|
echo "=== Summary: $PASS pass / $FAIL fail ==="
|
|
116
159
|
|
|
@@ -87,6 +87,8 @@ The bootstrap workflow:
|
|
|
87
87
|
|
|
88
88
|
10. **Offer a CI job** — write the YAML (or equivalent) for the project's CI system, but **do not commit it without confirmation**. Show it inline first. On a **DevAudit** project, `.github/workflows/ci.yml` is generated and marked do-not-edit-manually — don't hand-edit it; instead drive the E2E gate from `sdlc-config.json`. If the suite must run against a **disposable local database** (the rule on any project with no separate test instance — never test against prod), set `e2e_setup_command` (e.g. `supabase start` + load schema + seed) and `e2e_env` (e.g. `E2E_LOCAL=1`, local coords, a dummy email key) so the gate severs production. See [Local-database E2E in CI](https://github.com/metasession-dev/DevAudit-Installer/blob/main/docs/e2e-local-db-ci.md), then `devaudit update` to regenerate.
|
|
89
89
|
|
|
90
|
+
**Upload both artefact shapes.** Playwright writes per-test artefacts to *two* places: `test-results/<spec>-<title>[-retryN]/{trace.zip, video.webm, *.png, error-context.md}` — **spec-named**, human-mappable — and `playwright-report/data/<content-hash>.zip` — **hash-named**, indexed by the HTML report. Ensure the project's CI uploads **both** `playwright-report/` (for the HTML viewer) and `test-results/` (for spec-named traces / videos / error-context). If only one is uploaded, propose a small follow-up PR to add the other — it costs ~80 MB of artefact storage and saves the operator from walking the HTML report's hash index to find a specific trace.
|
|
91
|
+
|
|
90
92
|
11. **Write a short README** in the test directory explaining structure, how to run, how to add new tests, and how to update visual baselines. Future contributors (and the skill itself, on next invocation) will thank you.
|
|
91
93
|
|
|
92
94
|
After bootstrap, if there's a change to test, continue to Phase 2 as normal. If the user only wanted the suite set up with no specific change in mind, stop here with a final summary.
|
|
@@ -157,6 +159,7 @@ Write the tests in the project's existing style.
|
|
|
157
159
|
- **Reuse existing helpers.** Page Object Models, fixtures, custom commands, test-data factories — use them. Don't invent parallel infrastructure.
|
|
158
160
|
- **Match the assertion style.** If the codebase uses `expect(locator).toBeVisible()`, don't switch to `assert.isTrue(...)`.
|
|
159
161
|
- **Read 2–3 nearby tests before writing.** Fastest way to absorb conventions you wouldn't have noticed otherwise.
|
|
162
|
+
- **Check `references/common-patterns.md` before writing role-based locators** for component-library UI (shadcn/ui, Radix, MUI, etc.). A short appendix of known framework × library gotchas — `CardTitle` is a `<div>` not a heading; Radix `<Select>` renders two `role="combobox"` nodes; Next.js `<Link>` clicks don't fire network requests — saves a round-trip through a failing selector each time.
|
|
160
163
|
|
|
161
164
|
For **visual regression** specifically:
|
|
162
165
|
- New tests need baseline images. Generate them, but **do not auto-approve** — surface them for the user to verify before they're committed.
|
|
@@ -168,15 +171,23 @@ Do additions, updates, and (approved) deletions in the same change so the suite
|
|
|
168
171
|
|
|
169
172
|
Run the suite. Strategy:
|
|
170
173
|
|
|
171
|
-
1. **
|
|
172
|
-
2. **
|
|
173
|
-
3. **For
|
|
174
|
+
1. **Iterate focused.** During fix-and-verify, run only the failing specs (`--grep`, spec-path args, or a CI input that scopes to a subset). Cycle time is what makes the loop tractable — full regression for every iteration burns CI budget and operator patience. Expect to loop: fix → focused run → fix → focused run, many times.
|
|
175
|
+
2. **Run full regression once, at the end.** Once the focused set is green, run the full suite to catch unintended side effects in untouched areas.
|
|
176
|
+
3. **For CI-driven verification, ensure the workflow accepts a subset input.** A `workflow_dispatch.inputs.specs` (or equivalent) lets a developer fire a scoped run without local infrastructure. Recommend setting this up if the project doesn't have it — the speed-up (~5–10 min vs ~30–60 min) is the difference between a tractable loop and a hated one.
|
|
177
|
+
4. **For visual regression**, run the project's normal comparison mode against existing baselines.
|
|
174
178
|
|
|
175
179
|
Triage every failure into one of these buckets *before* taking any action:
|
|
176
180
|
|
|
181
|
+
**0. Read the page snapshot first.** Modern Playwright writes `test-results/<spec>-<title>[-retryN]/error-context.md` — a markdown accessibility-tree snapshot of the page at failure time. It's enough to triage selector / role / wait-condition failures without extracting the trace zip. Reach for the trace only when the snapshot is ambiguous (e.g. the failure depends on a transition or a network race the snapshot can't show).
|
|
182
|
+
|
|
183
|
+
**Filter for all terminal failure statuses.** `failed` and `timedOut` are distinct in Playwright's JSON reporter; `interrupted` is also possible. When summarising failures from `reporter=json` output, use `select(.status == "failed" or .status == "timedOut" or .status == "interrupted")` — `select(.status == "failed")` alone hides hung tests.
|
|
184
|
+
|
|
185
|
+
Then bucket each failure:
|
|
186
|
+
|
|
177
187
|
- **Flake** — non-deterministic; passes on rerun. Rerun once. If it passes, note it. If it keeps flaking, flag it but don't file a noisy bug.
|
|
178
188
|
- **Test bug** — your test is wrong (bad selector, wrong assertion, timing). Fix the test; don't file anything.
|
|
179
189
|
- **Application defect** — the app does the wrong thing. File it.
|
|
190
|
+
- **Seed-data gap** — the page works, the test's assertion is correct, but the seeded fixture doesn't satisfy the assertion (empty table, no transactions for the day, missing user role). Fix the seed script (or the test's own setup), not the test logic or the product.
|
|
180
191
|
- **Visual diff — intended** — the snapshot changed because the change intentionally changed the UI. Update the baseline and surface it for user approval.
|
|
181
192
|
- **Visual diff — unintended** — a snapshot changed somewhere the change shouldn't have affected. File it as a regression.
|
|
182
193
|
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
# Common framework × component-library patterns
|
|
2
|
+
|
|
3
|
+
Known gotchas that bite Playwright role-based and text-based locators when the
|
|
4
|
+
SUT uses shadcn/ui, Radix UI, Next.js, or similar component libraries. Each
|
|
5
|
+
entry: the **symptom** an operator hits, the **cause** (why the obvious locator
|
|
6
|
+
misses), the **locator that works**.
|
|
7
|
+
|
|
8
|
+
These are the patterns the e2e-test-engineer skill has tripped over in real
|
|
9
|
+
release-suite triage cycles. The list is intentionally short — only patterns
|
|
10
|
+
that have produced an actual failed-locator triage at least once. Add to this
|
|
11
|
+
file when a new framework × library combination produces a recurrent miss.
|
|
12
|
+
|
|
13
|
+
## shadcn `CardTitle` is a `<div>`, not a heading
|
|
14
|
+
|
|
15
|
+
**Symptom.** `await page.getByRole('heading', { name: /Units of Measurement/i })`
|
|
16
|
+
returns zero matches even though the card visibly shows that title.
|
|
17
|
+
|
|
18
|
+
**Cause.** shadcn/ui's `CardTitle` component renders as a styled `<div>`, not
|
|
19
|
+
an `<h1>`–`<h6>` element. There is no heading role to match.
|
|
20
|
+
|
|
21
|
+
**Locator that works.**
|
|
22
|
+
|
|
23
|
+
```ts
|
|
24
|
+
// Prefer a data-testid on the card.
|
|
25
|
+
await page.getByTestId('card-units-of-measurement').getByText('Units of Measurement');
|
|
26
|
+
// Or match the title text (case-insensitive substring) inside the card's title slot.
|
|
27
|
+
await page.locator('[data-slot="card-title"]', { hasText: 'Units of Measurement' });
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
If the project owns the card markup, the cheap fix is to wrap the title text
|
|
31
|
+
in a real `<h2>` (or pass `as="h2"`) so role-based locators keep working.
|
|
32
|
+
|
|
33
|
+
## Radix `<Select>` renders two `role="combobox"` nodes
|
|
34
|
+
|
|
35
|
+
**Symptom.** `page.getByRole('combobox').nth(2).click()` lands on the
|
|
36
|
+
wrong target when a sibling field is added or reordered.
|
|
37
|
+
|
|
38
|
+
**Cause.** Radix's `<Select>` renders the visible trigger **and** a hidden
|
|
39
|
+
accessibility companion — both report `role="combobox"`. A form with N selects
|
|
40
|
+
has 2N matching nodes, and positional `.nth()` indices shift unpredictably as
|
|
41
|
+
fields are added.
|
|
42
|
+
|
|
43
|
+
**Locator that works.**
|
|
44
|
+
|
|
45
|
+
```ts
|
|
46
|
+
// Anchor by the associated label, not by position.
|
|
47
|
+
await page.getByLabel('Payment method').click();
|
|
48
|
+
// Or by a data-testid on the trigger:
|
|
49
|
+
await page.getByTestId('select-payment-method').click();
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
Avoid `.nth(N)` on any role that a Radix primitive renders twice (`combobox`,
|
|
53
|
+
`listbox`, `dialog` in some variants).
|
|
54
|
+
|
|
55
|
+
## Next.js `<Link>` clicks don't trigger network requests
|
|
56
|
+
|
|
57
|
+
**Symptom.** `await page.waitForLoadState('networkidle')` returns immediately
|
|
58
|
+
after `await link.click()`, before the URL has actually changed; the next
|
|
59
|
+
assertion runs against the previous page's DOM.
|
|
60
|
+
|
|
61
|
+
**Cause.** Next.js's `<Link>` performs client-side route transitions via the
|
|
62
|
+
App Router — no network round-trip, so `networkidle` was already idle.
|
|
63
|
+
|
|
64
|
+
**Locator that works.**
|
|
65
|
+
|
|
66
|
+
```ts
|
|
67
|
+
await Promise.all([
|
|
68
|
+
page.waitForURL(/\/inventory\/snapshots/),
|
|
69
|
+
page.getByRole('link', { name: 'View snapshots' }).click(),
|
|
70
|
+
]);
|
|
71
|
+
// Or, after the click:
|
|
72
|
+
await page.waitForURL(/\/inventory\/snapshots/);
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
`waitForURL` is the right primitive for any client-side navigation (Next.js,
|
|
76
|
+
React Router, Vue Router, SvelteKit). `networkidle` is for full-page loads.
|
|
77
|
+
|
|
78
|
+
## Button-with-Badge in `CardTitle` breaks `getByText(…, { exact: true })`
|
|
79
|
+
|
|
80
|
+
**Symptom.** `await page.getByText('Filter Tabs', { exact: true })` returns
|
|
81
|
+
zero matches on a card whose title visibly reads "Filter Tabs".
|
|
82
|
+
|
|
83
|
+
**Cause.** The title slot contains `<icon> Filter Tabs <Badge>1</Badge>` — the
|
|
84
|
+
badge's text content concatenates into the parent's text, so the exact match
|
|
85
|
+
is against `"Filter Tabs1"`, not `"Filter Tabs"`.
|
|
86
|
+
|
|
87
|
+
**Locator that works.**
|
|
88
|
+
|
|
89
|
+
```ts
|
|
90
|
+
// Drop `exact: true` when the title has decorative siblings.
|
|
91
|
+
await page.getByText('Filter Tabs');
|
|
92
|
+
// Or scope to the card's title slot and filter by its text.
|
|
93
|
+
await page.locator('[data-slot="card-title"]').filter({ hasText: 'Filter Tabs' });
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
The same pattern bites any title slot that mixes a text label with a
|
|
97
|
+
count-badge, status pill, or icon-with-tooltip sibling. Default to `exact: false`
|
|
98
|
+
for component-library titles, and use `getByTestId` when ambiguity is real.
|
|
@@ -91,6 +91,41 @@ Runs **first**, before any `REQ-XXX` is assigned. It decides which of the six ch
|
|
|
91
91
|
|
|
92
92
|
Only the **tracked** route continues into Phase 1; the others run the Lightweight path below. The off-ramps are deliberate — dragging housekeeping through tracked-change machinery it doesn't need is exactly the failure mode this step exists to prevent — but they are still **driven to completion**, never dumped as a checklist for the operator to run alone.
|
|
93
93
|
|
|
94
|
+
**Worked examples** (one per change-type the skill keeps mis-routing without one):
|
|
95
|
+
|
|
96
|
+
*Tracked feature — REQ-XXX assigned*
|
|
97
|
+
|
|
98
|
+
> - **Change type:** Feature
|
|
99
|
+
> - **Commit type:** feat
|
|
100
|
+
> - **Requirement:** REQ-XXX (new)
|
|
101
|
+
> - **Risk:** MEDIUM
|
|
102
|
+
> - **Path:** Full SDLC Stages 1–5
|
|
103
|
+
> - **Gates/evidence:** plan + RTM row + unit/integration/e2e evidence + UAT four-eyes + Production approval
|
|
104
|
+
> - **Your approvals:** UAT four-eyes + Production approval
|
|
105
|
+
> - **Skipped:** none
|
|
106
|
+
|
|
107
|
+
*Test fix surfaced by suite drift*
|
|
108
|
+
|
|
109
|
+
> - **Change type:** Housekeeping (test maintenance)
|
|
110
|
+
> - **Commit type:** test
|
|
111
|
+
> - **Requirement:** none
|
|
112
|
+
> - **Risk:** LOW
|
|
113
|
+
> - **Path:** Lightweight (gates → PR review → merge)
|
|
114
|
+
> - **Gates/evidence:** quality-gates smoke (default CI Gate 4); no full regression on PR (run via `workflow_dispatch` while iterating, full regression on next nightly)
|
|
115
|
+
> - **Your approvals:** PR review only
|
|
116
|
+
> - **Skipped:** RTM, evidence pack, UAT four-eyes, Production approval
|
|
117
|
+
|
|
118
|
+
*Workflow tweak (CI artifact upload, gate timeout bump, etc.)*
|
|
119
|
+
|
|
120
|
+
> - **Change type:** Housekeeping (CI maintenance)
|
|
121
|
+
> - **Commit type:** ci
|
|
122
|
+
> - **Requirement:** none
|
|
123
|
+
> - **Risk:** LOW
|
|
124
|
+
> - **Path:** Lightweight (gates → verify-via-dispatch → PR review → merge)
|
|
125
|
+
> - **Gates/evidence:** quality-gates smoke + a `gh workflow run <file> --ref <branch>` on the modified workflow before merge (silent CI regressions are the failure mode this catches)
|
|
126
|
+
> - **Your approvals:** PR review only
|
|
127
|
+
> - **Skipped:** RTM, evidence pack, UAT four-eyes, Production approval
|
|
128
|
+
|
|
94
129
|
### Lightweight path (housekeeping / trivial / compliance-doc-only)
|
|
95
130
|
|
|
96
131
|
Reached from Phase 0 for non-tracked change-types. The skill drives this end-to-end; the only difference from the tracked cycle is the absence of *ceremony*, not the absence of *guidance*. It pauses only where a human is genuinely required (PR review, merge).
|
|
@@ -100,9 +135,10 @@ Reached from Phase 0 for non-tracked change-types. The skill drives this end-to-
|
|
|
100
135
|
3. **Run all gates locally** (`npm run lint`, `npx tsc --noEmit`, the test suite, `semgrep`, `npm audit` — or the stack-adapter equivalents). Trivial ≠ unverified; never `--no-verify`.
|
|
101
136
|
4. **Commit** with a housekeeping type and **no** `REQ-XXX` — `docs:` / `chore:` / `ci:` / `build:` / `test:` / `revert:` are exempt from the `[REQ-XXX]` rule; a `compliance:` doc-only change references the existing REQ. `Co-Authored-By: Claude` if AI-assisted.
|
|
102
137
|
5. **Push and open the PR** into `$INTEGRATION_BRANCH` (`gh pr create --base "$INTEGRATION_BRANCH" --head <branch>`). CI runs the same quality gates; `compliance-validation.yml` finds no `REQ-XXX` and skips artifact validation.
|
|
103
|
-
6. **
|
|
104
|
-
7. **
|
|
105
|
-
8. **
|
|
138
|
+
6. **For `ci:` changes, verify-via-dispatch before merging.** `gh workflow run <workflow.yml> --ref <branch>` fires the modified workflow against the PR branch. If the change broke a step, the dispatch run fails loudly and you fix-forward *before* the merge ships the broken gate to `$INTEGRATION_BRANCH`. This is the cheapest insurance against silent CI regressions — a `ci:` change that breaks a gate is most damaging *after* it lands.
|
|
139
|
+
7. **Report honest status** — wait for CI, name any failing check, fix and re-push. Never announce "ready" while a required check is red.
|
|
140
|
+
8. **Guide review → merge.** A human still reviews the PR (separation of duties). There is **no** portal release approval, no UAT four-eyes, no Production gate, and no close-out. Merge once CI is green and the reviewer approves.
|
|
141
|
+
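9. **Done.** A housekeeping push produces at most a bare-date release (`vYYYY.MM.DD`) with no approval gate (or, when exactly one `compliance/pending-releases/RELEASE-TICKET-REQ-XXX.md` is on disk, `derive-release-version.sh` attributes the push to that ticket's `REQ-XXX` release instead); a doc-only push attaches its docs to the existing `REQ-XXX` release. No further action required — report completion and stop.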
|
|
106
142
|
|
|
107
143
|
### Phase 1 — Plan (SDLC stage 1)
|
|
108
144
|
|
|
@@ -127,13 +163,17 @@ Reached only on the **tracked** route from Phase 0 (the issue is already fetched
|
|
|
127
163
|
- CRITICAL — HIGH plus targeted security tests (authz bypass attempts, input fuzzing where applicable).
|
|
128
164
|
3. **For any e2e or visual-regression test work in this step, invoke `e2e-test-engineer`** — do not author e2e tests directly. The orchestrator passes the implementation plan + the diff so far to the e2e-test-engineer skill, which derives scenarios, reconciles with the existing pack, and runs the suite.
|
|
129
165
|
4. **Implement against the plan.** Reference `compliance/plans/REQ-XXX/implementation-plan.md` as you go. Any deviation from the plan must be noted in the plan itself under a `## Plan deviation` section — never silently diverge.
|
|
130
|
-
5. **Run
|
|
166
|
+
5. **Run gates locally, cheap-first.** The gates are not equivalent-cost — `npm run lint` is seconds, `npx playwright test` is 30–60 minutes. Iterate on the fast gates; spend the e2e cost once.
|
|
167
|
+
|
|
168
|
+
**Fast gates** (run on every change, ideally pre-commit):
|
|
131
169
|
- `npm run lint` (or stack-adapter equivalent)
|
|
132
170
|
- `npx tsc --noEmit` (or stack-adapter equivalent)
|
|
133
171
|
- `npx vitest run` (unit/integration)
|
|
134
|
-
- `npx playwright test` (e2e — delegated to `e2e-test-engineer`)
|
|
135
172
|
- `semgrep scan --config auto`
|
|
136
173
|
- `npm audit --audit-level=high` (or stack-adapter equivalent)
|
|
174
|
+
|
|
175
|
+
**E2E gate** — run *once*, after the fast gates are clean:
|
|
176
|
+
- `npx playwright test` (delegated to `e2e-test-engineer`, which has its own focused-iteration discipline for within-e2e fix-and-verify loops)
|
|
137
177
|
6. **On gate failure**, iterate up to N=3 attempts. Each iteration: read the failure output, propose a fix, apply, re-run. On exhausted attempts, halt with the full failure output and surface to the human — never use `--no-verify`, `eslint-disable`, `@ts-expect-error`, `xfail`, or any other bypass.
|
|
138
178
|
7. **Commit** using Conventional Commits with `Ref: REQ-XXX` trailer and `Co-Authored-By: Claude` trailer. One commit per logical step; never amend a commit that's already been pushed.
|
|
139
179
|
8. **Land the work on `$INTEGRATION_BRANCH`.** Push the feature branch, then:
|
|
@@ -150,9 +190,12 @@ Reached only on the **tracked** route from Phase 0 (the issue is already fetched
|
|
|
150
190
|
compliance/evidence/REQ-XXX/
|
|
151
191
|
├── YYYY-MM-DD_e2e-results.json
|
|
152
192
|
├── YYYY-MM-DD_playwright-report/
|
|
193
|
+
├── YYYY-MM-DD_traces/ ← per-test trace.zip + error-context.md
|
|
153
194
|
├── YYYY-MM-DD_unit-coverage/
|
|
154
195
|
└── YYYY-MM-DD_screenshots/*.png
|
|
155
196
|
```
|
|
197
|
+
|
|
198
|
+
Copy Playwright's `test-results/` folder verbatim into `YYYY-MM-DD_traces/` so trace-by-test-name is available for audit without walking the HTML report's hash-name index. For HIGH/CRITICAL releases the traces are part of the audit trail — *"what state was the page in when test X failed and was overridden?"* is answered by one `ls` instead of an HTML-report walk.
|
|
156
199
|
3. **Upload each artefact to the portal**:
|
|
157
200
|
```bash
|
|
158
201
|
devaudit push <project-slug> REQ-XXX <evidence-type> <file> \
|
|
@@ -181,10 +224,19 @@ Reached only on the **tracked** route from Phase 0 (the issue is already fetched
|
|
|
181
224
|
- Test plan
|
|
182
225
|
- SDLC checklist
|
|
183
226
|
2. **Verify the UAT reviewer ≠ skill-trigger user** for HIGH/CRITICAL. If they match, halt with a configuration error: "HIGH/CRITICAL risk requires an independent UAT reviewer; the configured reviewer matches the trigger user — fix the four-eyes attestation slot in the implementation plan and re-run."
|
|
227
|
+
|
|
228
|
+
**Solo-operator teams.** On a one-person team, the literal "reviewer ≠ submitter" check is structurally unsatisfiable. The supported interpretation is *actor type, not human identity* — AI tooling (the skill-trigger) and the human operator (the portal-approver) are distinct actors. Document this on the release ticket under `## Sign-off (dual-actor)` with the explicit interpretation, and ensure the human operator has independently reviewed the diff before clicking *Approve Production* in the portal. Without this attestation the four-eyes claim is performative.
|
|
184
229
|
3. **Apply labels** — `awaiting-uat-review`, `risk:<class>`.
|
|
185
230
|
4. **Comment on the issue**: "Implementation complete. PR #M opened. Evidence on portal: <link>. UAT review requested. Resume with `resume REQ-XXX` once UAT approval is granted on the portal."
|
|
186
231
|
5. **Hard stop.** Phase 4 ends here. Do not proceed to merge; the human's next action is reviewing on the portal.
|
|
187
232
|
|
|
233
|
+
**When an external gate hangs or fails for unrelated reasons.** A required gate may fail for reasons outside the change's scope — flaky infra, an unrelated regression test that hangs at hour-plus runtime with no log activity, a known-failing suite. When this happens:
|
|
234
|
+
|
|
235
|
+
1. **Verify it's actually unrelated.** Read the failure (or the lack of one). If it's the change's fault, fix it; this section does not apply.
|
|
236
|
+
2. **Document the rationale on the PR.** A sticky comment naming: which gate, what the failure was, why it's unrelated to the change, what the safety net is (nightly run on `$INTEGRATION_BRANCH`, post-deploy verification, etc.).
|
|
237
|
+
3. **Cancel-and-admin-merge is allowed** when **all three** hold: (a) ≥3 other required gates are green, (b) the change has no scope-overlap with the failing gate (e.g. service-layer fix vs hung UI e2e, or an `E2E: N/A by scope` test-plan), and (c) a fallback verification exists (nightly e2e on `$INTEGRATION_BRANCH`, post-deploy smoke, etc.). If any of the three fails, hold the merge and surface the blocker to the operator.
|
|
238
|
+
4. **Record the decision in the release ticket.** The release ticket's `## Verification` section must mention the cancelled gate by run-ID and the fallback that justifies bypassing it. Auditors look here first.
|
|
239
|
+
|
|
188
240
|
### Phase 5 — Finalise or change-request loop (SDLC stage 5)
|
|
189
241
|
|
|
190
242
|
Invoked separately by the user after UAT activity on the portal. Trigger: "resume REQ-XXX", "REQ-XXX UAT done", or just re-firing the skill on the same issue.
|