@metasession.co/devaudit-cli 0.1.52 → 0.1.54
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +1 -1
- package/dist/index.js.map +1 -1
- package/package.json +2 -2
- package/scripts/upload-evidence.sh +41 -1
- package/sdlc/files/_common/Test_Architecture.md +1 -1
- package/sdlc/files/_common/Test_Policy.md +12 -0
- package/sdlc/files/_common/Test_Strategy.md +18 -0
- package/sdlc/files/_common/skills/adr-author/SKILL.md +1 -1
- package/sdlc/files/_common/skills/e2e-test-engineer/SKILL.md +20 -0
- package/sdlc/files/_common/skills/e2e-test-engineer/references/e2e-regression-3-tier.yml +178 -0
- package/sdlc/files/_common/skills/requirements-aligner/SKILL.md +1 -1
- package/sdlc/files/_common/skills/risk-register-keeper/SKILL.md +1 -1
- package/sdlc/files/_common/skills/sdlc-implementer/SKILL.md +21 -1
- package/sdlc/files/ci/ci.yml.template +40 -14
- package/sdlc/files/ci/compliance-evidence.yml.template +238 -16
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@metasession.co/devaudit-cli",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.54",
|
|
4
4
|
"description": "DevAudit CLI — installs, syncs, and operates the Metasession SDLC across consumer projects.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -33,7 +33,7 @@
|
|
|
33
33
|
},
|
|
34
34
|
"dependencies": {
|
|
35
35
|
"@clack/prompts": "^0.8.2",
|
|
36
|
-
"@metasession.co/devaudit-plugin-sdk": "^0.1.
|
|
36
|
+
"@metasession.co/devaudit-plugin-sdk": "^0.1.54",
|
|
37
37
|
"commander": "^12.1.0",
|
|
38
38
|
"consola": "^3.2.3",
|
|
39
39
|
"env-paths": "^3.0.0",
|
|
@@ -133,6 +133,42 @@ fi
|
|
|
133
133
|
# Strip any trailing slash so we don't double-up later.
|
|
134
134
|
DEVAUDIT_BASE_URL="${DEVAUDIT_BASE_URL%/}"
|
|
135
135
|
|
|
136
|
+
# --- Base-URL drift check (devaudit-installer#143) ---
|
|
137
|
+
# When the portal moves host (e.g. devaudit.metasession.co → devaudit.ai)
|
|
138
|
+
# Cloudflare / the origin replies 301/302 with a Location header pointing
|
|
139
|
+
# at the new host. Every consumer's CI that still uses the old base URL
|
|
140
|
+
# fails every upload until DEVAUDIT_BASE_URL is rotated. We don't want
|
|
141
|
+
# the failure mode to be a silent "evidence didn't upload" — surface the
|
|
142
|
+
# drift loudly at the top of the run so the operator knows to rotate the
|
|
143
|
+
# secret. (We still upload via `curl -L` so the run itself succeeds; the
|
|
144
|
+
# warning is the nudge to fix the secret, not a hard stop.)
|
|
145
|
+
#######################################
# Best-effort probe that warns when DEVAUDIT_BASE_URL answers with a
# redirect to a *different* host, so the operator knows to rotate the
# secret. Never fails the run: every error path is swallowed on purpose.
# Globals:   DEVAUDIT_BASE_URL (read)
# Arguments: none
# Outputs:   a multi-line WARNING on stdout when host drift is detected;
#            nothing otherwise
# Returns:   0 always (curl / network failures are ignored)
#######################################
probe_base_url_drift() {
  # `${var:-}` so `set -u` doesn't trip if the variable is somehow unset;
  # an empty base URL just makes the probe fail quietly.
  local probe_url="${DEVAUDIT_BASE_URL:-}/api/health"

  # 5s connect + 10s overall is plenty for a redirect-only probe. The body
  # (if any) is discarded via -o /dev/null; only the status code and the
  # redirect target that curl *would* follow are printed.
  local -a probe_args=(
    -s -o /dev/null --max-time 10 --connect-timeout 5
    -w '%{http_code} %{redirect_url}'
  )

  local resp code redirect_url

  # First try HEAD (-I). `|| true` keeps a missing curl binary or an
  # offline network from aborting the script under `set -e`.
  resp=$(curl "${probe_args[@]}" -I "$probe_url" 2>/dev/null || true)
  code="${resp%% *}"

  # Some upstream proxies reject HEAD outright (405 Method Not Allowed /
  # 501 Not Implemented). Fall back to a plain GET in that case.
  if [[ "$code" == "405" || "$code" == "501" ]]; then
    resp=$(curl "${probe_args[@]}" "$probe_url" 2>/dev/null || true)
    code="${resp%% *}"
  fi
  redirect_url="${resp#* }"

  # Only permanent/temporary redirects with an absolute http(s) target are
  # interesting; anything else (200, 000 on failure, empty output) is silent.
  if [[ "$code" =~ ^30[1278]$ ]] && [[ "$redirect_url" =~ ^https?:// ]]; then
    local old_host new_host
    # Hostnames are case-insensitive, so normalise before comparing to
    # avoid a false drift warning on a case-only difference.
    old_host=$(printf '%s' "${DEVAUDIT_BASE_URL:-}" \
      | sed -E 's|^https?://([^/]+).*|\1|' \
      | tr '[:upper:]' '[:lower:]')
    new_host=$(printf '%s' "$redirect_url" \
      | sed -E 's|^https?://([^/]+).*|\1|' \
      | tr '[:upper:]' '[:lower:]')
    if [ -n "$new_host" ] && [ "$old_host" != "$new_host" ]; then
      echo "WARNING: DEVAUDIT_BASE_URL host '${old_host}' redirects to '${new_host}'."
      echo " Rotate the DEVAUDIT_BASE_URL secret in your CI environment to"
      echo " the new host to avoid silent breakage. (Uploads will still"
      echo " succeed this run — curl follows the redirect — but the"
      echo " underlying secret should be updated.)"
      echo " Ref: https://github.com/metasession-dev/DevAudit-Installer/issues/143"
    fi
  fi
}
|
|
170
|
+
probe_base_url_drift
|
|
171
|
+
|
|
136
172
|
# --- Build metadata JSON ---
|
|
137
173
|
# Assemble entries first; only emit `{ ... }` if at least one field is
|
|
138
174
|
# set. Each entry is a `"key":"value"` JSON pair with the value
|
|
@@ -213,8 +249,12 @@ for FILE in "${FILES[@]}"; do
|
|
|
213
249
|
fi
|
|
214
250
|
FILE_SIZE=$(stat -c%s "$FILE" 2>/dev/null || stat -f%z "$FILE")
|
|
215
251
|
echo -n "Uploading ${FILENAME}... "
|
|
252
|
+
# `-L` follows 3xx redirects (devaudit-installer#143). The portal host
|
|
253
|
+
# has moved before (devaudit.metasession.co → devaudit.ai); without -L
|
|
254
|
+
# every consumer's CI silently fails on a stale base URL. `--max-redirs 3`
|
|
255
|
+
# bounds the follow so a misconfigured redirect loop can't hang CI.
|
|
216
256
|
CURL_ARGS=(
|
|
217
|
-
-X POST "$UPLOAD_URL"
|
|
257
|
+
-X POST -L --max-redirs 3 "$UPLOAD_URL"
|
|
218
258
|
-H "Authorization: Bearer ${DEVAUDIT_API_KEY}"
|
|
219
259
|
-F "file=@${FILE}"
|
|
220
260
|
-F "projectSlug=${PROJECT_SLUG}"
|
|
@@ -39,7 +39,7 @@ These standards apply to all Metasession products, client engagements, and inter
|
|
|
39
39
|
### Speed over Exhaustiveness
|
|
40
40
|
- Fast feedback prioritized (unit tests < 30 seconds)
|
|
41
41
|
- Parallelization and sharding for E2E suites
|
|
42
|
-
- Strategic test selection based on code changes
|
|
42
|
+
- Strategic test selection based on code changes — first concrete implementation is the three-tier E2E gating model (smoke / critical / regression), see Test_Strategy.md § *E2E gating model* (v0.1.53+)
|
|
43
43
|
- Regression suites optimized for execution time
|
|
44
44
|
|
|
45
45
|
### Traceability
|
|
@@ -150,6 +150,18 @@ Testing effort is prioritized by risk level, determined at planning time:
|
|
|
150
150
|
|
|
151
151
|
AI involvement in Medium or High categories raises risk by one level. The Test Strategy defines specific testing depth requirements per level.
|
|
152
152
|
|
|
153
|
+
### E2E gate enforcement (v0.1.53+)
|
|
154
|
+
|
|
155
|
+
The MoSCoW prioritisation of acceptance criteria maps onto three E2E gates, each enforced at a different point in the workflow:
|
|
156
|
+
|
|
157
|
+
- **Must-tier ACs in the smoke subset** must pass on every push to the integration branch. Blocking — a red smoke gate stops the integration hop.
|
|
158
|
+
- **Must-tier ACs in the critical subset** must pass on every PR to the release branch. Blocking — a red critical gate stops the release.
|
|
159
|
+
- **Should/Could-tier ACs (full regression)** must pass on the next post-merge run to the release branch OR a hotfix issue is auto-filed. Not pre-merge blocking — the safety net is post-hoc triage by the operator within working hours.
|
|
160
|
+
|
|
161
|
+
Operator override on a hotfix issue (accept-with-rationale) is logged on the issue itself + carried in the next release's `test-execution-summary.md` design record (devaudit#50). The framework does not permit silently shipping a failing test — every red regression spec ends as either fixed, reverted, or accepted-with-recorded-rationale.
|
|
162
|
+
|
|
163
|
+
See Test_Strategy.md § *System Testing (E2E)* — *E2E gating model* for the tier definitions + cost philosophy.
|
|
164
|
+
|
|
153
165
|
---
|
|
154
166
|
|
|
155
167
|
## Roles & Responsibilities
|
|
@@ -38,6 +38,24 @@ Validates interactions between system components — API contracts, service inte
|
|
|
38
38
|
|
|
39
39
|
End-to-end validation of complete user workflows from UI to database. Primary responsibility of the QA team. Automated using BDD frameworks that map acceptance criteria to executable specifications. Covers 100% of critical user paths.
|
|
40
40
|
|
|
41
|
+
#### E2E gating model — three tiers (devaudit#152 follow-up, v0.1.53)
|
|
42
|
+
|
|
43
|
+
Full E2E regression on every PR is expensive — a 30+ minute wait per release-PR blocks velocity for diminishing marginal safety once smoke covers the headline flows. The framework's gating model maps the existing MoSCoW prioritisation onto three tiers, each gated at a different point in the workflow:
|
|
44
|
+
|
|
45
|
+
| Tier | Location | When it runs | Wall-clock target | Audit role |
|
|
46
|
+
|---|---|---|---|---|
|
|
47
|
+
| **smoke** | `e2e/smoke/*.spec.ts` | every push to `$INTEGRATION_BRANCH` (via `ci.yml`) | ~3–5 min | fast feedback on every change |
|
|
48
|
+
| **critical** | `e2e/smoke/` + `e2e/critical/*.spec.ts` | PR-to-`$RELEASE_BRANCH` (via `e2e-regression.yml`) | ~10–15 min | release-readiness Must gate |
|
|
49
|
+
| **regression** | all `e2e/**/*.spec.ts` | nightly + push-to-`$RELEASE_BRANCH` + `workflow_dispatch` | ~35 min (or your project's full pack) | full audit trail + drift catch |
|
|
50
|
+
|
|
51
|
+
The mapping to MoSCoW: **Must-priority SRS items live in `e2e/smoke/` (fast feedback) and `e2e/critical/` (release gate); Should/Could items live in `e2e/<area>/` (anywhere outside `e2e/smoke/` and `e2e/critical/`) and are covered by the regression tier.** The classifier is the developer authoring the spec — see `skills/e2e-test-engineer/SKILL.md` Phase 3 for the decision tree.
|
|
52
|
+
|
|
53
|
+
**Cost philosophy.** Smoke protects every push from breaking the headline flow. Critical protects every release from a Must-tier regression. Full regression protects the audit trail + catches drift overnight. We accept that a Should/Could-tier regression *can* slip past the PR gate; we catch it on the next post-merge run + auto-file a hotfix issue. The framework prefers this over a 35-min wait on every release because operator velocity matters and the safety net stays intact.
|
|
54
|
+
|
|
55
|
+
**Post-merge safety net.** Every push to `$RELEASE_BRANCH` re-runs the full regression. On failure, `e2e-regression.yml` auto-files a `bug, priority:high` issue tagging the merge commit + the failing specs. The operator triages within working hours — hotfix forward, revert the commit, or accept-with-rationale if the failure is environmental. No automated revert (false positives + flakes + UAT-data drift are real classes; an operator triages each individually).
|
|
56
|
+
|
|
57
|
+
**Reference workflow.** A copy-pasteable `e2e-regression.yml` shape lives at `skills/e2e-test-engineer/references/e2e-regression-3-tier.yml`. Adoption is opt-in per consumer (the framework doesn't currently sync this workflow; consumers own their own `e2e-regression.yml`).
|
|
58
|
+
|
|
41
59
|
### Acceptance Testing
|
|
42
60
|
|
|
43
61
|
Validates that requirements and acceptance criteria are met from a business perspective. Conducted in staging environments mirroring production. Requires sign-off from Product Managers. May include formal UAT with stakeholders for regulated features.
|
|
@@ -206,7 +206,7 @@ I have reviewed the ADR-worthiness verdict above and confirm:
|
|
|
206
206
|
|
|
207
207
|
**Step 2 — Tag for upload.** The CI's `compliance-evidence.yml` uploads this file as `evidence_type=architecture_decision` (added to META-COMPLY's `EVIDENCE_TYPE_REGISTRY` in the paired sub-PR). The framework-coverage matrix maps this to clauses per `framework-registry-auditor`'s review — see the META-COMPLY-side PR for the final clause attributions (v1 may ship orphan-by-design if the auditor rejects proposed mappings; see [`requirements-aligner`](../requirements-aligner/SKILL.md) for the precedent).
|
|
208
208
|
|
|
209
|
-
**Step 3 —
|
|
209
|
+
**Step 3 — Return to the running `sdlc-implementer` context.** The skill's job ends at the artefact + the operator sign-off. The orchestrator immediately continues with the rest of Stage 3 inline — no pause, no operator nudge needed. (Skills run in the same invocation context; control returns synchronously when this skill exits. See `sdlc-implementer/SKILL.md` § *Sub-skill return semantics*.)
|
|
210
210
|
|
|
211
211
|
### Phase 3 — Per-REQ ad-hoc audit
|
|
212
212
|
|
|
@@ -131,6 +131,26 @@ Resist padding. A new endpoint doesn't need a test that re-verifies login if log
|
|
|
131
131
|
|
|
132
132
|
For each scenario, write a one-line description. Present the full grouped list to the user before writing any code: *"Here's the coverage I'd propose — anything to add or drop?"*
|
|
133
133
|
|
|
134
|
+
#### Classify each spec into a tier (devaudit#152 follow-up, v0.1.53)
|
|
135
|
+
|
|
136
|
+
When designing each scenario, also pick the tier it'll live in. Three tiers map to MoSCoW priority + gating point (see `Test_Strategy.md` § *E2E gating model*):
|
|
137
|
+
|
|
138
|
+
| Tier | File location | Pick this when… |
|
|
139
|
+
|---|---|---|
|
|
140
|
+
| **smoke** | `e2e/smoke/*.spec.ts` | Cross-cutting sanity that proves the app is up: login, basic nav, one canonical CRUD per main domain. Runs on every push to the integration branch. Keep small — total smoke wall-clock target is ~3–5 min. |
|
|
141
|
+
| **critical** | `e2e/critical/*.spec.ts` | Must-priority SRS item that breaks a headline flow if it regresses. Examples: payment authorisation, order completion, admin permission editing, RBAC enforcement on financial surfaces. Runs on PR-to-release-branch. Total critical wall-clock target ~10–15 min (includes smoke). |
|
|
142
|
+
| **regression** | `e2e/<area>/*.spec.ts` | Should/Could-priority SRS item, edge cases, less-load-bearing flows. Runs nightly + post-merge + dispatch. Total full pack can be 30+ min; that's the point of the tier. |
|
|
143
|
+
|
|
144
|
+
Decision tree, applied per scenario:
|
|
145
|
+
|
|
146
|
+
1. **Does the spec prove a Must-priority SRS AC (or a baseline "app is up" sanity check)?** → smoke or critical.
|
|
147
|
+
2. **Within Must: would a regression here break a headline business flow visible to a paying customer or stop a release from shipping?** → critical. Otherwise → smoke.
|
|
148
|
+
3. **Should/Could priority, edge case, advanced flow?** → regression (file under `e2e/<area>/`, not under `e2e/smoke/` or `e2e/critical/`).
|
|
149
|
+
|
|
150
|
+
When you can't decide between critical and regression, default to **regression** — promoting a spec from regression → critical later is cheap (move the file); demoting in the other direction is rarely needed but equally cheap. The cost of putting a Should spec in critical is everyone waiting longer on every PR-to-main for a low-value signal.
|
|
151
|
+
|
|
152
|
+
Record the tier choice in the eventual `test-execution-summary.md` § *Test design* (devaudit#50) — Layers covered should name which tier each new spec landed in. Reviewers verify the tier choice is defensible during the WAIT CHECKPOINT.
|
|
153
|
+
|
|
134
154
|
### Phase 4 — Reconcile with existing tests
|
|
135
155
|
|
|
136
156
|
For the area touched by the change, look at what's already there.
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
# Reference: three-tier E2E gating workflow (devaudit#152 follow-up, v0.1.53)
#
# Copy this into your consumer-owned .github/workflows/e2e-regression.yml
# to adopt the 3-tier model: smoke (every develop push, fast) / critical
# (PR-to-main, ~10-15 min target) / regression (nightly + push-to-main +
# dispatch, full audit trail with auto-issue on failure).
#
# The framework does NOT sync this file automatically — your consumer
# owns its e2e-regression.yml. Apply the patterns below to your own
# file; keep any consumer-specific env / matrix / runner customisations.
#
# Tier definitions:
#   - smoke      — runs on develop push via ci.yml (no change here)
#   - critical   — Playwright project that selects e2e/smoke/ + e2e/critical/
#   - regression — Playwright project that selects all e2e/**/*.spec.ts
#
# playwright.config.ts must define the `critical` project for this to
# fire; if it doesn't, the gate falls back to the existing `smoke`
# project so PR-to-main stays green during migration.
#
# Security note: values that originate from the workflow trigger
# (workflow_dispatch inputs, step outputs derived from them) are passed
# to scripts through `env:` rather than interpolated with ${{ ... }}
# inside `run:`. Inline interpolation is expanded before the shell sees
# the script, so a crafted input could otherwise inject shell commands.

name: E2E Regression

on:
  pull_request:
    branches: [main] # critical-tier gate before merge
  push:
    branches: [main] # full regression after merge; auto-issues on failure
  schedule:
    - cron: '0 2 * * *' # nightly full regression
  workflow_dispatch:
    inputs:
      specs:
        description: 'Optional: space-separated spec paths or --grep pattern for a scoped run. Empty = full regression.'
        required: false

permissions:
  contents: read
  issues: write # post-merge auto-issue on regression failure

concurrency:
  group: e2e-regression-${{ github.ref }}
  # Only PR runs are cancelled by a newer push; post-merge, nightly and
  # dispatch runs always finish so the audit trail has no gaps.
  cancel-in-progress: ${{ github.event_name == 'pull_request' }}

jobs:
  e2e:
    name: E2E Regression Tests
    runs-on: ubuntu-latest # adapt to your runner; e.g. self-hosted, ubuntu-24.04
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0 # for E2E_NEW_SPECS computation

      - uses: actions/setup-node@v4
        with:
          node-version: '22' # match your project
          cache: 'npm'

      - name: Install dependencies
        run: npm ci --legacy-peer-deps

      - name: Install Playwright browsers
        run: npx playwright install --with-deps chromium

      # Decide which Playwright project to run based on the trigger.
      # PR-to-main uses critical with smoke fall-back; push-to-main and
      # schedule run the full regression project; workflow_dispatch
      # accepts an optional spec filter.
      - name: Determine E2E project + spec selector
        id: select
        env:
          EVENT: ${{ github.event_name }}
          # Empty on every trigger except workflow_dispatch.
          INPUT_SPECS: ${{ github.event.inputs.specs }}
        run: |
          set -euo pipefail
          case "$EVENT" in
            pull_request)
              if grep -qE "name:\s*['\"]critical['\"]" playwright.config.ts 2>/dev/null; then
                echo "project=critical" >> "$GITHUB_OUTPUT"
                echo "Using critical-tier project (smoke + e2e/critical/)"
              else
                echo "project=smoke" >> "$GITHUB_OUTPUT"
                echo "::warning::No 'critical' Playwright project defined; falling back to smoke. See e2e-test-engineer/references/e2e-regression-3-tier.yml + the Phase 3 tier-classification guide."
              fi
              echo "specs=" >> "$GITHUB_OUTPUT"
              ;;
            push|schedule)
              echo "project=regression" >> "$GITHUB_OUTPUT"
              echo "specs=" >> "$GITHUB_OUTPUT"
              echo "Running full regression project"
              ;;
            workflow_dispatch)
              echo "project=regression" >> "$GITHUB_OUTPUT"
              # Flatten CR/LF so a multi-line input cannot smuggle extra
              # key=value pairs into $GITHUB_OUTPUT.
              SPECS="${INPUT_SPECS//$'\r'/ }"
              SPECS="${SPECS//$'\n'/ }"
              echo "specs=${SPECS}" >> "$GITHUB_OUTPUT"
              if [ -n "$SPECS" ]; then
                echo "Scoped dispatch: ${SPECS}"
              fi
              ;;
            *)
              # Fail loudly instead of running with an empty --project.
              echo "::error::Unsupported trigger '${EVENT}' — no E2E project is mapped to it."
              exit 1
              ;;
          esac

      - name: Run E2E suite
        id: run
        env:
          PLAYWRIGHT_HTML_REPORTER_OPEN: never
          PLAYWRIGHT_JSON_OUTPUT_NAME: e2e-regression-results.json
          PROJECT: ${{ steps.select.outputs.project }}
          SPECS: ${{ steps.select.outputs.specs }}
          # Add your e2e_env values here as needed (DEVAUDIT_BASE_URL etc.)
        run: |
          set -euo pipefail
          if [ -n "$SPECS" ]; then
            # $SPECS is intentionally unquoted: it is a space-separated
            # list of spec paths / flags that must word-split (and glob).
            # shellcheck disable=SC2086
            npx playwright test --project="$PROJECT" --reporter=json,html $SPECS
          else
            npx playwright test --project="$PROJECT" --reporter=json,html
          fi

      - uses: actions/upload-artifact@v4
        if: always()
        with:
          name: e2e-regression-report
          path: |
            e2e-regression-results.json
            playwright-report/
            test-results/

      # ─────────────────────────────────────────────────────────────
      # Post-merge auto-issue on regression failure (push:branches:[main])
      #
      # Catches regressions that slipped past the critical-tier PR gate.
      # Opens a high-priority issue tagging the merge commit + the
      # failing specs so the operator can triage within working hours.
      # No auto-revert — that's intentionally an operator decision.
      # ─────────────────────────────────────────────────────────────
      - name: Open hotfix issue on post-merge regression
        if: failure() && github.event_name == 'push' && github.ref == 'refs/heads/main'
        env:
          GH_TOKEN: ${{ github.token }}
        run: |
          set -euo pipefail
          MERGE_SHA="${{ github.sha }}"
          MERGE_SHA_SHORT=$(echo "$MERGE_SHA" | cut -c1-7)
          RUN_URL="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"

          # Extract failing spec names from the JSON reporter output if available.
          # In the Playwright JSON report a *spec* object carries `title`,
          # `file`, `ok` and `tests`; the per-attempt `status` of
          # "failed"/"timedOut" lives on nested result objects that have
          # no title. So select specs with ok == false, not results.
          FAILING=""
          if [ -f e2e-regression-results.json ]; then
            FAILING=$(jq -r '
              [.. | objects
                  | select(has("tests") and .ok == false)
                  | "\(.file // "?"): \(.title // "?")"]
              | unique | .[]
            ' e2e-regression-results.json 2>/dev/null | head -20 || true)
          fi
          if [ -z "$FAILING" ]; then
            FAILING="(see the failing run logs — could not parse spec titles from reporter output)"
          fi

          BODY=$(cat <<EOF
          ## Post-merge regression caught on \`main\`

          The full regression suite failed on the post-merge run for commit \`${MERGE_SHA_SHORT}\`. The critical-tier PR gate let this slip through.

          **Failing specs (best-effort extracted from the JSON reporter):**

          \`\`\`
          ${FAILING}
          \`\`\`

          **Triage actions:**

          - [ ] Read the run log: ${RUN_URL}
          - [ ] Pull \`e2e-regression-report\` artifact from the run; inspect \`test-results/<spec>/error-context.md\` for page state at failure
          - [ ] Decide: hotfix on \`main\`, revert \`${MERGE_SHA_SHORT}\`, or accept-with-rationale if the failure is environmental
          - [ ] If the failing spec is a Must-tier candidate that should have caught this pre-merge, move it from \`e2e/\` to \`e2e/critical/\` so the next PR-to-main runs it

          **Auto-filed by:** \`e2e-regression.yml\` (devaudit#152 3-tier gating, v0.1.53+)
          EOF
          )

          gh issue create \
            --title "[hotfix] Post-merge regression on \`${MERGE_SHA_SHORT}\` — full E2E failed" \
            --body "$BODY" \
            --label "bug,priority:high"
|
|
@@ -132,7 +132,7 @@ I have reviewed the AC-to-SRS-item traces above and confirm:
|
|
|
132
132
|
|
|
133
133
|
**Step 2 — Tag for upload.** The CI's `compliance-evidence.yml` uploads this file as `evidence_type=srs_alignment` (added to META-COMPLY's `EVIDENCE_TYPE_REGISTRY` in the paired sub-PR). The framework-coverage matrix then closes `ISO29119.3.4` (Test Plan — requirements traceability) and `SOC2.CC2.1` (Communication of policies — when paired with INSTRUCTIONS.md) for this REQ.
|
|
134
134
|
|
|
135
|
-
**Step 3 —
|
|
135
|
+
**Step 3 — Return to the running `sdlc-implementer` context.** The skill's job ends at the artefact + the operator sign-off. The orchestrator immediately continues with the rest of Stage 3 (security summary, evidence upload, release ticket) inline — no pause, no operator nudge needed. (Skills run in the same invocation context; control returns synchronously when this skill exits. See `sdlc-implementer/SKILL.md` § *Sub-skill return semantics*.)
|
|
136
136
|
|
|
137
137
|
### Phase 3 — Per-REQ ad-hoc audit
|
|
138
138
|
|
|
@@ -163,7 +163,7 @@ I have reviewed the risk register entries above and confirm:
|
|
|
163
163
|
|
|
164
164
|
**Step 2 — Tag for upload.** The CI's `compliance-evidence.yml` uploads this file as `evidence_type=risk_assessment` (added to META-COMPLY's `EVIDENCE_TYPE_REGISTRY` in the paired sub-PR). The framework-coverage matrix attribution depends on `framework-registry-auditor`'s review — see the META-COMPLY-side PR for final clause closures.
|
|
165
165
|
|
|
166
|
-
**Step 3 —
|
|
166
|
+
**Step 3 — Return to the running `sdlc-implementer` context.** The skill's job ends at the artefact + the operator sign-off. The orchestrator immediately continues with the rest of Stage 3 inline — no pause, no operator nudge needed. (Skills run in the same invocation context; control returns synchronously when this skill exits. See `sdlc-implementer/SKILL.md` § *Sub-skill return semantics*.)
|
|
167
167
|
|
|
168
168
|
### Phase 4 — `solo_with_gap` approval check
|
|
169
169
|
|
|
@@ -45,6 +45,20 @@ The orchestrator MUST invoke `e2e-test-engineer` for end-to-end and visual-regre
|
|
|
45
45
|
|
|
46
46
|
Unit-test and integration-test work stays with this skill until a counterpart unit-test skill ships. The full sub-skill call graph lives at [`references/call-graph.md`](./references/call-graph.md).
|
|
47
47
|
|
|
48
|
+
## Sub-skill return semantics (devaudit-installer#144)
|
|
49
|
+
|
|
50
|
+
**Sub-skills return findings synchronously; do not wait for operator confirmation between sub-skill returns.** Invoking a Skill loads its instructions into this same invocation context — there is no separate agent, no separate process. When a sub-skill emits its final summary and stops, this orchestrator's next step runs immediately, in the same turn if possible.
|
|
51
|
+
|
|
52
|
+
The literal phrasing _"Return to the running `sdlc-implementer` context"_ at the tail of each sub-skill (`requirements-aligner`, `adr-author`, `risk-register-keeper`, and any future siblings) does **not** mean "pause and wait for the operator to nudge you" — it means "you have control again; keep going with the parent workflow." A chain of three sub-skill calls in Phase 1 (steps 6 → 7 → 8) or Phase 3 (steps 1 → 2 → 3) is a single flowing sequence; do not stop between them.
|
|
53
|
+
|
|
54
|
+
The only pauses in the whole workflow are the explicitly-named checkpoints:
|
|
55
|
+
|
|
56
|
+
- **Phase 1 step 11** — pause for human approval **iff** risk class is HIGH or CRITICAL (or `--require-plan-approval` is set).
|
|
57
|
+
- **Phase 4 step 5** — hard stop, release PR opened, awaiting UAT review on the portal.
|
|
58
|
+
- **Phase 5** — invoked separately by the user (`resume REQ-XXX`).
|
|
59
|
+
|
|
60
|
+
Everything else is silent continuation. The rule is **opt-in-to-pause, not opt-out-of-pause**. If you find yourself stopping after a sub-skill's "Return to the running `sdlc-implementer` context" line and waiting for the operator to ask _"is anything happening?"_ — that is the bug this section exists to prevent. Keep going.
|
|
61
|
+
|
|
48
62
|
## SDLC navigability — LAST/NEXT status sticky (devaudit#131)
|
|
49
63
|
|
|
50
64
|
Long-running SDLC issues accumulate dozens of comments across multiple Claude Code sessions. The operator returning to the thread should be able to answer two questions in under five seconds:
|
|
@@ -204,13 +218,19 @@ _Workflow tweak (CI artifact upload, gate timeout bump, etc.)_
|
|
|
204
218
|
|
|
205
219
|
Reached from Phase 0 for non-tracked change-types. The skill drives this end-to-end; the only difference from the tracked cycle is the absence of _ceremony_, not the absence of _guidance_. It pauses only where a human is genuinely required (PR review, merge).
|
|
206
220
|
|
|
221
|
+
**CI trigger shape — read once before step 7.** The DevAudit-Installer-generated `ci.yml.template` defaults to **post-merge-only** triggers (`push: branches: [<integration>]`, no `pull_request:` trigger). On these projects there will be **no PR-time checks** to wait for — review + merge is the gate, and the post-merge CI run on the integration branch is the actual quality gate. A consumer who has explicitly added a `pull_request:` trigger has PR-time CI in addition. The skill must adapt step 7's wording to whichever shape the project uses; never poll a PR for checks that the template doesn't trigger.
|
|
222
|
+
|
|
207
223
|
1. **Branch off `$INTEGRATION_BRANCH`** with a housekeeping prefix — `chore/…`, `docs/…`, `ci/…`, `build/…`, `test/…`, or `compliance/…` for a doc-only change against an existing REQ.
|
|
208
224
|
2. **Make the change**, single-purpose. If it turns out to touch runtime behaviour in `app/` / `lib/`, stop and reclassify as tracked — the commit-type rule is the backstop.
|
|
209
225
|
3. **Run all gates locally** (`npm run lint`, `npx tsc --noEmit`, the test suite, `semgrep`, `npm audit` — or the stack-adapter equivalents). Trivial ≠ unverified; never `--no-verify`.
|
|
210
226
|
4. **Commit** with a housekeeping type and **no** `REQ-XXX` — `docs:` / `chore:` / `ci:` / `build:` / `test:` / `revert:` are exempt from the `[REQ-XXX]` rule; a `compliance:` doc-only change references the existing REQ. `Co-Authored-By: Claude` if AI-assisted.
|
|
211
227
|
5. **Push and open the PR** into `$INTEGRATION_BRANCH` (`gh pr create --base "$INTEGRATION_BRANCH" --head <branch>`). CI runs the same quality gates; `compliance-validation.yml` finds no `REQ-XXX` and skips artifact validation.
|
|
212
228
|
6. **For `ci:` changes, verify-via-dispatch before merging.** `gh workflow run <workflow.yml> --ref <branch>` fires the modified workflow against the PR branch. If the change broke a step, the dispatch run fails loudly and you fix-forward _before_ the merge ships the broken gate to `$INTEGRATION_BRANCH`. This is the cheapest insurance against silent CI regressions — a `ci:` change that breaks a gate is most damaging _after_ it lands.
|
|
213
|
-
7. **Report honest status
|
|
229
|
+
7. **Report honest status — adapt to the project's CI trigger shape (devaudit-installer#145).** Check whether `.github/workflows/ci.yml` has a `pull_request:` trigger.
|
|
230
|
+
- **PR-time CI present** — wait for CI to settle, name any failing check, fix and re-push. Never announce "ready" while a required check is red.
|
|
231
|
+
- **Post-merge-only CI (the DevAudit-Installer default — `push: branches: [<integration>]` with no `pull_request:` trigger)** — say so explicitly in the LAST/NEXT sticky: _"no PR-time checks will fire; review + merge is the gate; CI runs post-merge on `$INTEGRATION_BRANCH`."_ Don't poll the PR for checks that won't arrive. The post-merge run (CI Pipeline + Compliance Evidence Upload on the integration branch) is the actual gate; address it via fix-forward if it fails.
|
|
232
|
+
|
|
233
|
+
Either way, never bypass a gate (no `--no-verify`, no `--admin` merge of a red required check); the only difference is **where** you wait for the gate to fire — before merge vs. after merge.
|
|
214
234
|
8. **Guide review → merge.** A human still reviews the PR (separation of duties). There is **no** portal release approval, no UAT four-eyes, no Production gate, and no close-out. Merge once CI is green and the reviewer approves.
|
|
215
235
|
9. **Done.** A housekeeping push produces at most a bare-date release (`vYYYY.MM.DD`) with no approval gate; a doc-only push attaches its docs to the existing `REQ-XXX` release. No further action required — report completion and stop.
|
|
216
236
|
|
|
@@ -484,8 +484,21 @@ jobs:
|
|
|
484
484
|
# moment each acceptance criterion is demonstrated, NOT the Playwright
|
|
485
485
|
# report's trailing/failure capture. evidenceType `screenshot` →
|
|
486
486
|
# image/png renders inline. Only when a pending release ticket defines
|
|
487
|
-
# the in-scope REQ(s); skipped on ordinary dev pushes.
|
|
488
|
-
#
|
|
487
|
+
# the in-scope REQ(s); skipped on ordinary dev pushes.
|
|
488
|
+
#
|
|
489
|
+
# devaudit-installer#147 — per-REQ glob scoping + UPLOAD_FAILURES
|
|
490
|
+
# accounting. Previously the SHOTS glob was project-wide
|
|
491
|
+
# (`ci-evidence/compliance/evidence/*/screenshots/*.png`) and the
|
|
492
|
+
# for-REQ loop uploaded that cross-product against every in-scope
|
|
493
|
+
# REQ. Out-of-scope REQs' legacy PNGs whose filenames don't match
|
|
494
|
+
# `REQ-XXX-AC<n>-<slug>.png` then 400'd at the portal — and the
|
|
495
|
+
# `|| echo "::warning::..."` swallowed the failure so the step
|
|
496
|
+
# reported SUCCESS while screenshots were quietly missing. Now each
|
|
497
|
+
# in-scope REQ globs its OWN screenshots/ directory; out-of-scope
|
|
498
|
+
# legacy folders are intentionally ignored. To re-upload an older
|
|
499
|
+
# REQ's screenshots, add its release ticket back under
|
|
500
|
+
# compliance/pending-releases/. Failures now bump UPLOAD_FAILURES
|
|
501
|
+
# so the step exits non-zero and the release PR turns red.
|
|
489
502
|
SHOT_REQS=()
|
|
490
503
|
if [ -d compliance/pending-releases ]; then
|
|
491
504
|
for TICKET in compliance/pending-releases/RELEASE-TICKET-REQ-*.md; do
|
|
@@ -494,14 +507,8 @@ jobs:
|
|
|
494
507
|
done
|
|
495
508
|
fi
|
|
496
509
|
shopt -s nullglob
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
# copies under compliance/evidence/ are redundant here — globbing both
|
|
500
|
-
# uploaded every image twice (deduped on display, but wasteful + rate-limit
|
|
501
|
-
# pressure) and swept in legacy screenshots from unrelated past releases.
|
|
502
|
-
SHOTS=(ci-evidence/compliance/evidence/*/screenshots/*.png)
|
|
503
|
-
if [ "${#SHOT_REQS[@]}" -gt 0 ] && [ "${#SHOTS[@]}" -gt 0 ]; then
|
|
504
|
-
echo "Uploading ${#SHOTS[@]} evidence screenshot(s) for: ${SHOT_REQS[*]}"
|
|
510
|
+
if [ "${#SHOT_REQS[@]}" -gt 0 ]; then
|
|
511
|
+
echo "Uploading per-AC evidence screenshots for: ${SHOT_REQS[*]}"
|
|
505
512
|
SHOT_TMP="$(mktemp -d)"
|
|
506
513
|
for REQ in "${SHOT_REQS[@]}"; do
|
|
507
514
|
# Per-REQ release metadata for the portal (no-clobbered on existing rows):
|
|
@@ -518,7 +525,16 @@ jobs:
|
|
|
518
525
|
REQ_META=()
|
|
519
526
|
[ -n "$REQ_TITLE" ] && REQ_META+=(--release-title "$REQ_TITLE")
|
|
520
527
|
[ -n "$REQ_CT" ] && REQ_META+=(--change-type "$REQ_CT")
|
|
521
|
-
|
|
528
|
+
# devaudit-installer#147 — per-REQ scoped glob. ONLY this REQ's
|
|
529
|
+
# own screenshots/ directory; legacy folders for REQs not in
|
|
530
|
+
# SHOT_REQS are intentionally ignored.
|
|
531
|
+
REQ_SHOTS=(ci-evidence/compliance/evidence/"$REQ"/screenshots/*.png)
|
|
532
|
+
if [ "${#REQ_SHOTS[@]}" -eq 0 ]; then
|
|
533
|
+
echo "No per-AC screenshots for ${REQ} (none captured by evidenceShot this run)"
|
|
534
|
+
continue
|
|
535
|
+
fi
|
|
536
|
+
echo "Uploading ${#REQ_SHOTS[@]} screenshot(s) for ${REQ}"
|
|
537
|
+
for PNG in "${REQ_SHOTS[@]}"; do
|
|
522
538
|
# The basename is the canonical evidenceShot filename
|
|
523
539
|
# `REQ-XXX-AC<n>-<slug>.png`. The portal validates this
|
|
524
540
|
# shape on upload — anything else is rejected with 400.
|
|
@@ -540,12 +556,22 @@ jobs:
|
|
|
540
556
|
fi
|
|
541
557
|
fi
|
|
542
558
|
|
|
543
|
-
bash scripts/upload-evidence.sh \
|
|
559
|
+
if bash scripts/upload-evidence.sh \
|
|
544
560
|
{{PROJECT_SLUG}} "$REQ" screenshot "$NAMED" \
|
|
545
561
|
--category test_report ${FLAGS} --release "$REQ" \
|
|
546
562
|
"${REQ_META[@]}" \
|
|
547
|
-
"${ORIGIN_META[@]}"
|
|
548
|
-
|
|
563
|
+
"${ORIGIN_META[@]}"
|
|
564
|
+
then
|
|
565
|
+
:
|
|
566
|
+
else
|
|
567
|
+
# devaudit-installer#147 — feed the same UPLOAD_FAILURES
|
|
568
|
+
# counter that gate uploads use, so a screenshot
|
|
569
|
+
# rejected by the portal's filename validator (or any
|
|
570
|
+
# other failure mode) turns the step RED instead of
|
|
571
|
+
# silently warning.
|
|
572
|
+
echo "::warning::evidence screenshot upload failed: ${PNG} -> ${REQ}"
|
|
573
|
+
UPLOAD_FAILURES=$((UPLOAD_FAILURES + 1))
|
|
574
|
+
fi
|
|
549
575
|
done
|
|
550
576
|
done
|
|
551
577
|
fi
|