argusqa-os 9.7.6 → 9.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -9
- package/glama.json +2 -2
- package/package.json +6 -6
- package/src/cli/pr-validate.js +275 -56
- package/src/mcp-server.js +142 -26
- package/src/orchestration/crawl-and-report.js +1 -1
- package/src/orchestration/orchestrator.js +34 -0
- package/src/utils/audit-depth.js +148 -0
- package/src/utils/deploy-preview.js +210 -0
- package/src/utils/github-api.js +242 -0
- package/src/utils/github-reporter.js +251 -39
- package/src/utils/html-reporter.js +283 -92
- package/src/utils/import-graph.js +290 -0
- package/src/utils/parallel-crawler.js +202 -0
- package/src/utils/pr-baseline.js +230 -0
- package/src/utils/pr-diff-analyzer.js +378 -40
- package/src/utils/route-discoverer.js +25 -3
package/README.md
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
[](https://www.npmjs.com/package/argusqa-os)
|
|
6
6
|
[](https://glama.ai/mcp/servers/ironclawdevs27/Argus)
|
|
7
|
-
[](test-harness/)
|
|
8
8
|
[](LICENSE)
|
|
9
9
|
|
|
10
10
|
**Argus catches the bugs your test suite misses — visual regressions, API loops, CSS drift, console noise, accessibility failures, and more — and delivers rich reports to Slack (or a local HTML dashboard).**
|
|
@@ -109,7 +109,7 @@ Ask Claude (or any MCP client) — no terminal required:
|
|
|
109
109
|
| `argus_last_report` | Return last JSON report without re-running |
|
|
110
110
|
| `argus_design_audit` | Figma URL → 13 design-token finding types (color, spacing, typography, shadows, etc.) |
|
|
111
111
|
| `argus_visual_diff` | Screenshot baseline comparison. Pass `updateBaseline: true` to reset. |
|
|
112
|
-
| `argus_pr_validate` | Fetch GitHub PR diff → map changed files to affected routes → targeted audit → `{ blocked, findings }` |
|
|
112
|
+
| `argus_pr_validate` | Fetch GitHub PR diff → map changed files to affected routes → targeted audit → baseline-aware block decision (blocks on findings the PR *introduces*) + idempotent PR comment + Check Run → `{ blocked, findings, baseline, reporting }` |
|
|
113
113
|
|
|
114
114
|
**Example prompts:**
|
|
115
115
|
|
|
@@ -217,8 +217,8 @@ npm run report:html # Generate reports/report.html from last JSON audit
|
|
|
217
217
|
npm run report:pdf # Export HTML report to A4 PDF (requires: npm install puppeteer)
|
|
218
218
|
npm run server # Start Slack slash-command server (port 3001)
|
|
219
219
|
npm run init # Interactive setup wizard
|
|
220
|
-
npm run test:unit #
|
|
221
|
-
npm run test:harness #
|
|
220
|
+
npm run test:unit # 366 unit tests — no Chrome required
|
|
221
|
+
npm run test:harness # 166-block correctness harness — requires Chrome
|
|
222
222
|
npm run test:harness:log # same, but tees full output to harness-results.txt
|
|
223
223
|
npm run test:coverage # merged unit + harness coverage gate (requires Chrome)
|
|
224
224
|
```
|
|
@@ -283,7 +283,7 @@ The included [workflow](.github/workflows/argus.yml) runs on push to `main`, dai
|
|
|
283
283
|
| `ARGUS_RETRY_ATTEMPTS` | `3` | Max retries for `navigate`/`fill` MCP calls |
|
|
284
284
|
| `ARGUS_WATCH_INTERVAL_MS` | `1000` | Watch mode poll interval (ms) |
|
|
285
285
|
| `ARGUS_WATCH_UI_PORT` | `3002` | Watch mode web dashboard port |
|
|
286
|
-
| `ARGUS_SOURCE_DIR` | — | App source path — enables env-var / feature-flag / dead-route analysis |
|
|
286
|
+
| `ARGUS_SOURCE_DIR` | — | App source path — enables env-var / feature-flag / dead-route analysis **and** framework-aware PR route mapping (import-graph: a changed component/stylesheet → only the routes that render it) |
|
|
287
287
|
| `ARGUS_ENV_FILE` | — | Path to app `.env` for codebase cross-reference |
|
|
288
288
|
| `SCREENSHOT_DIFF_THRESHOLD` | `0.5` | Pixel diff % threshold for environment comparison |
|
|
289
289
|
| `GITHUB_TOKEN` | — | For PR comments + Check Runs |
|
|
@@ -343,7 +343,7 @@ Argus is a **complementary layer**, not a replacement for unit or E2E tests:
|
|
|
343
343
|
|
|
344
344
|
## Known Limitations
|
|
345
345
|
|
|
346
|
-
All
|
|
346
|
+
All 961 harness assertions pass (`961/961`) — there are currently no known MCP- or Chrome-layer restrictions. Lighthouse now runs in headless (after the `lighthouse_audit` argument fix); the remaining soft assertions (perf traces, GC-dependent heap-growth) are promoted to counted hard assertions only in the weekly strict-soft lane (`harness-strict.yml`) via `ARGUS_HARNESS_STRICT_SOFT`.
|
|
347
347
|
|
|
348
348
|
---
|
|
349
349
|
|
|
@@ -362,8 +362,8 @@ src/
|
|
|
362
362
|
chrome-launcher.js — npm run chrome / argus-chrome — launches Chrome with correct flags
|
|
363
363
|
doctor.js — npm run doctor / argus-doctor — pre-flight checks
|
|
364
364
|
pr-validate.js — headless CI entry point for GitHub Actions
|
|
365
|
-
test-harness/ —
|
|
366
|
-
test/unit/ —
|
|
365
|
+
test-harness/ — 166-block correctness harness, 961 hard assertions, 63 fixture pages
|
|
366
|
+
test/unit/ — 366 Vitest unit tests (no Chrome required)
|
|
367
367
|
landing/ — Product landing page (React 19 + Vite + Tailwind)
|
|
368
368
|
```
|
|
369
369
|
|
|
@@ -374,7 +374,7 @@ Full source map → [CLAUDE.md](CLAUDE.md) · MCP/DSL reference → [SKILL.md](S
|
|
|
374
374
|
## Contributing
|
|
375
375
|
|
|
376
376
|
1. Fork the repo and create a branch
|
|
377
|
-
2. `npm run test:unit` — verify without Chrome (
|
|
377
|
+
2. `npm run test:unit` — verify without Chrome (366 tests)
|
|
378
378
|
3. `npm run test:harness` — full integration coverage (requires Chrome on port 9222)
|
|
379
379
|
4. Open a PR — Argus audits itself via the CI workflow
|
|
380
380
|
|
package/glama.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"$schema": "https://glama.ai/mcp/schemas/server.json",
|
|
3
3
|
"name": "argus",
|
|
4
|
-
"description": "AI-powered QA harness that audits web apps via Chrome DevTools Protocol. Catches JS errors, network failures, a11y violations, SEO issues, security headers, CSS regressions, and more — directly from Claude conversations. 9 MCP tools: argus_audit (fast 8-analyzer pass), argus_audit_full (Lighthouse + memory + responsive), argus_compare (dev vs staging diff), argus_last_report (retrieve last JSON report), argus_watch_snapshot (live tab snapshot without navigating), argus_get_context (LLM-optimized context + fix loop with snapshot_id diff), argus_design_audit (Figma design fidelity — 13 finding types), argus_visual_diff (screenshot baseline comparison, updateBaseline flag), argus_pr_validate (PR diff → affected routes → targeted audit → blocked flag). Every finding is post-processed with intelligent baseline filtering (cross-run noise classifier) and root cause linking (recent git commits mapped to new findings).
|
|
4
|
+
"description": "AI-powered QA harness that audits web apps via Chrome DevTools Protocol. Catches JS errors, network failures, a11y violations, SEO issues, security headers, CSS regressions, and more — directly from Claude conversations. 9 MCP tools: argus_audit (fast 8-analyzer pass), argus_audit_full (Lighthouse + memory + responsive), argus_compare (dev vs staging diff), argus_last_report (retrieve last JSON report), argus_watch_snapshot (live tab snapshot without navigating), argus_get_context (LLM-optimized context + fix loop with snapshot_id diff), argus_design_audit (Figma design fidelity — 13 finding types), argus_visual_diff (screenshot baseline comparison, updateBaseline flag), argus_pr_validate (PR diff → affected routes → baseline-aware targeted audit + PR comment/Check Run → blocked flag). Every finding is post-processed with intelligent baseline filtering (cross-run noise classifier) and root cause linking (recent git commits mapped to new findings). 166 test blocks, 961 hard assertions, 67 detection categories.",
|
|
5
5
|
"maintainers": ["ironclawdevs27"],
|
|
6
6
|
"tools": [
|
|
7
7
|
{
|
|
@@ -38,7 +38,7 @@
|
|
|
38
38
|
},
|
|
39
39
|
{
|
|
40
40
|
"name": "argus_pr_validate",
|
|
41
|
-
"description": "Targeted QA audit driven by a GitHub pull request diff. Fetches the PR's changed files, maps them to affected routes in your target config using path-slug heuristics (infrastructure changes trigger a full audit), then audits only those routes. Returns { findings, affectedRoutes, changedFiles, perRoute, summary, blocked, blockOn }. Use in CI to gate merges — blocked:true when findings meet the blockOn threshold (none/warning/critical, default: critical). Requires Chrome on --remote-debugging-port=9222. GITHUB_TOKEN env var recommended for private repos."
|
|
41
|
+
"description": "Targeted QA audit driven by a GitHub pull request diff. Fetches the PR's changed files, maps them to affected routes in your target config using path-slug heuristics (infrastructure changes trigger a full audit) — or, when ARGUS_SOURCE_DIR points at the checked-out app source, framework-aware import-graph mapping that narrows a changed component or stylesheet to only the routes whose pages import it (Next.js convention + monorepo-aware, conservative-fallback on any ambiguity so a regression is never missed), then audits only those routes. The audit target is resolved per-PR: an explicit targetUrl, else the PR's deploy-preview URL (ARGUS_PREVIEW_URL or opt-in GitHub-Deployments auto-detection), else TARGET_DEV_URL. Routes are audited with bounded concurrency (ARGUS_CONCURRENCY) and each route audit is timeout-bounded so a hung audit blocks rather than silently passing. Returns { findings, affectedRoutes, changedFiles, perRoute, summary, blocked, blockOn, baseline, reporting }. Blocking is baseline-aware: it gates on the findings the PR introduces vs a stored per-branch baseline (reports/baselines/<base-branch>.json, restored via actions/cache), failing safe to absolute counts when no baseline is available. When GITHUB_TOKEN and a resolvable PR are present it also posts/updates an Argus PR comment (surfacing new/persisting/resolved counts) and a GitHub Check Run (the same reporting the CI Action produces) — best-effort, never alters the block decision. Use in CI to gate merges — blocked:true when findings meet the blockOn threshold (none/warning/critical, default: critical). Requires Chrome on --remote-debugging-port=9222. GITHUB_TOKEN env var recommended for private repos."
|
|
42
42
|
}
|
|
43
43
|
]
|
|
44
44
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "argusqa-os",
|
|
3
|
-
"version": "9.7.6",
|
|
3
|
+
"version": "9.8.0",
|
|
4
4
|
"mcpName": "io.github.ironclawdevs27/argus",
|
|
5
5
|
"description": "Argus — AI-powered automated dev-testing platform using Chrome DevTools MCP and Claude Code",
|
|
6
6
|
"keywords": [
|
|
@@ -55,7 +55,7 @@
|
|
|
55
55
|
"test": "npm run test:unit && npm run test:harness",
|
|
56
56
|
"coverage:unit": "vitest run test/unit --coverage",
|
|
57
57
|
"coverage:harness": "c8 npm run test:harness",
|
|
58
|
-
"coverage:gate": "node scripts/coverage-gate.mjs --min-lines
|
|
58
|
+
"coverage:gate": "node scripts/coverage-gate.mjs --min-lines 70 --allow-uncovered src/mcp-server.js,src/orchestration/env-comparison.js,src/server/index.js",
|
|
59
59
|
"test:coverage": "npm run coverage:harness && npm run coverage:unit && npm run coverage:gate",
|
|
60
60
|
"report:html": "node src/utils/html-reporter.js",
|
|
61
61
|
"report:pdf": "node src/utils/pdf-exporter.js",
|
|
@@ -64,9 +64,9 @@
|
|
|
64
64
|
"dependencies": {
|
|
65
65
|
"@modelcontextprotocol/sdk": "^1.29.0",
|
|
66
66
|
"@opentelemetry/api": "^1.9.1",
|
|
67
|
-
"@opentelemetry/sdk-node": "^0.
|
|
68
|
-
"@slack/web-api": "^7.
|
|
69
|
-
"axe-core": "^4.12.
|
|
67
|
+
"@opentelemetry/sdk-node": "^0.219.0",
|
|
68
|
+
"@slack/web-api": "^7.17.0",
|
|
69
|
+
"axe-core": "^4.12.1",
|
|
70
70
|
"dotenv": "^17.4.2",
|
|
71
71
|
"express": "^5.2.1",
|
|
72
72
|
"pino": "^10.3.1",
|
|
@@ -77,7 +77,7 @@
|
|
|
77
77
|
},
|
|
78
78
|
"devDependencies": {
|
|
79
79
|
"@vitest/coverage-v8": "^4.1.8",
|
|
80
|
-
"c8": "^
|
|
80
|
+
"c8": "^11.0.0",
|
|
81
81
|
"fast-check": "^4.8.0",
|
|
82
82
|
"istanbul-lib-coverage": "^3.2.2",
|
|
83
83
|
"vitest": "^4.1.8"
|
package/src/cli/pr-validate.js
CHANGED
|
@@ -25,8 +25,16 @@ import fs from 'fs';
|
|
|
25
25
|
import path from 'path';
|
|
26
26
|
import { fileURLToPath } from 'url';
|
|
27
27
|
import { createMcpClient } from '../utils/mcp-client.js';
|
|
28
|
-
import {
|
|
29
|
-
import { fetchPrFiles, mapFilesToRoutes } from '../utils/pr-diff-analyzer.js';
|
|
28
|
+
import { crawlRouteWithDepth } from '../orchestration/crawl-and-report.js';
|
|
29
|
+
import { resolveAuditDepth, selectAnalyzers } from '../utils/audit-depth.js';
|
|
30
|
+
import { auditRoutesConcurrently, auditRouteWithRetry, routeResilienceFromEnv } from '../utils/parallel-crawler.js';
|
|
31
|
+
import { fetchPrFiles, mapFilesToRoutesDeep, resolveAnnotationTarget } from '../utils/pr-diff-analyzer.js';
|
|
32
|
+
import { resolveTargetUrl } from '../utils/deploy-preview.js';
|
|
33
|
+
import { reportPrValidation } from '../utils/github-reporter.js';
|
|
34
|
+
import { getCurrentBranch } from '../utils/baseline-manager.js';
|
|
35
|
+
import {
|
|
36
|
+
decidePrBlock, resolvePrBaselineFile, loadPrBaseline, savePrBaseline, tagFindingNovelty, severityTally,
|
|
37
|
+
} from '../utils/pr-baseline.js';
|
|
30
38
|
|
|
31
39
|
// ── Exported helpers (testable without Chrome) ────────────────────────────────
|
|
32
40
|
|
|
@@ -41,10 +49,13 @@ import { fetchPrFiles, mapFilesToRoutes } from '../utils/pr-diff-analyzer.js';
|
|
|
41
49
|
* @param {Array<object>} opts.findings
|
|
42
50
|
* @param {string[]} opts.changedFiles
|
|
43
51
|
* @param {string} opts.blockOn critical | warning | none
|
|
52
|
+
* @param {object} [opts.baseline] baseline-aware diff status (Phase B1). When available:
|
|
53
|
+
* { available: true, newCritical, newWarning, newInfo, persisting, resolved }.
|
|
54
|
+
* When not: { available: false, note }. Omit entirely for non-baseline callers.
|
|
44
55
|
* @param {string} [opts.error] top-level error message (startup / fetch failure)
|
|
45
56
|
* @returns {string}
|
|
46
57
|
*/
|
|
47
|
-
export function buildStepSummary({ blocked, summary, affectedRoutes, perRoute, findings, changedFiles, blockOn, error }) {
|
|
58
|
+
export function buildStepSummary({ blocked, summary, affectedRoutes, perRoute, findings, changedFiles, blockOn, baseline, error }) {
|
|
48
59
|
const icon = blocked ? '🔴' : summary.critical + summary.warning === 0 ? '✅' : '⚠️';
|
|
49
60
|
const status = blocked ? 'BLOCKED — merge prevented' : 'PASSED';
|
|
50
61
|
|
|
@@ -62,6 +73,20 @@ export function buildStepSummary({ blocked, summary, affectedRoutes, perRoute, f
|
|
|
62
73
|
md += `| Routes audited | ${affectedRoutes.length} |\n`;
|
|
63
74
|
md += `| Files changed | ${changedFiles.length} |\n\n`;
|
|
64
75
|
|
|
76
|
+
// Baseline-aware blocking status (Phase B1). Either the head-vs-base diff counts, or the
|
|
77
|
+
// fail-safe note when no per-branch baseline was available to diff against.
|
|
78
|
+
if (baseline) {
|
|
79
|
+
if (baseline.available) {
|
|
80
|
+
md += `### Baseline diff\n\n`;
|
|
81
|
+
md += `Blocking on **new** findings this PR introduces (vs the base-branch baseline).\n\n`;
|
|
82
|
+
md += `| | 🔴 Critical | ⚠️ Warning | ℹ️ Info |\n|--|------------|-----------|--------|\n`;
|
|
83
|
+
md += `| New | ${baseline.newCritical} | ${baseline.newWarning} | ${baseline.newInfo} |\n\n`;
|
|
84
|
+
md += `_${baseline.persisting} persisting · ${baseline.resolved} resolved._\n\n`;
|
|
85
|
+
} else {
|
|
86
|
+
md += `> ⚠️ ${baseline.note ?? 'Baseline unavailable — blocking on absolute finding counts.'}\n\n`;
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
|
|
65
90
|
if (perRoute.length > 0) {
|
|
66
91
|
md += `### Route Breakdown\n\n`;
|
|
67
92
|
md += `| Route | 🔴 Critical | ⚠️ Warning | ℹ️ Info |\n|-------|------------|-----------|--------|\n`;
|
|
@@ -152,6 +177,37 @@ export function normalizeRoutePaths(routes) {
|
|
|
152
177
|
});
|
|
153
178
|
}
|
|
154
179
|
|
|
180
|
+
/**
|
|
181
|
+
* All-routes-failed guard (safety-critical — must never false-PASS). True when at least one
|
|
182
|
+
* route was audited and EVERY audited route errored (the app was unreachable, or every audit
|
|
183
|
+
* timed out). The caller throws on this → exit 1 → merge blocked. A timed-out route audit is
|
|
184
|
+
* recorded as a route error (perRoute[i].error set, via auditRouteWithRetry, D4), so a hung
|
|
185
|
+
* audit feeds this guard and can never become a silent pass. A PARTIAL failure (some routes ok)
|
|
186
|
+
* does NOT trip the guard: those errors are surfaced in the summary/annotations and the normal
|
|
187
|
+
* finding-based block decision applies.
|
|
188
|
+
*
|
|
189
|
+
* @param {Array<{ error?: string }>} perRoute
|
|
190
|
+
* @returns {boolean}
|
|
191
|
+
*/
|
|
192
|
+
export function allRoutesFailed(perRoute) {
|
|
193
|
+
return Array.isArray(perRoute) && perRoute.length > 0 && perRoute.every(r => r && r.error);
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
/**
|
|
197
|
+
* The CLI's audit-decision → process exit-code mapping (the CI merge gate). SAFETY-CRITICAL:
|
|
198
|
+
* exit 1 blocks the merge, exit 0 allows it. Returns 1 when the PR is blocked (findings at or
|
|
199
|
+
* above the block-on threshold — the baseline-aware decidePrBlock decision) OR the audit failed
|
|
200
|
+
* (every route errored / a startup/fetch error — both surface via the catch as `failed`); 0 only
|
|
201
|
+
* when a completed audit passed. Extracted as a pure exported fn (like allRoutesFailed) so the
|
|
202
|
+
* full block-decision → exit matrix is unit- + mutation-testable without driving Chrome.
|
|
203
|
+
*
|
|
204
|
+
* @param {{ blocked?: boolean, failed?: boolean }} [d]
|
|
205
|
+
* @returns {0 | 1}
|
|
206
|
+
*/
|
|
207
|
+
export function prExitCode({ blocked = false, failed = false } = {}) {
|
|
208
|
+
return (blocked || failed) ? 1 : 0;
|
|
209
|
+
}
|
|
210
|
+
|
|
155
211
|
// ── Route loader ──────────────────────────────────────────────────────────────
|
|
156
212
|
|
|
157
213
|
async function loadRoutes() {
|
|
@@ -204,7 +260,6 @@ if (process.argv[1] === _thisFile) {
|
|
|
204
260
|
|
|
205
261
|
async function main() {
|
|
206
262
|
const prUrl = process.env.ARGUS_PR_URL;
|
|
207
|
-
const targetUrl = process.env.TARGET_DEV_URL ?? 'http://localhost:3000';
|
|
208
263
|
const blockOn = (process.env.ARGUS_BLOCK_ON ?? 'critical').toLowerCase().trim();
|
|
209
264
|
const token = process.env.GITHUB_TOKEN;
|
|
210
265
|
|
|
@@ -218,16 +273,34 @@ async function main() {
|
|
|
218
273
|
process.exit(1);
|
|
219
274
|
}
|
|
220
275
|
|
|
276
|
+
// Resolve the audit target (D3 — deploy-preview auto-detection). Prefer a per-PR deploy
|
|
277
|
+
// preview over the static TARGET_DEV_URL, always degrading gracefully back to it. Default
|
|
278
|
+
// (no ARGUS_PREVIEW_URL / ARGUS_PREVIEW_DETECT) → TARGET_DEV_URL exactly, with NO extra
|
|
279
|
+
// network call — byte-identical to the prior behaviour. The head SHA (ARGUS_PR_HEAD_SHA,
|
|
280
|
+
// the PR head, not the runner's merge-commit GITHUB_SHA) keys the GitHub Deployments probe.
|
|
281
|
+
const headSha = process.env.ARGUS_PR_HEAD_SHA || process.env.GITHUB_SHA;
|
|
282
|
+
const { url: targetUrl, source: targetSource } = await resolveTargetUrl({
|
|
283
|
+
env: process.env, prUrl, headSha, token,
|
|
284
|
+
});
|
|
285
|
+
if (targetSource !== 'target-dev-url') {
|
|
286
|
+
console.log(`[argus] Audit target resolved via ${targetSource}: ${targetUrl}`);
|
|
287
|
+
}
|
|
288
|
+
|
|
221
289
|
let mcp;
|
|
222
290
|
const changedFiles = [];
|
|
223
291
|
const affectedRoutes = [];
|
|
224
292
|
const allFindings = [];
|
|
225
293
|
const perRoute = [];
|
|
294
|
+
const routeFindings = []; // [{ path, findings }] — feeds the baseline-aware diff (B1)
|
|
226
295
|
|
|
227
296
|
try {
|
|
228
297
|
// Step 1: Fetch the PR file list from GitHub
|
|
229
298
|
console.log(`[argus] Fetching PR diff: ${prUrl}`);
|
|
230
|
-
|
|
299
|
+
// prFiles carries { filename, status, patch } per file; `files` is the filename-only
|
|
300
|
+
// view used for slug mapping + the string[] output contract. prFiles.patch feeds the
|
|
301
|
+
// file:line annotations (Phase A3).
|
|
302
|
+
const prFiles = await fetchPrFiles(prUrl, token);
|
|
303
|
+
const files = prFiles.map(f => f.filename);
|
|
231
304
|
changedFiles.push(...files);
|
|
232
305
|
console.log(`[argus] ${files.length} changed file(s)`);
|
|
233
306
|
if (files.length >= 300) {
|
|
@@ -236,9 +309,14 @@ async function main() {
|
|
|
236
309
|
console.log('::warning::PR has 300+ changed files — Argus analyzed the first 300. Routes affected by later files may be missed.');
|
|
237
310
|
}
|
|
238
311
|
|
|
239
|
-
// Step 2: Map changed files to affected routes
|
|
312
|
+
// Step 2: Map changed files to affected routes. When ARGUS_SOURCE_DIR points at the
|
|
313
|
+
// checked-out app source, mapFilesToRoutesDeep (C1) maps a changed component to only the
|
|
314
|
+
// routes whose page files import it (static import graph + Next.js convention); without
|
|
315
|
+
// it — or on any resolution ambiguity — it falls back to the conservative slug heuristic
|
|
316
|
+
// (all routes on no-match), so a regression is never narrowed away. Opt-in: unset
|
|
317
|
+
// ARGUS_SOURCE_DIR keeps the prior slug-only behaviour exactly.
|
|
240
318
|
const routes = await loadRoutes();
|
|
241
|
-
const affected =
|
|
319
|
+
const affected = mapFilesToRoutesDeep(files, routes, { sourceDir: process.env.ARGUS_SOURCE_DIR });
|
|
242
320
|
affectedRoutes.push(...affected);
|
|
243
321
|
|
|
244
322
|
if (affected.length === 0) {
|
|
@@ -263,10 +341,34 @@ async function main() {
|
|
|
263
341
|
mcp = await createMcpClient();
|
|
264
342
|
console.log('[argus] Chrome connected.');
|
|
265
343
|
|
|
266
|
-
//
|
|
344
|
+
// INTENTIONAL DIVERGENCE from the MCP tool (src/mcp-server.js handlePrValidate): the CLI
|
|
345
|
+
// audits a routes-file (CI safety + speed); the MCP tool audits config/targets.js (dev
|
|
346
|
+
// convenience). That ROUTE-SOURCE divergence is by design (PR_VALIDATOR A4/E4). Everything
|
|
347
|
+
// downstream of the route list is SHARED so the two paths agree (E4 — CLI↔MCP parity):
|
|
348
|
+
// • AUDIT DEPTH does NOT diverge — both run crawlRouteCheap by default and share ONE opt-in
|
|
349
|
+
// depth policy (ARGUS_PR_AUDIT_DEPTH → selectAnalyzers, D2).
|
|
350
|
+
// • The BLOCK DECISION is the shared decidePrBlock, fed a summary built by the shared
|
|
351
|
+
// severityTally (Step 6 below) — for the same findings + baseline + blockOn the two paths
|
|
352
|
+
// reach the IDENTICAL `blocked`/reason (decidePrBlock owns the none|warning|critical matrix
|
|
353
|
+
// AND normalizes blockOn casing, so neither path re-implements or pre-normalizes the gate).
|
|
354
|
+
// • Both paths REPORT through the SAME shared helper — reportPrValidation (Step 9 below).
|
|
355
|
+
//
|
|
356
|
+
// Step 5: Audit each affected route via crawlRouteWithDepth (cheap pass + the selected
|
|
357
|
+
// expensive analyzers, if any — see Step 5 depth resolution below).
|
|
267
358
|
// Preserve path prefix (e.g. /project/ in GitHub Pages) — .origin would strip it
|
|
268
359
|
const baseUrl = targetUrl.replace(/\/$/, '');
|
|
269
360
|
|
|
361
|
+
// Selective analyzer depth (D2). The shared policy maps ARGUS_PR_AUDIT_DEPTH + the PR's
|
|
362
|
+
// changed file types to the expensive analyzers to also run on each affected route.
|
|
363
|
+
// Default 'cheap' → empty list → crawlRouteWithDepth returns the crawlRouteCheap result
|
|
364
|
+
// unchanged (byte-identical to before). Computed once per PR (selection is per-PR, not
|
|
365
|
+
// per-route — every route gets the same depth).
|
|
366
|
+
const auditDepth = resolveAuditDepth(process.env.ARGUS_PR_AUDIT_DEPTH);
|
|
367
|
+
const depthAnalyzers = selectAnalyzers({ depth: auditDepth, changedFiles: files });
|
|
368
|
+
if (depthAnalyzers.length > 0) {
|
|
369
|
+
console.log(`[argus] Audit depth: ${auditDepth} → also running expensive analyzers: ${depthAnalyzers.join(', ')}`);
|
|
370
|
+
}
|
|
371
|
+
|
|
270
372
|
// Normalize route paths — crawlRouteCheap builds URLs via string concat (baseUrl + route.path)
|
|
271
373
|
// so paths without a leading slash produce malformed URLs like https://example.comlogin
|
|
272
374
|
const normalizedAffected = normalizeRoutePaths(affected).map((r, i) => {
|
|
@@ -276,63 +378,161 @@ async function main() {
|
|
|
276
378
|
return r;
|
|
277
379
|
});
|
|
278
380
|
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
381
|
+
// Audit each affected route. Bounded-concurrency by ARGUS_CONCURRENCY (default 1 = sequential,
|
|
382
|
+
// byte-identical to the prior loop). Each parallel lane gets its OWN Chrome client —
|
|
383
|
+
// crawlRouteCheap mutates page-navigation state, so concurrent crawls must never share a
|
|
384
|
+
// connection — and auditRoutesConcurrently returns the per-route results in ROUTE order, so the
|
|
385
|
+
// aggregate findings + the baseline-aware block decision are identical to a sequential run (only
|
|
386
|
+
// wall-clock changes). Mirrors the orchestrator's parallel route crawling (D7.3).
|
|
387
|
+
const rawConcurrency = parseInt(process.env.ARGUS_CONCURRENCY ?? '1', 10);
|
|
388
|
+
const concurrency = Math.min(10, Math.max(1, Number.isNaN(rawConcurrency) ? 1 : rawConcurrency));
|
|
389
|
+
if (concurrency > 1) {
|
|
390
|
+
console.log(`[argus] Parallel mode: concurrency=${concurrency} over ${normalizedAffected.length} route(s)`);
|
|
391
|
+
}
|
|
282
392
|
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
393
|
+
// Per-route timeout + retry (D4). Each route audit is bounded by ARGUS_ROUTE_TIMEOUT_MS
|
|
394
|
+
// (default 120000 ms) and optionally retried ARGUS_ROUTE_RETRIES times. A timed-out audit
|
|
395
|
+
// throws → it is recorded as a route ERROR below (ok:false), feeding the all-routes-failed
|
|
396
|
+
// guard — a hung audit can never silently pass. The bound only ever BLOCKS, never PASSES.
|
|
397
|
+
const { timeoutMs: routeTimeoutMs, retries: routeRetries } = routeResilienceFromEnv();
|
|
398
|
+
if (routeTimeoutMs > 0 || routeRetries > 0) {
|
|
399
|
+
console.log(`[argus] Per-route audit: timeout ${routeTimeoutMs > 0 ? `${routeTimeoutMs}ms` : 'off'}, ${routeRetries} retr${routeRetries === 1 ? 'y' : 'ies'}`);
|
|
400
|
+
}
|
|
287
401
|
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
402
|
+
const routeResults = await auditRoutesConcurrently(normalizedAffected, {
|
|
403
|
+
concurrency,
|
|
404
|
+
primaryClient: mcp,
|
|
405
|
+
createClient: createMcpClient,
|
|
406
|
+
crawlRoute: async (route, client) => {
|
|
407
|
+
const url = `${baseUrl}${route.path}`;
|
|
408
|
+
console.log(`[argus] → Auditing ${url}`);
|
|
409
|
+
try {
|
|
410
|
+
const raw = await auditRouteWithRetry(
|
|
411
|
+
() => crawlRouteWithDepth(route, baseUrl, client, depthAnalyzers),
|
|
412
|
+
{
|
|
413
|
+
timeoutMs: routeTimeoutMs,
|
|
414
|
+
retries: routeRetries,
|
|
415
|
+
label: `Route audit ${route.path}`,
|
|
416
|
+
onRetry: (attempt, err) =>
|
|
417
|
+
console.log(`::warning::Route ${route.path} audit attempt ${attempt} failed (${String(err.message).replace(/\n/g, ' ')}) — retrying`),
|
|
418
|
+
},
|
|
419
|
+
);
|
|
420
|
+
return { route, ok: true, raw };
|
|
421
|
+
} catch (routeErr) {
|
|
422
|
+
return { route, ok: false, error: routeErr.message };
|
|
423
|
+
}
|
|
424
|
+
},
|
|
425
|
+
});
|
|
292
426
|
|
|
293
|
-
|
|
427
|
+
// Aggregate the ordered results sequentially — bookkeeping + annotations stay deterministic and
|
|
428
|
+
// identical to the prior sequential loop (results are in route order regardless of completion).
|
|
429
|
+
for (const item of routeResults) {
|
|
430
|
+
const route = item.route;
|
|
431
|
+
const url = `${baseUrl}${route.path}`;
|
|
294
432
|
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
console.log(`::warning::${String(f.message ?? '').replace(/\n/g, ' ')} [${f.type}] on ${url}`);
|
|
301
|
-
}
|
|
433
|
+
if (!item.ok) {
|
|
434
|
+
console.error(`::warning::Audit failed for ${url}: ${item.error}`);
|
|
435
|
+
perRoute.push({ route: route.path, critical: 0, warning: 0, info: 0, error: item.error });
|
|
436
|
+
continue;
|
|
437
|
+
}
|
|
302
438
|
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
439
|
+
const findings = Array.isArray(item.raw.errors) ? item.raw.errors : [];
|
|
440
|
+
allFindings.push(...findings);
|
|
441
|
+
routeFindings.push({ path: route.path, findings });
|
|
442
|
+
|
|
443
|
+
const critical = findings.filter(f => f.severity === 'critical').length;
|
|
444
|
+
const warning = findings.filter(f => f.severity === 'warning').length;
|
|
445
|
+
const info = findings.filter(f => f.severity === 'info').length;
|
|
446
|
+
perRoute.push({ route: route.path, critical, warning, info });
|
|
447
|
+
|
|
448
|
+
console.log(`[argus] ${url}: ${critical} critical, ${warning} warning, ${info} info`);
|
|
449
|
+
|
|
450
|
+
// Emit inline GitHub Actions annotations for visible CI feedback (Phase A3).
|
|
451
|
+
// When a changed file SPECIFICALLY maps to this route and has a real added line in
|
|
452
|
+
// its patch, the annotation is anchored at file=path,line=N so it renders inline on
|
|
453
|
+
// the PR "Files changed" tab; otherwise it stays a route-level annotation. The line
|
|
454
|
+
// is never fabricated — resolveAnnotationTarget returns null unless the line is a
|
|
455
|
+
// genuine diff line (see pr-diff-analyzer.js).
|
|
456
|
+
const annTarget = resolveAnnotationTarget(route.path, prFiles);
|
|
457
|
+
const loc = annTarget ? ` file=${annTarget.path},line=${annTarget.line}` : '';
|
|
458
|
+
for (const f of findings.filter(g => g.severity === 'critical')) {
|
|
459
|
+
console.log(`::error${loc}::${String(f.message ?? '').replace(/\n/g, ' ')} [${f.type}] on ${url}`);
|
|
460
|
+
}
|
|
461
|
+
for (const f of findings.filter(g => g.severity === 'warning')) {
|
|
462
|
+
console.log(`::warning${loc}::${String(f.message ?? '').replace(/\n/g, ' ')} [${f.type}] on ${url}`);
|
|
306
463
|
}
|
|
307
464
|
}
|
|
308
465
|
|
|
309
|
-
// Guard: if
|
|
310
|
-
//
|
|
311
|
-
//
|
|
312
|
-
|
|
313
|
-
if (
|
|
466
|
+
// Guard: if EVERY audited route errored, the app was unreachable after the preflight check
|
|
467
|
+
// (e.g. the app died between check and crawl) or every audit timed out (D4). Throwing here
|
|
468
|
+
// exits 1, which correctly blocks the merge — a hung/unreachable app never false-passes. The
|
|
469
|
+
// decision lives in the exported allRoutesFailed() so it is unit- + mutation-testable.
|
|
470
|
+
if (allRoutesFailed(perRoute)) {
|
|
314
471
|
throw new Error(
|
|
315
|
-
`All ${perRoute.length} route audit(s) failed — Chrome could not reach the app. ` +
|
|
472
|
+
`All ${perRoute.length} route audit(s) failed — Chrome could not reach the app or every audit timed out. ` +
|
|
316
473
|
`Ensure TARGET_DEV_URL is accessible throughout the job. ` +
|
|
317
474
|
`First error: ${perRoute[0].error}`,
|
|
318
475
|
);
|
|
319
476
|
}
|
|
320
477
|
|
|
321
|
-
// Step 6: Compute aggregate summary
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
478
|
+
// Step 6: Compute the aggregate (absolute) severity summary that feeds the block decision.
|
|
479
|
+
// Built via the shared severityTally so the CLI and the MCP tool (handlePrValidate) construct
|
|
480
|
+
// the decidePrBlock `summary` input IDENTICALLY — the two paths cannot diverge on the block
|
|
481
|
+
// decision for the same findings (PR_VALIDATOR plan E4 — CLI↔MCP parity).
|
|
482
|
+
const summary = severityTally(allFindings);
|
|
483
|
+
|
|
484
|
+
// Step 6a: Baseline-aware merge-block decision (Phase B1). Diff the PR-head findings
|
|
485
|
+
// against the stored base-branch baseline (restored via the actions/cache pattern, keyed
|
|
486
|
+
// on GITHUB_BASE_REF) and gate on the findings this PR INTRODUCES. Fail safe: when no
|
|
487
|
+
// baseline is resolvable the decision blocks on absolute counts (pre-B1 behaviour) and
|
|
488
|
+
// the step summary says so — it never silently passes a broken app. The block-on matrix
|
|
489
|
+
// (none|warning|critical) lives in the shared decidePrBlock, so the CLI and the MCP tool
|
|
490
|
+
// (handlePrValidate) cannot diverge on the block semantics.
|
|
491
|
+
const outputDir = process.env.REPORT_OUTPUT_DIR || './reports';
|
|
492
|
+
const baselineFile = resolvePrBaselineFile({ outputDir });
|
|
493
|
+
const baseline = baselineFile ? loadPrBaseline(baselineFile) : null;
|
|
494
|
+
const decision = decidePrBlock({ routeFindings, summary, blockOn, baseline });
|
|
495
|
+
const blocked = decision.blocked;
|
|
496
|
+
|
|
497
|
+
// B2: tag each finding new-vs-persisting off the same baseline, so the PR comment surfaces
|
|
498
|
+
// ONLY the findings this PR introduced (tags ride on the shared objects in result.findings).
|
|
499
|
+
tagFindingNovelty(routeFindings, baseline);
|
|
500
|
+
|
|
501
|
+
const baselineInfo = decision.baselineAvailable
|
|
502
|
+
? {
|
|
503
|
+
available: true,
|
|
504
|
+
newCritical: decision.newSummary.critical,
|
|
505
|
+
newWarning: decision.newSummary.warning,
|
|
506
|
+
newInfo: decision.newSummary.info,
|
|
507
|
+
persisting: decision.persistingCount,
|
|
508
|
+
resolved: decision.resolvedCount,
|
|
509
|
+
}
|
|
510
|
+
: { available: false, note: decision.note };
|
|
511
|
+
|
|
512
|
+
if (decision.baselineAvailable) {
|
|
513
|
+
console.log(`[argus] Baseline diff (${baselineFile}): ${baselineInfo.newCritical} critical / ${baselineInfo.newWarning} warning new, ${baselineInfo.persisting} persisting, ${baselineInfo.resolved} resolved`);
|
|
514
|
+
} else {
|
|
515
|
+
console.log(`[argus] ${decision.note}`);
|
|
516
|
+
}
|
|
327
517
|
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
518
|
+
// Step 6b: Optionally update this branch's baseline (ARGUS_UPDATE_BASELINE). A base-branch
|
|
519
|
+
// run uses this to populate the cache the PR runs diff against; default off → no write,
|
|
520
|
+
// no behaviour change.
|
|
521
|
+
if (/^(1|true|yes|on)$/i.test(process.env.ARGUS_UPDATE_BASELINE || '')) {
|
|
522
|
+
try {
|
|
523
|
+
const writeFile = resolvePrBaselineFile({ outputDir, baseRef: getCurrentBranch() });
|
|
524
|
+
if (writeFile) {
|
|
525
|
+
savePrBaseline(writeFile, routeFindings);
|
|
526
|
+
console.log(`[argus] Baseline updated: ${writeFile}`);
|
|
527
|
+
}
|
|
528
|
+
} catch (baseErr) {
|
|
529
|
+
console.error(`::warning::Argus baseline write failed: ${baseErr.message}`);
|
|
530
|
+
}
|
|
531
|
+
}
|
|
332
532
|
|
|
333
533
|
// Step 7: Write GitHub Actions outputs and step summary
|
|
334
534
|
writeGithubOutputs({ blocked, summary, affectedRoutes: normalizedAffected });
|
|
335
|
-
writeStepSummary(buildStepSummary({ blocked, summary, affectedRoutes: normalizedAffected, perRoute, findings: allFindings, changedFiles: files, blockOn }));
|
|
535
|
+
writeStepSummary(buildStepSummary({ blocked, summary, affectedRoutes: normalizedAffected, perRoute, findings: allFindings, changedFiles: files, blockOn, baseline: baselineInfo }));
|
|
336
536
|
|
|
337
537
|
// Step 8: Emit JSON result to stdout for downstream pipeline steps
|
|
338
538
|
const result = {
|
|
@@ -344,26 +544,45 @@ async function main() {
|
|
|
344
544
|
summary,
|
|
345
545
|
blocked,
|
|
346
546
|
blockOn,
|
|
547
|
+
baseline: baselineInfo,
|
|
347
548
|
};
|
|
348
549
|
console.log(JSON.stringify(result, null, 2));
|
|
349
550
|
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
551
|
+
// Step 9: Post/update the Argus PR comment + Check Run (Phase A). Idempotent — updates
|
|
552
|
+
// the single marker-tagged comment in place; the Check Run conclusion maps to the block
|
|
553
|
+
// decision. Reporting is best-effort: a missing token or a GitHub API failure must never
|
|
554
|
+
// crash the run or change the merge decision (the exit code below is the real gate).
|
|
555
|
+
try {
|
|
556
|
+
const reported = await reportPrValidation(result, { prUrl });
|
|
557
|
+
if (reported.posted) {
|
|
558
|
+
console.log(`[argus] PR comment posted/updated.${reported.checked ? ' Check Run completed.' : ''}`);
|
|
559
|
+
} else {
|
|
560
|
+
console.log(`[argus] PR reporting skipped: ${reported.reason}`);
|
|
561
|
+
}
|
|
562
|
+
} catch (reportErr) {
|
|
563
|
+
console.error(`::warning::Argus PR reporting failed: ${reportErr.message}`);
|
|
356
564
|
}
|
|
357
565
|
|
|
358
|
-
|
|
359
|
-
|
|
566
|
+
// The block decision → exit code (the merge gate) goes through the single prExitCode
|
|
567
|
+
// mapping so the CLI and its tests can never disagree on what a decision exits with.
|
|
568
|
+
const exitCode = prExitCode({ blocked });
|
|
569
|
+
if (blocked) {
|
|
570
|
+
// decision.reason is non-null when blocked and reflects NEW counts when a baseline was
|
|
571
|
+
// available, ABSOLUTE counts otherwise (see decidePrBlock — "new" vs "total").
|
|
572
|
+
console.error(`::error::Argus PR Validator: ${decision.reason}. Merge blocked (block-on=${blockOn}).`);
|
|
573
|
+
} else {
|
|
574
|
+
console.log(`[argus] ✓ Audit passed — ${summary.critical} critical, ${summary.warning} warning, ${summary.info} info.`);
|
|
575
|
+
}
|
|
576
|
+
process.exit(exitCode);
|
|
360
577
|
|
|
361
578
|
} catch (err) {
|
|
362
579
|
const summary = { critical: 0, warning: 0, info: 0 };
|
|
363
580
|
console.error(`::error::Argus PR validation failed: ${err.message}`);
|
|
364
581
|
writeGithubOutputs({ blocked: false, summary, affectedRoutes: [] });
|
|
365
582
|
writeStepSummary(buildStepSummary({ blocked: false, summary, affectedRoutes: [], perRoute: [], findings: [], changedFiles, blockOn, error: err.message }));
|
|
366
|
-
|
|
583
|
+
// A failed audit (all routes errored, or a startup/fetch error) is conservative: exit 1 =
|
|
584
|
+
// merge blocked, never a false PASS. Routed through prExitCode for one exit-code source.
|
|
585
|
+
process.exit(prExitCode({ failed: true }));
|
|
367
586
|
|
|
368
587
|
} finally {
|
|
369
588
|
if (mcp) {
|