argusqa-os 9.7.4 → 9.7.5
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/README.md +4 -4
- package/glama.json +1 -1
- package/package.json +1 -1
- package/src/adapters/browser.js +59 -4
- package/src/orchestration/orchestrator.js +5 -46
- package/src/utils/mcp-client.js +7 -4
package/README.md
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
[argusqa-os on npm](https://www.npmjs.com/package/argusqa-os)
|
|
6
6
|
[Argus on Glama](https://glama.ai/mcp/servers/ironclawdevs27/Argus)
|
|
7
|
-
[test-harness](test-harness/)
|
|
8
8
|
[LICENSE](LICENSE)
|
|
9
9
|
|
|
10
10
|
**Argus catches the bugs your test suite misses — visual regressions, API loops, CSS drift, console noise, accessibility failures, and more — and delivers rich reports to Slack (or a local HTML dashboard).**
|
|
@@ -218,7 +218,7 @@ npm run report:pdf # Export HTML report to A4 PDF (requires: npm install pup
|
|
|
218
218
|
npm run server # Start Slack slash-command server (port 3001)
|
|
219
219
|
npm run init # Interactive setup wizard
|
|
220
220
|
npm run test:unit # 61 unit tests — no Chrome required
|
|
221
|
-
npm run test:harness #
|
|
221
|
+
npm run test:harness # 144-block correctness harness — requires Chrome
|
|
222
222
|
npm run test:harness:log # same, but tees full output to harness-results.txt
|
|
223
223
|
```
|
|
224
224
|
|
|
@@ -342,7 +342,7 @@ Argus is a **complementary layer**, not a replacement for unit or E2E tests:
|
|
|
342
342
|
|
|
343
343
|
## Known Limitations
|
|
344
344
|
|
|
345
|
-
All
|
|
345
|
+
All 738 harness assertions pass (`738/738`) — there are currently no known MCP- or Chrome-layer restrictions. Soft assertions (Lighthouse, performance traces) still require non-headless Chrome and are skipped in headless CI.
|
|
346
346
|
|
|
347
347
|
---
|
|
348
348
|
|
|
@@ -361,7 +361,7 @@ src/
|
|
|
361
361
|
chrome-launcher.js — npm run chrome / argus-chrome — launches Chrome with correct flags
|
|
362
362
|
doctor.js — npm run doctor / argus-doctor — pre-flight checks
|
|
363
363
|
pr-validate.js — headless CI entry point for GitHub Actions
|
|
364
|
-
test-harness/ —
|
|
364
|
+
test-harness/ — 144-block correctness harness, 738 hard assertions, 60 fixture pages
|
|
365
365
|
test/unit/ — 61 Vitest unit tests (no Chrome required)
|
|
366
366
|
landing/ — Product landing page (React 19 + Vite + Tailwind)
|
|
367
367
|
```
|
package/glama.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"$schema": "https://glama.ai/mcp/schemas/server.json",
|
|
3
3
|
"name": "argus",
|
|
4
|
-
"description": "AI-powered QA harness that audits web apps via Chrome DevTools Protocol. Catches JS errors, network failures, a11y violations, SEO issues, security headers, CSS regressions, and more — directly from Claude conversations. 9 MCP tools: argus_audit (fast 8-analyzer pass), argus_audit_full (Lighthouse + memory + responsive), argus_compare (dev vs staging diff), argus_last_report (retrieve last JSON report), argus_watch_snapshot (live tab snapshot without navigating), argus_get_context (LLM-optimized context + fix loop with snapshot_id diff), argus_design_audit (Figma design fidelity — 13 finding types), argus_visual_diff (screenshot baseline comparison, updateBaseline flag), argus_pr_validate (PR diff → affected routes → targeted audit → blocked flag). Every finding is post-processed with intelligent baseline filtering (cross-run noise classifier) and root cause linking (recent git commits mapped to new findings).
|
|
4
|
+
"description": "AI-powered QA harness that audits web apps via Chrome DevTools Protocol. Catches JS errors, network failures, a11y violations, SEO issues, security headers, CSS regressions, and more — directly from Claude conversations. 9 MCP tools: argus_audit (fast 8-analyzer pass), argus_audit_full (Lighthouse + memory + responsive), argus_compare (dev vs staging diff), argus_last_report (retrieve last JSON report), argus_watch_snapshot (live tab snapshot without navigating), argus_get_context (LLM-optimized context + fix loop with snapshot_id diff), argus_design_audit (Figma design fidelity — 13 finding types), argus_visual_diff (screenshot baseline comparison, updateBaseline flag), argus_pr_validate (PR diff → affected routes → targeted audit → blocked flag). Every finding is post-processed with intelligent baseline filtering (cross-run noise classifier) and root cause linking (recent git commits mapped to new findings). 144 test blocks, 738 hard assertions, 67 detection categories.",
|
|
5
5
|
"maintainers": ["ironclawdevs27"],
|
|
6
6
|
"tools": [
|
|
7
7
|
{
|
package/package.json
CHANGED
package/src/adapters/browser.js
CHANGED
|
@@ -21,7 +21,24 @@ export class CdpBrowserAdapter {
|
|
|
21
21
|
constructor(mcp) { this._mcp = mcp; }
|
|
22
22
|
|
|
23
23
|
// ── Navigation ──────────────────────────────────────────────────────────────
|
|
24
|
-
|
|
24
|
+
// navigate_page reports failures as RESOLVED text ("Unable to navigate ...
|
|
25
|
+
// net::ERR_CONNECTION_REFUSED", "Could not connect to Chrome ..."), never as a
|
|
26
|
+
// thrown error. Unchecked, a dead target or dead browser produced a "clean"
|
|
27
|
+
// audit: analyzers ran against chrome-error://chromewebdata and emitted bogus
|
|
28
|
+
// findings (or none), and CI gates passed with Chrome down. Throw so failures
|
|
29
|
+
// propagate through the existing crawl error path.
|
|
30
|
+
navigate(url) {
|
|
31
|
+
return withRetry(async () => {
|
|
32
|
+
const resp = await this._mcp.navigate_page({ url });
|
|
33
|
+
if (typeof resp === 'string' &&
|
|
34
|
+
(resp.includes('Unable to navigate') ||
|
|
35
|
+
resp.includes('Could not connect to Chrome') ||
|
|
36
|
+
resp.includes('A dialog is open'))) {
|
|
37
|
+
throw new Error(`navigate(${url}) failed: ${resp.split('\n')[0].slice(0, 200)}`);
|
|
38
|
+
}
|
|
39
|
+
return resp;
|
|
40
|
+
}, { label: `navigate(${url})` });
|
|
41
|
+
}
|
|
25
42
|
|
|
26
43
|
// ── Evaluation & snapshots ──────────────────────────────────────────────────
|
|
27
44
|
evaluate(fn) { return this._mcp.evaluate_script({ function: fn }); }
|
|
@@ -41,14 +58,52 @@ export class CdpBrowserAdapter {
|
|
|
41
58
|
hover(uid) { return this._mcp.hover({ uid }); }
|
|
42
59
|
drag(src, tgt) { return this._mcp.drag({ from_uid: src, to_uid: tgt }); }
|
|
43
60
|
uploadFile(uid, filePath) { return this._mcp.upload_file({ uid, filePath }); }
|
|
44
|
-
|
|
45
|
-
|
|
61
|
+
// handle_dialog wire schema is { action: 'accept'|'dismiss', promptText? } — sending
|
|
62
|
+
// { accept: bool } is rejected by the tool's input validation (and the rejection comes
|
|
63
|
+
// back as a resolved error-text response, so the failure was silent in production).
|
|
64
|
+
handleDialog(accept, promptText = '') {
|
|
65
|
+
const args = { action: accept ? 'accept' : 'dismiss' };
|
|
66
|
+
if (promptText) args.promptText = promptText;
|
|
67
|
+
return this._mcp.handle_dialog(args);
|
|
68
|
+
}
|
|
69
|
+
// wait_for requires text as a non-empty string ARRAY. A bare string is rejected by
|
|
70
|
+
// input validation, and { state: 'networkidle' } is not part of the tool's schema at
|
|
71
|
+
// all — both shapes used to resolve to error text and silently wait for nothing.
|
|
72
|
+
waitFor(opts = {}) {
|
|
73
|
+
if (typeof opts.text === 'string') opts = { ...opts, text: [opts.text] };
|
|
74
|
+
if (opts.state === 'networkidle') return this.#waitForNetworkIdle();
|
|
75
|
+
return this._mcp.wait_for(opts);
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
// Bounded network-quiet poll: resolves once the page's resource-timing entry count
|
|
79
|
+
// is stable across two consecutive 250 ms polls, or after 3 s — whichever is first.
|
|
80
|
+
async #waitForNetworkIdle() {
|
|
81
|
+
let prev = -1;
|
|
82
|
+
for (let i = 0; i < 12; i++) {
|
|
83
|
+
const raw = await this.evaluate(`() => performance.getEntriesByType('resource').length`);
|
|
84
|
+
const count = Number(typeof raw === 'object' ? raw?.result ?? 0 : raw) || 0;
|
|
85
|
+
if (count === prev) return;
|
|
86
|
+
prev = count;
|
|
87
|
+
await new Promise(r => setTimeout(r, 250));
|
|
88
|
+
}
|
|
89
|
+
}
|
|
46
90
|
|
|
47
91
|
// ── Viewport ────────────────────────────────────────────────────────────────
|
|
48
92
|
emulate(viewport) { return this._mcp.emulate({ viewport }); }
|
|
49
93
|
emulateCpu(rate) { return this._mcp.emulate({ cpuThrottlingRate: rate }); }
|
|
50
94
|
emulateColorScheme(scheme) { return this._mcp.emulate({ colorScheme: scheme }); }
|
|
51
|
-
|
|
95
|
+
// chrome-devtools-mcp@1.1.1's emulate tool has no reduced-motion capability — the
|
|
96
|
+
// unsupported argument comes back as RESOLVED error text ("Unknown argument"), not a
|
|
97
|
+
// thrown error, so callers' graceful-skip catch paths (motion-analyzer) never ran and
|
|
98
|
+
// analysis proceeded unemulated. Surface it as a real error; if a future upstream
|
|
99
|
+
// version adds the argument, the call succeeds and emulation lights up automatically.
|
|
100
|
+
async emulateReducedMotion(pref) {
|
|
101
|
+
const resp = await this._mcp.emulate({ reducedMotion: pref });
|
|
102
|
+
if (typeof resp === 'string' && resp.includes('Unknown argument')) {
|
|
103
|
+
throw new Error(`emulate does not support reducedMotion in this chrome-devtools-mcp version: ${resp.slice(0, 120)}`);
|
|
104
|
+
}
|
|
105
|
+
return resp;
|
|
106
|
+
}
|
|
52
107
|
resize(w, h) { return this._mcp.resize_page({ width: w, height: h }); }
|
|
53
108
|
|
|
54
109
|
// ── Network & performance ───────────────────────────────────────────────────
|
|
@@ -370,54 +370,13 @@ function analyzeNetworkPerformance(perfEntries, pageUrl) {
|
|
|
370
370
|
return bugs;
|
|
371
371
|
}
|
|
372
372
|
|
|
373
|
-
// ── Performance Budgets ────────────────────────────────────────────────────────
|
|
374
|
-
|
|
375
|
-
async function checkPerformanceBudgets(browser, url) {
|
|
376
|
-
const violations = [];
|
|
377
|
-
|
|
378
|
-
try {
|
|
379
|
-
await browser.startTrace();
|
|
380
|
-
await new Promise(r => setTimeout(r, 3000));
|
|
381
|
-
const trace = await browser.stopTrace();
|
|
382
|
-
const insights = await browser.analyzeInsight({ insightSetId: trace?.insightSetId ?? trace?.id ?? trace });
|
|
383
|
-
|
|
384
|
-
const metrics = insights?.metrics ?? insights?.performanceMetrics ?? {};
|
|
385
|
-
|
|
386
|
-
const checks = [
|
|
387
|
-
{ key: 'LCP', value: metrics.largestContentfulPaint ?? metrics.LCP, budget: thresholds.perf.LCP, unit: 'ms' },
|
|
388
|
-
{ key: 'CLS', value: metrics.cumulativeLayoutShift ?? metrics.CLS, budget: thresholds.perf.CLS, unit: '' },
|
|
389
|
-
{ key: 'FID', value: metrics.totalBlockingTime ?? metrics.TBT ?? metrics.FID, budget: thresholds.perf.FID, unit: 'ms' },
|
|
390
|
-
{ key: 'TTFB', value: metrics.timeToFirstByte ?? metrics.TTFB, budget: thresholds.perf.TTFB, unit: 'ms' },
|
|
391
|
-
];
|
|
392
|
-
|
|
393
|
-
for (const { key, value, budget, unit } of checks) {
|
|
394
|
-
if (value == null) continue;
|
|
395
|
-
if (value > budget) {
|
|
396
|
-
violations.push({
|
|
397
|
-
type: 'performance_budget',
|
|
398
|
-
metric: key,
|
|
399
|
-
value: `${value}${unit}`,
|
|
400
|
-
budget: `${budget}${unit}`,
|
|
401
|
-
message: `Performance budget exceeded: ${key} = ${value}${unit} (budget: ${budget}${unit})`,
|
|
402
|
-
severity: 'warning',
|
|
403
|
-
url,
|
|
404
|
-
});
|
|
405
|
-
}
|
|
406
|
-
}
|
|
407
|
-
} catch (err) {
|
|
408
|
-
logger.warn(`[ARGUS] Performance trace skipped for ${url}: ${err.message}`);
|
|
409
|
-
}
|
|
410
|
-
|
|
411
|
-
return violations;
|
|
412
|
-
}
|
|
413
|
-
|
|
414
373
|
// ── Cheap Crawl (called ×2 for flakiness detection) ───────────────────────────
|
|
415
374
|
|
|
416
375
|
/**
|
|
417
376
|
* Cheap detections for one route.
|
|
418
377
|
* Runs: console, network, JS errors, blank page, API frequency, contracts,
|
|
419
378
|
* SEO, security, content, CSS, debugger statements, duplicate ids, screenshot.
|
|
420
|
-
* Does NOT run: Lighthouse,
|
|
379
|
+
* Does NOT run: Lighthouse, network perf, redirect chain, broken links, cache headers.
|
|
421
380
|
*/
|
|
422
381
|
export async function crawlRouteCheap(route, baseUrl, mcp) {
|
|
423
382
|
const browser = new CdpBrowserAdapter(mcp);
|
|
@@ -757,8 +716,11 @@ export async function crawlRouteCheap(route, baseUrl, mcp) {
|
|
|
757
716
|
|
|
758
717
|
/**
|
|
759
718
|
* Expensive/deterministic analyzers for one route — called ONCE per route.
|
|
760
|
-
* Runs: network perf, redirect chain,
|
|
719
|
+
* Runs: network perf, redirect chain, Lighthouse,
|
|
761
720
|
* broken internal links, cache headers.
|
|
721
|
+
* (Core Web Vitals — LCP/CLS/TTFB — are emitted by the web-vitals-analyzer
|
|
722
|
+
* registerExpensive plugin, which is headless-compatible; the old trace-based
|
|
723
|
+
* perf-budget path was removed as dead + redundant.)
|
|
762
724
|
*/
|
|
763
725
|
export async function crawlRouteExpensive(route, baseUrl, mcp) {
|
|
764
726
|
const browser = new CdpBrowserAdapter(mcp);
|
|
@@ -805,9 +767,6 @@ export async function crawlRouteExpensive(route, baseUrl, mcp) {
|
|
|
805
767
|
logger.warn(`[ARGUS] Redirect chain check skipped for ${url}: ${err.message}`);
|
|
806
768
|
}
|
|
807
769
|
|
|
808
|
-
// Performance budget check
|
|
809
|
-
errors.push(...(await checkPerformanceBudgets(browser, url)));
|
|
810
|
-
|
|
811
770
|
// Full Lighthouse audit (capped at LIGHTHOUSE_TIMEOUT_MS to prevent indefinite hang)
|
|
812
771
|
errors.push(...(await Promise.race([
|
|
813
772
|
checkLighthouse(browser, url),
|
package/src/utils/mcp-client.js
CHANGED
|
@@ -181,11 +181,14 @@ export async function createMcpClient() {
|
|
|
181
181
|
// MCP returns { content: [{ type, text|data }] } — extract the value
|
|
182
182
|
const content = result?.content;
|
|
183
183
|
if (Array.isArray(content) && content.length > 0) {
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
184
|
+
// take_screenshot returns [text caption, image] — the image is NOT content[0],
|
|
185
|
+
// so scan the whole array for it. Reading only content[0] returned the caption
|
|
186
|
+
// string and starved every screenshot consumer of image data.
|
|
187
|
+
const img = content.find(c => c.type === 'image');
|
|
188
|
+
if (img) {
|
|
189
|
+
return { data: img.data, mimeType: img.mimeType ?? 'image/png' };
|
|
188
190
|
}
|
|
191
|
+
const item = content[0];
|
|
189
192
|
if (item.type === 'text') {
|
|
190
193
|
const text = item.text;
|
|
191
194
|
// chrome-devtools-mcp wraps evaluate_script results in a markdown code block:
|