argusqa-os 9.3.1 → 9.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/glama.json +29 -1
- package/package.json +1 -1
- package/src/adapters/browser.js +4 -0
- package/src/mcp-server.js +57 -14
package/README.md
CHANGED
|
@@ -79,7 +79,7 @@ The `landing/` directory contains the product landing page (React + Vite + Tailw
|
|
|
79
79
|
|
|
80
80
|
| 🔴 Critical / 🟡 Warning / 🔵 Info | ⚙️ | 🧪 | ๐ |
|
|
81
81
|
| :---: | :---: | :---: | :---: |
|
|
82
|
-
| **114 distinct issue types detected** | **24 analysis engines** | **
|
|
82
|
+
| **114 distinct issue types detected** | **24 analysis engines** | **367 test assertions** | **84 test blocks** |
|
|
83
83
|
|
|
84
84
|
</div>
|
|
85
85
|
|
|
@@ -943,7 +943,7 @@ argus/
|
|
|
943
943
|
│ └── README.md # Setup guide, Supabase SQL schema, env vars, deployment
|
|
944
944
|
├── scripts/
|
|
945
945
|
│ └── dispatch-report.js # Standalone Slack re-dispatch script (re-posts last report.json to Slack)
|
|
946
|
-
├── test-harness/ # Fixture server + test runner (
|
|
946
|
+
├── test-harness/ # Fixture server + test runner (84 blocks, 367 hard assertions, 54 fixture pages)
|
|
947
947
|
│ ├── README.md
|
|
948
948
|
│ ├── server.js # Express fixture server (ports 3100 dev / 3101 staging)
|
|
949
949
|
│ ├── harness-config.js # Route definitions + expected findings
|
|
@@ -988,7 +988,7 @@ argus/
|
|
|
988
988
|
|
|
989
989
|
## Known MCP Tool Limitations
|
|
990
990
|
|
|
991
|
-
The Chrome DevTools MCP behavioral constraints below cause **3 permanent test failures** in the harness (`
|
|
991
|
+
The Chrome DevTools MCP behavioral constraints below cause **3 permanent test failures** in the harness (`364/367` pass). These are MCP-layer restrictions — they cannot be fixed in Argus code. `validate.js` now exits with code 0 when only these 3 failures remain, making the CI harness gate reliable.
|
|
992
992
|
|
|
993
993
|
> **`type_text` clarification**: `type_text` does fire DOM `input` events when the element is properly focused first with `mcp.click({ uid })`. Always use uid-based focus — passing `{ selector }` to `mcp.click` silently does nothing.
|
|
994
994
|
|
package/glama.json
CHANGED
|
@@ -1,4 +1,32 @@
|
|
|
1
1
|
{
|
|
2
2
|
"$schema": "https://glama.ai/mcp/schemas/server.json",
|
|
3
|
-
"
|
|
3
|
+
"name": "argus",
|
|
4
|
+
"description": "AI-powered QA harness that audits web apps via Chrome DevTools Protocol. Catches JS errors, network failures, a11y violations, SEO issues, security headers, CSS regressions, and more — directly from Claude conversations. 6 MCP tools: argus_audit (fast 8-analyzer pass), argus_audit_full (Lighthouse + memory + responsive), argus_compare (dev vs staging diff), argus_last_report (retrieve last JSON report), argus_watch_snapshot (live tab snapshot without navigating), argus_get_context (LLM-optimized context + fix loop with snapshot_id diff). 84 test blocks, 367 hard assertions, 54 detection categories.",
|
|
5
|
+
"maintainers": ["ironclawdevs27"],
|
|
6
|
+
"tools": [
|
|
7
|
+
{
|
|
8
|
+
"name": "argus_audit",
|
|
9
|
+
"description": "Fast QA audit — JS errors, network failures (4xx/5xx), API frequency loops, CSS cascade issues, SEO violations, security headers, accessibility, and content. Returns { findings, summary }. Supports cache: true to skip re-crawl on repeat calls."
|
|
10
|
+
},
|
|
11
|
+
{
|
|
12
|
+
"name": "argus_audit_full",
|
|
13
|
+
"description": "Deep QA audit — extends argus_audit with Lighthouse performance/accessibility scoring, responsive layout checks at 4 viewports, memory leak detection via heap snapshot, and accessibility tree analysis."
|
|
14
|
+
},
|
|
15
|
+
{
|
|
16
|
+
"name": "argus_compare",
|
|
17
|
+
"description": "Diffs dev vs staging environments side-by-side. Captures screenshots, runs all analyzers on each, and surfaces regressions — findings present in staging but not dev, or with changed severity."
|
|
18
|
+
},
|
|
19
|
+
{
|
|
20
|
+
"name": "argus_last_report",
|
|
21
|
+
"description": "Returns the most recent Argus JSON report from the reports/ directory without re-running a scan."
|
|
22
|
+
},
|
|
23
|
+
{
|
|
24
|
+
"name": "argus_watch_snapshot",
|
|
25
|
+
"description": "Snapshots the currently open Chrome tab without navigating — captures console errors, network failures, CORS blocks, and auth failures in one poll. Accepts optional tabId to inspect a specific tab."
|
|
26
|
+
},
|
|
27
|
+
{
|
|
28
|
+
"name": "argus_get_context",
|
|
29
|
+
"description": "LLM-optimized diagnostic context for the open Chrome tab. Returns snapshot_id for fix-loop diffing: pass it back on the next call to get resolved/new_issues/persisting arrays. Accepts optional tabId for multi-tab workflows."
|
|
30
|
+
}
|
|
31
|
+
]
|
|
4
32
|
}
|
package/package.json
CHANGED
package/src/adapters/browser.js
CHANGED
|
@@ -56,6 +56,10 @@ export class CdpBrowserAdapter {
|
|
|
56
56
|
stopTrace() { return this._mcp.performance_stop_trace({}); }
|
|
57
57
|
analyzeInsight(opts) { return this._mcp.performance_analyze_insight(opts); }
|
|
58
58
|
|
|
59
|
+
// ── Tab management ─────────────────────────────────────────────────────────
|
|
60
|
+
listPages() { return this._mcp.list_pages({}); }
|
|
61
|
+
selectPage(tabId) { return this._mcp.select_page({ pageId: tabId }); }
|
|
62
|
+
|
|
59
63
|
// ── Lifecycle ───────────────────────────────────────────────────────────────
|
|
60
64
|
close() { return this._mcp.close(); }
|
|
61
65
|
|
package/src/mcp-server.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
/**
|
|
3
|
-
* Argus MCP Server (v9.
|
|
3
|
+
* Argus MCP Server (v9.4.1)
|
|
4
4
|
*
|
|
5
5
|
* Exposes Argus as an MCP server so Claude (or any MCP client) can call
|
|
6
6
|
* argus_audit, argus_audit_full, argus_compare, argus_last_report, and
|
|
@@ -38,6 +38,10 @@ const REPORTS_DIR = path.resolve(process.cwd(), 'reports');
|
|
|
38
38
|
const snapshotStore = new Map();
|
|
39
39
|
const MAX_SNAPSHOTS = 20;
|
|
40
40
|
|
|
41
|
+
// Audit cache: stores argus_audit results keyed by URL so cache:true skips re-crawl.
|
|
42
|
+
const auditCache = new Map();
|
|
43
|
+
const MAX_AUDIT_CACHE = 20;
|
|
44
|
+
|
|
41
45
|
function storeSnapshot(id, findings) {
|
|
42
46
|
snapshotStore.set(id, findings);
|
|
43
47
|
if (snapshotStore.size > MAX_SNAPSHOTS) {
|
|
@@ -45,17 +49,25 @@ function storeSnapshot(id, findings) {
|
|
|
45
49
|
}
|
|
46
50
|
}
|
|
47
51
|
|
|
52
|
+
function cacheAudit(url, result) {
|
|
53
|
+
auditCache.set(url, { result, ts: Date.now() });
|
|
54
|
+
if (auditCache.size > MAX_AUDIT_CACHE) {
|
|
55
|
+
auditCache.delete(auditCache.keys().next().value);
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
|
|
48
59
|
// ── Tool definitions ─────────────────────────────────────────────────────────
|
|
49
60
|
|
|
50
61
|
const TOOLS = [
|
|
51
62
|
{
|
|
52
63
|
name: 'argus_audit',
|
|
53
|
-
description: 'Fast QA audit on a URL via Chrome DevTools Protocol. Runs 8 analyzers in one pass: JS errors, unhandled rejections, network failures (4xx/5xx), API frequency loops, CSS cascade issues, SEO violations, security header checks, and accessibility. Returns { findings: [{severity, type, message, url}], summary: {critical, warning, info} }. Use for CI smoke tests and pre-deploy gates. For Lighthouse scoring and memory leak detection, use argus_audit_full. Requires Chrome running with --remote-debugging-port=9222.',
|
|
64
|
+
description: 'Fast QA audit on a URL via Chrome DevTools Protocol. Runs 8 analyzers in one pass: JS errors, unhandled rejections, network failures (4xx/5xx), API frequency loops, CSS cascade issues, SEO violations, security header checks, and accessibility. Returns { findings: [{severity, type, message, url}], summary: {critical, warning, info} }. Use for CI smoke tests and pre-deploy gates. Pass cache: true to skip re-crawl on repeat calls to the same URL within a session — useful in tight fix loops. For Lighthouse scoring and memory leak detection, use argus_audit_full. Requires Chrome running with --remote-debugging-port=9222.',
|
|
54
65
|
inputSchema: {
|
|
55
66
|
type: 'object',
|
|
56
67
|
properties: {
|
|
57
68
|
url: { type: 'string', description: 'Full URL to audit, including protocol and path (e.g. http://localhost:3000/checkout). Must be reachable by the running Chrome instance.' },
|
|
58
69
|
critical: { type: 'boolean', description: 'When true, console.error calls are escalated to critical severity. Set true for business-critical routes (login, checkout, dashboard) where any error is a blocker.', default: false },
|
|
70
|
+
cache: { type: 'boolean', description: 'When true, returns the cached result for this URL if one exists (from a previous argus_audit call in this session) without re-crawling. Use in fix loops to cheaply re-read the last audit while iterating on a fix. Cache is per-session, max 20 entries, LRU eviction.', default: false },
|
|
59
71
|
},
|
|
60
72
|
required: ['url'],
|
|
61
73
|
},
|
|
@@ -84,22 +96,24 @@ const TOOLS = [
|
|
|
84
96
|
},
|
|
85
97
|
{
|
|
86
98
|
name: 'argus_watch_snapshot',
|
|
87
|
-
description: 'Snapshots the currently open Chrome tab without navigating — captures console errors, network failures (4xx/5xx), CORS blocks, and auth failures in one poll. Returns { findings: [{severity, type, message, url}], newConsole, newNetwork }. Use during active development to inspect what is happening on the current page without running a full audit. Requires Chrome on --remote-debugging-port=9222 with a page already open.',
|
|
99
|
+
description: 'Snapshots the currently open Chrome tab without navigating — captures console errors, network failures (4xx/5xx), CORS blocks, and auth failures in one poll. Returns { findings: [{severity, type, message, url}], newConsole, newNetwork }. Use during active development to inspect what is happening on the current page without running a full audit. Pass tabId to inspect a specific tab (get IDs from argus_get_context or list_pages). Without tabId, reads the active tab. Requires Chrome on --remote-debugging-port=9222 with a page already open.',
|
|
88
100
|
inputSchema: {
|
|
89
101
|
type: 'object',
|
|
90
102
|
properties: {
|
|
91
|
-
url:
|
|
103
|
+
url: { type: 'string', description: 'Optional base URL to attribute findings to (default: TARGET_DEV_URL env var). Does not navigate — reads the currently open Chrome tab.' },
|
|
104
|
+
tabId: { type: 'string', description: 'Optional Chrome page/tab ID (e.g. from a prior argus_get_context response). When provided, switches focus to that tab before snapshotting — useful for SPAs that spawn new windows or multi-tab flows.' },
|
|
92
105
|
},
|
|
93
106
|
},
|
|
94
107
|
},
|
|
95
108
|
{
|
|
96
109
|
name: 'argus_get_context',
|
|
97
|
-
description: 'Captures everything currently broken on the open Chrome tab and formats it as a diagnostic context for Claude to read and suggest fixes. Does NOT navigate — reads the live tab state after user interactions, in authenticated sessions, or mid-flow. Returns { snapshot_id, summary, url, timestamp, critical_issues, warnings, js_errors, network_failures, console_errors, recent_requests }. Fix loop: pass the snapshot_id from a previous call as snapshot_id to get a diff — the response will include resolved (cleared since last snapshot), new_issues (appeared since last snapshot), and persisting (unchanged). Workflow: call argus_get_context → Claude suggests fix → apply fix → call argus_get_context with snapshot_id → verify resolved array is non-empty. Requires Chrome on --remote-debugging-port=9222.',
|
|
110
|
+
description: 'Captures everything currently broken on the open Chrome tab and formats it as a diagnostic context for Claude to read and suggest fixes. Does NOT navigate — reads the live tab state after user interactions, in authenticated sessions, or mid-flow. Returns { snapshot_id, summary, url, timestamp, critical_issues, warnings, js_errors, network_failures, console_errors, recent_requests, open_tabs }. Fix loop: pass the snapshot_id from a previous call as snapshot_id to get a diff — the response will include resolved (cleared since last snapshot), new_issues (appeared since last snapshot), and persisting (unchanged). Multi-tab: pass tabId to inspect a specific tab, or omit to read the active tab. The open_tabs array always lists all currently open Chrome tabs. Workflow: call argus_get_context → Claude suggests fix → apply fix → call argus_get_context with snapshot_id → verify resolved array is non-empty. Requires Chrome on --remote-debugging-port=9222.',
|
|
98
111
|
inputSchema: {
|
|
99
112
|
type: 'object',
|
|
100
113
|
properties: {
|
|
101
|
-
url:
|
|
114
|
+
url: { type: 'string', description: 'Optional base URL to attribute findings to (default: TARGET_DEV_URL env var). Does not navigate — inspects the currently open Chrome tab.' },
|
|
102
115
|
snapshot_id: { type: 'string', description: 'Optional snapshot_id from a previous argus_get_context call. When provided, the response includes resolved/new_issues/persisting arrays showing what changed since that snapshot.' },
|
|
116
|
+
tabId: { type: 'string', description: 'Optional Chrome page/tab ID. When provided, switches focus to that specific tab before capturing context — useful for SPAs that spawn new windows (e.g. OAuth popups, checkout flows). Get tab IDs from the open_tabs array in a prior argus_get_context response.' },
|
|
103
117
|
},
|
|
104
118
|
},
|
|
105
119
|
},
|
|
@@ -118,12 +132,29 @@ async function withMcp(fn) {
|
|
|
118
132
|
|
|
119
133
|
// ── Tool handlers ─────────────────────────────────────────────────────────────
|
|
120
134
|
|
|
121
|
-
async function handleAudit({ url, critical = false }) {
|
|
135
|
+
async function handleAudit({ url, critical = false, cache = false }) {
|
|
136
|
+
if (cache && auditCache.has(url)) {
|
|
137
|
+
const { result, ts } = auditCache.get(url);
|
|
138
|
+
return { content: [{ type: 'text', text: JSON.stringify({ ...result, _cached: true, _cachedAt: new Date(ts).toISOString() }, null, 2) }] };
|
|
139
|
+
}
|
|
122
140
|
return withMcp(async (mcp) => {
|
|
123
|
-
const parsed
|
|
124
|
-
const route
|
|
125
|
-
const
|
|
126
|
-
|
|
141
|
+
const parsed = new URL(url);
|
|
142
|
+
const route = { path: parsed.pathname + parsed.search + parsed.hash, name: 'audit', critical };
|
|
143
|
+
const raw = await crawlRouteCheap(route, parsed.origin, mcp);
|
|
144
|
+
const findings = Array.isArray(raw.errors) ? raw.errors : [];
|
|
145
|
+
const result = {
|
|
146
|
+
findings,
|
|
147
|
+
summary: {
|
|
148
|
+
critical: findings.filter(f => f.severity === 'critical').length,
|
|
149
|
+
warning: findings.filter(f => f.severity === 'warning').length,
|
|
150
|
+
info: findings.filter(f => f.severity === 'info').length,
|
|
151
|
+
},
|
|
152
|
+
url: raw.url,
|
|
153
|
+
pageTitle: raw.pageTitle,
|
|
154
|
+
screenshot: raw.screenshot,
|
|
155
|
+
};
|
|
156
|
+
if (cache) cacheAudit(url, result);
|
|
157
|
+
return { content: [{ type: 'text', text: JSON.stringify(result, null, 2) }] };
|
|
127
158
|
});
|
|
128
159
|
}
|
|
129
160
|
|
|
@@ -146,9 +177,10 @@ async function handleCompare() {
|
|
|
146
177
|
});
|
|
147
178
|
}
|
|
148
179
|
|
|
149
|
-
async function handleWatchSnapshot({ url } = {}) {
|
|
180
|
+
async function handleWatchSnapshot({ url, tabId } = {}) {
|
|
150
181
|
return withMcp(async (mcp) => {
|
|
151
182
|
const browser = new CdpBrowserAdapter(mcp);
|
|
183
|
+
if (tabId) await browser.selectPage(tabId);
|
|
152
184
|
const baseUrl = url ?? process.env.TARGET_DEV_URL ?? 'http://localhost:3000';
|
|
153
185
|
const session = new WatchSession(browser, baseUrl);
|
|
154
186
|
const result = await session.poll();
|
|
@@ -156,13 +188,23 @@ async function handleWatchSnapshot({ url } = {}) {
|
|
|
156
188
|
});
|
|
157
189
|
}
|
|
158
190
|
|
|
159
|
-
async function handleGetContext({ url, snapshot_id: prevId } = {}) {
|
|
191
|
+
async function handleGetContext({ url, snapshot_id: prevId, tabId } = {}) {
|
|
160
192
|
return withMcp(async (mcp) => {
|
|
161
193
|
const browser = new CdpBrowserAdapter(mcp);
|
|
194
|
+
if (tabId) await browser.selectPage(tabId);
|
|
162
195
|
const baseUrl = url ?? process.env.TARGET_DEV_URL ?? 'http://localhost:3000';
|
|
163
196
|
const session = new WatchSession(browser, baseUrl);
|
|
164
197
|
const { findings, newConsole, newNetwork } = await session.poll();
|
|
165
198
|
|
|
199
|
+
// List all open tabs so the caller can target a specific tab on the next call.
|
|
200
|
+
let open_tabs = [];
|
|
201
|
+
try {
|
|
202
|
+
const pages = await browser.listPages();
|
|
203
|
+
if (Array.isArray(pages)) {
|
|
204
|
+
open_tabs = pages.map(p => ({ id: p.id ?? p.pageId, url: p.url, title: p.title }));
|
|
205
|
+
}
|
|
206
|
+
} catch { /* list_pages not available in all Chrome configs — degrade gracefully */ }
|
|
207
|
+
|
|
166
208
|
const newId = Date.now().toString(36) + Math.random().toString(36).slice(2, 6);
|
|
167
209
|
storeSnapshot(newId, findings);
|
|
168
210
|
|
|
@@ -215,6 +257,7 @@ async function handleGetContext({ url, snapshot_id: prevId } = {}) {
|
|
|
215
257
|
network_failures: findings.filter(f => f.type === 'network-error' || f.type === 'cors-error' || f.type === 'auth-error'),
|
|
216
258
|
console_errors: newConsole.filter(m => m.level === 'error' || m.level === 'warning'),
|
|
217
259
|
recent_requests: newNetwork.slice(-20),
|
|
260
|
+
open_tabs,
|
|
218
261
|
...(isDiff ? { resolved, new_issues, persisting } : {}),
|
|
219
262
|
};
|
|
220
263
|
|
|
@@ -240,7 +283,7 @@ async function handleLastReport() {
|
|
|
240
283
|
// ── Server bootstrap ──────────────────────────────────────────────────────────
|
|
241
284
|
|
|
242
285
|
const server = new Server(
|
|
243
|
-
{ name: 'argus', version: '9.
|
|
286
|
+
{ name: 'argus', version: '9.4.1' },
|
|
244
287
|
{ capabilities: { tools: {} } },
|
|
245
288
|
);
|
|
246
289
|
|