argusqa-os 9.6.6 → 9.7.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/mcp-server.js CHANGED
@@ -1,6 +1,6 @@
1
1
  #!/usr/bin/env node
2
2
  /**
3
- * Argus MCP Server (v9.6.6)
3
+ * Argus MCP Server
4
4
  *
5
5
  * Exposes Argus as an MCP server so Claude (or any MCP client) can call
6
6
  * argus_audit, argus_audit_full, argus_compare, argus_last_report, and
@@ -24,8 +24,11 @@ import {
24
24
  } from '@modelcontextprotocol/sdk/types.js';
25
25
  import fs from 'fs';
26
26
  import path from 'path';
27
+ import { createRequire } from 'module';
27
28
 
28
29
  import { createMcpClient } from './utils/mcp-client.js';
30
+ import { childLogger } from './utils/logger.js';
31
+ import { parseListPagesResponse } from './utils/mcp-parsers.js';
29
32
  import { crawlRouteCheap, runCrawl } from './orchestration/crawl-and-report.js';
30
33
  import { runComparison } from './orchestration/env-comparison.js';
31
34
  import { WatchSession } from './orchestration/watch-mode.js';
@@ -33,7 +36,14 @@ import { CdpBrowserAdapter } from './adapters/browser.js';
33
36
  import { getFigmaFrame } from './adapters/figma.js';
34
37
  import { analyzeDesignFidelity } from './utils/design-fidelity-analyzer.js';
35
38
  import { analyzeVisualRegression } from './utils/visual-diff-analyzer.js';
36
- import { parsePrUrl, fetchPrFiles, mapFilesToRoutes } from './utils/pr-diff-analyzer.js';
39
+ import { fetchPrFiles, mapFilesToRoutes } from './utils/pr-diff-analyzer.js';
40
+
41
+ const logger = childLogger('mcp-server');
42
+
43
+ // Read version from package.json so the MCP server always self-reports the
44
+ // published package version (a hardcoded string here drifted in the past).
45
+ const require_ = createRequire(import.meta.url);
46
+ const pkg = require_('../package.json');
37
47
 
38
48
  const REPORTS_DIR = path.resolve(process.cwd(), 'reports');
39
49
 
@@ -65,7 +75,7 @@ function cacheAudit(url, result) {
65
75
  const TOOLS = [
66
76
  {
67
77
  name: 'argus_audit',
68
- description: 'Fast QA audit on a URL via Chrome DevTools Protocol. Runs 8 analyzers in one pass: JS errors, unhandled rejections, network failures (4xx/5xx), API frequency loops, CSS cascade issues, SEO violations, security header checks, and accessibility. Returns { findings: [{severity, type, message, url}], summary: {critical, warning, info} }. Use for CI smoke tests and pre-deploy gates. Pass cache: true to skip re-crawl on repeat calls to the same URL within a session — useful in tight fix loops. For Lighthouse scoring and memory leak detection, use argus_audit_full. Requires Chrome running with --remote-debugging-port=9222.',
78
+ description: 'Fast QA audit on a URL via Chrome DevTools Protocol. One-pass detection sweep: JS errors, unhandled rejections, network failures (4xx/5xx), CORS errors, API frequency loops, slow APIs and blocking third-party requests, API contract violations, sync XHR, document.write, long tasks, service worker failures, debugger statements, duplicate IDs, SEO violations, security header checks, content quality, Chrome DevTools Issues panel, and HTTPS enforcement. Returns { findings: [{severity, type, message, url}], summary: {critical, warning, info} }. Use for CI smoke tests and pre-deploy gates. Pass cache: true to skip re-crawl on repeat calls to the same URL within a session — useful in tight fix loops. For Lighthouse scoring, CSS analysis, responsive checks, and memory leak detection, use argus_audit_full. Requires Chrome running with --remote-debugging-port=9222.',
69
79
  inputSchema: {
70
80
  type: 'object',
71
81
  properties: {
@@ -181,6 +191,9 @@ async function withMcp(fn) {
181
191
  async function handleAudit({ url, critical = false, cache = false }) {
182
192
  if (cache && auditCache.has(url)) {
183
193
  const { result, ts } = auditCache.get(url);
194
+ // Refresh recency on read so eviction is true LRU, not insertion-order FIFO.
195
+ auditCache.delete(url);
196
+ auditCache.set(url, { result, ts });
184
197
  return { content: [{ type: 'text', text: JSON.stringify({ ...result, _cached: true, _cachedAt: new Date(ts).toISOString() }, null, 2) }] };
185
198
  }
186
199
  return withMcp(async (mcp) => {
@@ -243,12 +256,13 @@ async function handleGetContext({ url, snapshot_id: prevId, tabId } = {}) {
243
256
  const { findings, newConsole, newNetwork } = await session.poll();
244
257
 
245
258
  // List all open tabs so the caller can target a specific tab on the next call.
259
+ // list_pages returns markdown text ("## Pages\n1: <url> [selected]") — parse
260
+ // it like every other MCP response; treating it as a structured array left
261
+ // open_tabs permanently empty.
246
262
  let open_tabs = [];
247
263
  try {
248
- const pages = await browser.listPages();
249
- if (Array.isArray(pages)) {
250
- open_tabs = pages.map(p => ({ id: p.id ?? p.pageId, url: p.url, title: p.title }));
251
- }
264
+ const pages = parseListPagesResponse(await browser.listPages());
265
+ open_tabs = pages.map(p => ({ id: p.id, url: p.url, selected: p.selected }));
252
266
  } catch { /* list_pages not available in all Chrome configs — degrade gracefully */ }
253
267
 
254
268
  const newId = Date.now().toString(36) + Math.random().toString(36).slice(2, 6);
@@ -397,8 +411,12 @@ async function handlePrValidate({ prUrl, targetUrl, githubToken, blockOn } = {})
397
411
  const allFindings = [];
398
412
  const perRoute = [];
399
413
 
414
+ // Preserve any path prefix in the target URL (e.g. http://host/app) — new URL()
415
+ // with a leading-slash path would drop it. Mirrors src/cli/pr-validate.js.
416
+ const baseUrl = String(base).replace(/\/$/, '');
400
417
  for (const route of affectedRoutes) {
401
- const url = new URL(route.path, base).href;
418
+ const routePath = String(route.path ?? '/').startsWith('/') ? route.path : `/${route.path}`;
419
+ const url = `${baseUrl}${routePath}`;
402
420
  const res = await handleAudit({ url, critical: route.critical ?? false });
403
421
  const data = JSON.parse(res.content[0].text);
404
422
  allFindings.push(...(data.findings ?? []));
@@ -447,7 +465,7 @@ async function handleLastReport() {
447
465
  // ── Server bootstrap ──────────────────────────────────────────────────────────
448
466
 
449
467
  const server = new Server(
450
- { name: 'argus', version: '9.6.6' },
468
+ { name: 'argus', version: pkg.version },
451
469
  { capabilities: { tools: {} } },
452
470
  );
453
471
 
@@ -20,10 +20,11 @@ import { SECURITY_ANALYSIS_SCRIPT, parseSecurityAnalysisResult, analyzeSecurityC
20
20
  import { CONTENT_ANALYSIS_SCRIPT, parseContentAnalysisResult } from '../utils/content-analyzer.js';
21
21
  import { runLoginFlow, saveSession, restoreSession, hasSession, refreshSession } from '../utils/session-manager.js';
22
22
  import { mergeRunResults } from '../utils/flakiness-detector.js';
23
- import { runAllFlows, normalizeArray, waitForSelector } from '../utils/flow-runner.js';
23
+ import { runAllFlows, waitForSelector } from '../utils/flow-runner.js';
24
24
  import { analyzeApiFrequency } from '../utils/api-frequency.js';
25
25
  import { slugify } from '../utils/slug.js';
26
26
  import { unwrapEval, createMcpClient } from '../utils/mcp-client.js';
27
+ import { parseConsoleMsgResponse } from '../utils/mcp-parsers.js';
27
28
  import { CdpBrowserAdapter } from '../adapters/browser.js';
28
29
  import { getFigmaFrame } from '../adapters/figma.js';
29
30
  import { chunkArray } from '../utils/parallel-crawler.js';
@@ -435,9 +436,9 @@ export async function crawlRouteCheap(route, baseUrl, mcp) {
435
436
  const consoleBaseline = (await browser.listConsole().catch(() => [])).length;
436
437
  const baselineNetList = await browser.listNetwork().catch(() => []);
437
438
  const networkMaxReqId = baselineNetList.reduce((max, r) => Math.max(max, r._reqid ?? 0), 0);
438
- // listConsoleRaw returns raw MCP response — normalizeArray required before .length
439
+ // listConsoleRaw returns markdown text ("msgid=N [issue] text") — parse like console messages
439
440
  const issuesBaselineRaw = await browser.listConsoleRaw({ types: ['issue'] }).catch(() => null);
440
- const issuesBaseline = normalizeArray(issuesBaselineRaw).length;
441
+ const issuesBaseline = parseConsoleMsgResponse(issuesBaselineRaw).length;
441
442
 
442
443
  // 1. Navigate
443
444
  await browser.navigate(url);
@@ -710,11 +711,12 @@ export async function crawlRouteCheap(route, baseUrl, mcp) {
710
711
  logger.warn(`[ARGUS] Content analysis skipped for ${url}: ${err.message}`);
711
712
  }
712
713
 
713
- // 9e. Chrome DevTools Issues panel
714
+ // 9e. Chrome DevTools Issues panel — same reset-per-navigation guard as console (D5)
714
715
  try {
715
- const issueRaw = await browser.listConsoleRaw({ types: ['issue'] });
716
- const issues = normalizeArray(issueRaw).slice(issuesBaseline);
717
- result.errors.push(...parseIssues(issues, url, route.critical));
716
+ const issueRaw = await browser.listConsoleRaw({ types: ['issue'] });
717
+ const allIssues = parseConsoleMsgResponse(issueRaw);
718
+ const issuesSliceAt = allIssues.length > issuesBaseline ? issuesBaseline : 0;
719
+ result.errors.push(...parseIssues(allIssues.slice(issuesSliceAt), url, route.critical));
718
720
  } catch (err) {
719
721
  logger.warn(`[ARGUS] Issues analysis skipped for ${url}: ${err.message}`);
720
722
  }
@@ -13,6 +13,8 @@ import path from 'path';
13
13
  import { childLogger } from '../utils/logger.js';
14
14
  import { applyOverrides } from '../utils/severity-overrides.js';
15
15
  import { loadBaseline, saveBaseline, applyBaseline, appendTrend, getCurrentBranch } from '../utils/baseline-manager.js';
16
+ import { loadRunHistory, recordRunHistory, applyNoiseFilter } from '../utils/noise-filter.js';
17
+ import { getRecentChanges, linkRootCauses } from '../utils/root-cause-linker.js';
16
18
 
17
19
  const logger = childLogger('report-processor');
18
20
 
@@ -104,6 +106,29 @@ export async function processReport(report, { outputDir, severityOverrides }) {
104
106
  logger.info('[ARGUS] First run — no baseline to compare; all findings treated as new');
105
107
  }
106
108
 
109
+ // 3a. Intelligent baseline filtering — downgrade cross-run flip-flopping findings
110
+ // to info. Best-effort; disable with ARGUS_NOISE_FILTER=0.
111
+ const historyPath = path.join(outputDir, 'baselines', `${safeBranch}-history.json`);
112
+ if (process.env.ARGUS_NOISE_FILTER !== '0') {
113
+ try {
114
+ const history = loadRunHistory(historyPath);
115
+ const { noisyCount } = applyNoiseFilter(report, history);
116
+ if (noisyCount > 0) rebuildSummary(report); // downgrades change severity counts
117
+ } catch (err) {
118
+ logger.warn(`[ARGUS] Noise filter skipped: ${err.message}`);
119
+ }
120
+ }
121
+
122
+ // 3b. Root cause linking — annotate new findings with recent git changes that
123
+ // map to their route. Best-effort; disable with ARGUS_ROOT_CAUSE=0.
124
+ if (process.env.ARGUS_ROOT_CAUSE !== '0') {
125
+ try {
126
+ linkRootCauses(report, getRecentChanges());
127
+ } catch (err) {
128
+ logger.warn(`[ARGUS] Root cause linking skipped: ${err.message}`);
129
+ }
130
+ }
131
+
107
132
  // 4. Write JSON report
108
133
  const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
109
134
  const reportPath = path.join(outputDir, `error-report-${timestamp}.json`);
@@ -115,8 +140,15 @@ export async function processReport(report, { outputDir, severityOverrides }) {
115
140
  }
116
141
  logger.info(`[ARGUS] Report written: ${reportPath}`);
117
142
 
118
- // 5. Persist baseline + append trend entry
143
+ // 5. Persist baseline + run history + append trend entry
119
144
  saveBaseline(baselinePath, report);
145
+ if (process.env.ARGUS_NOISE_FILTER !== '0') {
146
+ try {
147
+ recordRunHistory(historyPath, report);
148
+ } catch (err) {
149
+ logger.warn(`[ARGUS] Run history write skipped: ${err.message}`);
150
+ }
151
+ }
120
152
  appendTrend(trendsPath, {
121
153
  runAt: report.generatedAt,
122
154
  baseUrl: report.baseUrl,
@@ -298,6 +298,13 @@ function classifyNetworkReq(req, url) {
298
298
  * the interval-based runWatchMode() entry point.
299
299
  */
300
300
  export class WatchSession {
301
+ // Long-run safety caps: a watch session left running for hours against an app
302
+ // with cache-busted polling URLs would otherwise grow the dedup sets without
303
+ // bound. When a set exceeds its cap the oldest fifth is evicted (Sets iterate
304
+ // in insertion order) — worst case a very old message is re-reported once.
305
+ static MAX_SEEN_KEYS = 5000;
306
+ static MAX_ALL_FINDINGS = 2000;
307
+
301
308
  constructor(browser, baseUrl) {
302
309
  this._browser = browser;
303
310
  this._baseUrl = baseUrl;
@@ -306,6 +313,14 @@ export class WatchSession {
306
313
  this._allFindings = [];
307
314
  }
308
315
 
316
+ /** Evict the oldest 20% of a dedup set once it exceeds the cap. */
317
+ static _trimSeen(set) {
318
+ if (set.size <= WatchSession.MAX_SEEN_KEYS) return;
319
+ const drop = Math.floor(WatchSession.MAX_SEEN_KEYS / 5);
320
+ const it = set.values();
321
+ for (let i = 0; i < drop; i++) set.delete(it.next().value);
322
+ }
323
+
309
324
  /**
310
325
  * Run one poll cycle.
311
326
  *
@@ -350,6 +365,11 @@ export class WatchSession {
350
365
  findings.push(...analyzeSecurityNetwork(newNetwork, this._baseUrl));
351
366
 
352
367
  this._allFindings.push(...findings);
368
+ if (this._allFindings.length > WatchSession.MAX_ALL_FINDINGS) {
369
+ this._allFindings = this._allFindings.slice(-WatchSession.MAX_ALL_FINDINGS);
370
+ }
371
+ WatchSession._trimSeen(this._seenConsole);
372
+ WatchSession._trimSeen(this._seenNetwork);
353
373
  return { findings, newConsole, newNetwork };
354
374
  }
355
375
 
@@ -1,5 +1,5 @@
1
1
  /**
2
- * ARGUS Deep Accessibility Analyzer (Sprint 4 — A12)
2
+ * ARGUS Deep Accessibility Analyzer (A12)
3
3
  *
4
4
  * Extends Argus accessibility coverage via two mechanisms:
5
5
  *
@@ -125,6 +125,32 @@ function loadSchema(contract) {
125
125
  return null;
126
126
  }
127
127
 
128
/**
 * Extract and JSON-parse the response body from a get_network_request result.
 *
 * chrome-devtools-mcp returns the request detail as markdown text with the
 * body under a "### Response Body" section — the dominant production shape.
 * Structured shapes ({ responseBody } / { body }) are kept for legacy clients.
 *
 * A missing or blank body yields null in both shapes, so an empty response is
 * reported as "no body" rather than as a JSON syntax error.
 *
 * @param {any} raw - Raw value returned by browser.getNetworkRequest()
 * @returns {any|null} Parsed JSON body, or null when absent or blank
 * @throws {SyntaxError} when a non-blank body exists but is not valid JSON
 */
export function extractResponseBody(raw) {
  if (raw == null) return null;

  // Blank text means "no body"; anything else must be valid JSON.
  const parseOrNull = (text) => {
    const trimmed = text.trim();
    return trimmed === '' ? null : JSON.parse(trimmed);
  };

  if (typeof raw === 'object') {
    const payload = raw.responseBody ?? raw.body ?? null;
    if (payload == null) return null;
    // A non-string payload is already structured — hand it back untouched.
    return typeof payload === 'string' ? parseOrNull(payload) : payload;
  }

  // Markdown shape: capture everything after the heading up to the next
  // "## " / "### " heading or the end of the text.
  const section = String(raw).match(/### Response Body\s*\n([\s\S]*?)(?=\n###? |$)/);
  return section ? parseOrNull(section[1]) : null;
}
153
+
128
154
  /**
129
155
  * Validate captured network requests against apiContracts[].
130
156
  * For each request that matches a contract, fetches the response body via
@@ -153,8 +179,7 @@ export async function validateApiContracts(networkReqs, browser, contracts, page
153
179
  let body = null;
154
180
  try {
155
181
  const raw = await browser.getNetworkRequest(req.id ?? req.requestId);
156
- const text = raw?.responseBody ?? raw?.body ?? null;
157
- if (text) body = JSON.parse(text);
182
+ body = extractResponseBody(raw);
158
183
  } catch {
159
184
  continue; // body unavailable — skip validation for this request
160
185
  }
@@ -1,5 +1,5 @@
1
1
  /**
2
- * ARGUS Design Fidelity Analyzer (Sprint 2 — D9: Design Fidelity)
2
+ * ARGUS Design Fidelity Analyzer (D9: Design Fidelity)
3
3
  *
4
4
  * Compares a live page's computed CSS against every property extracted by
5
5
  * src/adapters/figma.js. Requires pre-fetched figmaData — analysis is skipped
@@ -102,12 +102,26 @@ export async function resolveUidForSelector(browser, selector) {
102
102
  const fence = text.match(/```(?:json|text)?\s*([\s\S]*?)\s*```/);
103
103
  if (fence) text = fence[1];
104
104
 
105
+ // Pass 1 — exact accessible-name match across ALL identifiers before any
106
+ // substring matching. Substring matches can hit unrelated nodes whose text
107
+ // merely mentions the identifier (e.g. a paragraph documenting "#drag-source"
108
+ // matches the id "drag-source" and wins over the real element's text node).
105
109
  for (const identifier of identifiers) {
106
110
  const esc = identifier.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
107
- // Current snapshot format: "uid=N_M role "accessible name" [attrs]"
108
- // uid precedes the role and accessible name; MCP tools expect just the N_M part (no "uid=" prefix).
109
111
  // Prefer interactive element lines (combobox, button, etc.) over StaticText label
110
112
  // nodes — both may share the same accessible name (e.g. a <label> and its <select>).
113
+ const e1 = text.match(new RegExp(`uid=([^\\s]+)\\s+(?!StaticText)[^\\n]*"${esc}"`, 'm'));
114
+ if (e1) return e1[1];
115
+ const e1b = text.match(new RegExp(`uid=([^\\s]+)[^\\n]*"${esc}"`, 'm'));
116
+ if (e1b) return e1b[1];
117
+ }
118
+
119
+ // Pass 2 — substring fallback (accessible names that embed the identifier,
120
+ // e.g. truncated textContent or label text with surrounding punctuation).
121
+ for (const identifier of identifiers) {
122
+ const esc = identifier.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
123
+ // Current snapshot format: "uid=N_M role "accessible name" [attrs]"
124
+ // uid precedes the role and accessible name; MCP tools expect just the N_M part (no "uid=" prefix).
111
125
  const m1 = text.match(new RegExp(`uid=([^\\s]+)\\s+(?!StaticText)[^\\n]*"[^"]*${esc}`, 'm'));
112
126
  if (m1) return m1[1];
113
127
  // Fallback: accept StaticText nodes (e.g. draggable divs whose only a11y node is text)
@@ -1,5 +1,5 @@
1
1
  /**
2
- * ARGUS Font Loading Analyzer (Sprint 5c — A10)
2
+ * ARGUS Font Loading Analyzer (A10)
3
3
  *
4
4
  * Detects web font performance and reliability issues that cause invisible
5
5
  * text (FOIT), layout shifts (FOUT/CLS), or deliver fonts in suboptimal formats.
@@ -1,5 +1,5 @@
1
1
  /**
2
- * ARGUS Form Validation Analyzer (Sprint 5d — A11)
2
+ * ARGUS Form Validation Analyzer (A11)
3
3
  *
4
4
  * Detects accessibility and security gaps in HTML forms — one of the most
5
5
  * commonly broken areas in web apps.
@@ -1,5 +1,5 @@
1
1
  /**
2
- * ARGUS HAR Network Baseline Recorder (Sprint 5 — N1)
2
+ * ARGUS HAR Network Baseline Recorder (N1)
3
3
  *
4
4
  * Records all network requests made during a page load as a HAR-style
5
5
  * baseline. On first run, saves the baseline. On subsequent runs, diffs
@@ -23,7 +23,7 @@
23
23
  * analyzeIssues(browser, url, isCritical) — standalone navigator for direct harness use.
24
24
  */
25
25
 
26
- import { normalizeArray } from './flow-runner.js';
26
+ import { parseConsoleMsgResponse } from './mcp-parsers.js';
27
27
 
28
28
  // ── Issue classifiers ─────────────────────────────────────────────────────────
29
29
 
@@ -112,7 +112,8 @@ function classifyIssue(issue, url, isCritical) {
112
112
  * Parse a pre-fetched, already-baseline-sliced issues array into findings.
113
113
  * Pure function — used by crawlRouteCheap after the D5 baseline-slice.
114
114
  *
115
- * @param {object[]} issues - Issues from list_console_messages({ types: ['issue'] })
115
+ * @param {object[]} issues - Parsed issue objects ({ level, text }) from
116
+ * parseConsoleMsgResponse(list_console_messages({ types: ['issue'] }))
116
117
  * @param {string} url - Page URL (used as finding context)
117
118
  * @param {boolean} isCritical
118
119
  * @returns {object[]}
@@ -127,11 +128,12 @@ export function parseIssues(issues, url, isCritical = false) {
127
128
  }
128
129
 
129
130
  /**
130
- * Standalone issues analyzer — navigates to a URL, baselines the current
131
- * Issues count, queries the panel after load, and returns findings.
131
+ * Standalone issues analyzer — navigates to a URL, queries the Issues
132
+ * panel after load, and returns findings.
132
133
  *
133
- * Used by the test harness and any standalone caller. Baselines before
134
- * navigation (D5 pattern) so pre-existing issues from prior pages are excluded.
134
+ * Used by the test harness and any standalone caller. No baseline slice is
135
+ * needed: list_console_messages resets per navigation, so the post-navigation
136
+ * response contains only the current page's issues.
135
137
  *
136
138
  * @param {object} browser
137
139
  * @param {string} url
@@ -141,14 +143,6 @@ export function parseIssues(issues, url, isCritical = false) {
141
143
  export async function analyzeIssues(browser, url, isCritical = false) {
142
144
  const findings = [];
143
145
 
144
- let baseline = 0;
145
- try {
146
- const priorRaw = await browser.listConsoleRaw({ types: ['issue'], includePreservedMessages: true });
147
- baseline = normalizeArray(priorRaw).length;
148
- } catch {
149
- // Issues API may not be available — baseline stays 0
150
- }
151
-
152
146
  try {
153
147
  await browser.navigate(url);
154
148
  await new Promise(r => setTimeout(r, 1000));
@@ -157,11 +151,10 @@ export async function analyzeIssues(browser, url, isCritical = false) {
157
151
  }
158
152
 
159
153
  try {
160
- const raw = await browser.listConsoleRaw({
161
- types: ['issue'],
162
- includePreservedMessages: true,
163
- });
164
- const issues = normalizeArray(raw).slice(baseline);
154
+ // Response is markdown text ("msgid=N [issue] text") — same format as
155
+ // console messages. parseConsoleMsgResponse extracts { level, text }.
156
+ const raw = await browser.listConsoleRaw({ types: ['issue'] });
157
+ const issues = parseConsoleMsgResponse(raw);
165
158
  findings.push(...parseIssues(issues, url, isCritical));
166
159
  } catch {
167
160
  // Issues API not available in this chrome-devtools-mcp build — silent skip
@@ -55,3 +55,23 @@ export function parseNetworkReqResponse(raw) {
55
55
  }
56
56
  return reqs;
57
57
  }
58
+
59
/**
 * Parse the response from list_pages into a uniform page list.
 *
 * Text format: "## Pages\n1: http://host/page.html [selected]\n2: about:blank"
 * The numeric prefix is the pageId that select_page expects (as a number).
 *
 * An already-structured array is also accepted. Its entries are normalized so
 * callers can rely on `id` and `selected` being present: `id` falls back to a
 * legacy `pageId` field and `selected` is coerced to a boolean. Other fields
 * on each entry are preserved; non-object entries are dropped.
 *
 * @param {any} raw - Raw value returned by the MCP tool
 * @returns {Array<{ id: number, url: string, selected: boolean }>}
 *   Empty array when raw is falsy or of an unsupported type.
 */
export function parseListPagesResponse(raw) {
  if (!raw) return [];

  if (Array.isArray(raw)) {
    return raw
      .filter((page) => page !== null && typeof page === 'object')
      .map((page) => ({
        ...page,
        id: page.id ?? page.pageId,
        selected: Boolean(page.selected),
      }));
  }

  if (typeof raw !== 'string') return [];

  // [^\S\r\n] is "whitespace except line breaks": each page must sit on a
  // single line, so a dangling "N:" can never absorb the following line.
  const pageLine = /^(\d+):[^\S\r\n]+(\S+)([^\S\r\n]+\[selected\])?[^\S\r\n]*$/gm;
  return Array.from(raw.matchAll(pageLine), ([, id, url, selectedTag]) => ({
    id: Number(id),
    url,
    selected: Boolean(selectedTag),
  }));
}
@@ -1,5 +1,5 @@
1
1
  /**
2
- * ARGUS Motion & Animation Accessibility Analyzer (Sprint 5b — A9)
2
+ * ARGUS Motion & Animation Accessibility Analyzer (A9)
3
3
  *
4
4
  * Detects pages that trigger motion/animation without respecting the user's
5
5
  * `prefers-reduced-motion` OS preference — a WCAG 2.1 SC 2.3.3 (AAA) violation
@@ -0,0 +1,159 @@
1
+ /**
2
+ * Intelligent Baseline Filtering — cross-run noise classifier.
3
+ *
4
+ * Pure algorithmic false-positive filter: no external API, no per-run cost.
5
+ * Tracks which finding keys appeared on which routes across the last N runs
6
+ * (reports/baselines/<branch>-history.json) and flags findings that flip-flop
7
+ * between present and absent as "noisy". Noisy findings are downgraded to
8
+ * severity "info" (never suppressed — visibility is kept) and annotated with
9
+ * `noisy: true`, `noiseScore`, and `originalSeverity`.
10
+ *
11
+ * Distinct from flakiness-detector.js (B4), which compares two crawls WITHIN
12
+ * one run. This module classifies across run HISTORY, catching findings that
13
+ * are stable within a run but unstable between runs (timing-dependent ads,
14
+ * third-party scripts, A/B-tested content).
15
+ *
16
+ * Disable with ARGUS_NOISE_FILTER=0.
17
+ */
18
+
19
+ import fs from 'fs';
20
+ import path from 'path';
21
+ import { findingKey } from './flakiness-detector.js';
22
+ import { childLogger } from './logger.js';
23
+
24
+ const logger = childLogger('noise-filter');
25
+
26
+ /** Default minRuns for applyNoiseFilter: a finding is only classified noisy once its route has at least this many recorded runs. */
27
+ export const NOISE_MIN_RUNS = 4;
28
+ /** Default flipThreshold for applyNoiseFilter: presence-flip ratio (transitions / (runs - 1)) at or above which a finding is noisy. */
29
+ export const NOISE_FLIP_THRESHOLD = 0.4;
30
+ /** Default maxRuns for recordRunHistory: maximum run entries kept in the history file (only the most recent are retained). */
31
+ export const MAX_HISTORY_RUNS = 20;
32
+
33
/**
 * Load run history from disk.
 *
 * Best-effort: any failure — missing file, unreadable path, invalid JSON, or
 * a JSON value that is not an array — yields an empty history. The file is
 * read directly rather than probed with existsSync first; a missing file
 * simply surfaces as a read error, which avoids a check-then-read race.
 *
 * @param {string} historyFile - Path to the <branch>-history.json file
 * @returns {Array<{ runAt: string, routes: Record<string, string[]> }>}
 */
export function loadRunHistory(historyFile) {
  try {
    const parsed = JSON.parse(fs.readFileSync(historyFile, 'utf8'));
    return Array.isArray(parsed) ? parsed : [];
  } catch {
    // Absent or corrupt history is treated as "no history yet".
    return [];
  }
}
48
+
49
/**
 * Append the current report's finding keys as one run entry, capped at maxRuns.
 *
 * The history is written to a temp file and then renamed over the target, so
 * readers never observe a half-written file. If the write or rename fails the
 * temp file is removed before the error is rethrown, so failed runs do not
 * leave stray *.tmp files behind.
 *
 * @param {string} historyFile - Destination history file
 * @param {object} report - { generatedAt, routes: [{ url, errors }] }
 * @param {number} [maxRuns] - Keep only this many most-recent entries
 * @throws {Error} when the directory cannot be created or the file cannot be written
 */
export function recordRunHistory(historyFile, report, maxRuns = MAX_HISTORY_RUNS) {
  // recursive mkdir is a no-op when the directory already exists.
  fs.mkdirSync(path.dirname(historyFile), { recursive: true });

  const entry = { runAt: report.generatedAt ?? new Date().toISOString(), routes: {} };
  for (const routeResult of (report.routes ?? [])) {
    entry.routes[routeResult.url] = (routeResult.errors ?? []).map(findingKey);
  }

  let history = loadRunHistory(historyFile);
  history.push(entry);
  if (history.length > maxRuns) history = history.slice(-maxRuns);

  const tmp = `${historyFile}.${process.pid}.${Date.now()}.tmp`;
  try {
    fs.writeFileSync(tmp, JSON.stringify(history, null, 2)); // lgtm[js/network-data-to-file] — intentional: Argus persists crawl history to a local baseline file by design
    fs.renameSync(tmp, historyFile);
  } catch (err) {
    try {
      fs.unlinkSync(tmp);
    } catch {
      // The temp file was never created or is already gone — nothing to clean up.
    }
    throw err;
  }
}
74
+
75
/**
 * Compute per-finding noise scores from run history.
 *
 * For every route, builds a presence series per finding key across the runs in
 * which that route was crawled, then scores `transitions / (runs - 1)` — 0 for
 * a finding that is always present (or always absent), 1 for one that flips on
 * every consecutive run pair.
 *
 * History comes from a JSON file on disk, so entries are not trusted: a run
 * that is not an object, has no `routes` object, or lists a route whose keys
 * are not an array is skipped instead of aborting the whole computation.
 * Routes seen in fewer than two usable runs produce no scores.
 *
 * @param {Array<{ routes: Record<string, string[]> }>} history
 * @returns {Map<string, { score: number, runs: number, transitions: number }>}
 *   keyed by `${url}::${findingKey}`; empty when history has fewer than 2 entries
 */
export function computeNoiseScores(history) {
  const scores = new Map();
  if (!Array.isArray(history) || history.length < 2) return scores;

  // url → array of Set(keys), one per run that crawled the url (run order preserved)
  const routeSeries = new Map();
  for (const run of history) {
    const routes = run?.routes;
    if (routes === null || typeof routes !== 'object') continue;
    for (const [url, keys] of Object.entries(routes)) {
      if (!Array.isArray(keys)) continue;
      if (!routeSeries.has(url)) routeSeries.set(url, []);
      routeSeries.get(url).push(new Set(keys));
    }
  }

  for (const [url, series] of routeSeries) {
    if (series.length < 2) continue;

    const allKeys = new Set();
    for (const runKeys of series) {
      for (const key of runKeys) allKeys.add(key);
    }

    for (const key of allKeys) {
      // Count present↔absent flips between consecutive runs.
      let transitions = 0;
      for (let i = 1; i < series.length; i++) {
        if (series[i].has(key) !== series[i - 1].has(key)) transitions++;
      }
      scores.set(`${url}::${key}`, {
        score: transitions / (series.length - 1),
        runs: series.length,
        transitions,
      });
    }
  }
  return scores;
}
119
+
120
/**
 * Flag flip-flopping findings in the report and downgrade them (in place).
 *
 * Each finding is looked up in the scores computed from `history` under the
 * key `${route url}::${findingKey(finding)}`. It is treated as noisy only when
 * a score exists, the route has at least `minRuns` recorded runs, and the flip
 * ratio is at least `flipThreshold`. Noisy findings receive `noisy: true` and
 * a two-decimal `noiseScore`; those not already at "info" also get
 * `originalSeverity` and have `severity` set to "info". report.summary is NOT
 * recomputed here — the caller must rebuild it.
 *
 * @param {object} report - Report whose routes[].errors[] are mutated
 * @param {Array} history - From loadRunHistory()
 * @param {object} [opts]
 * @param {number} [opts.minRuns]
 * @param {number} [opts.flipThreshold]
 * @returns {{ noisyCount: number }} Number of findings flagged as noisy
 */
export function applyNoiseFilter(report, history, { minRuns = NOISE_MIN_RUNS, flipThreshold = NOISE_FLIP_THRESHOLD } = {}) {
  const noiseByKey = computeNoiseScores(history);
  let noisyCount = 0;
  if (noiseByKey.size === 0) return { noisyCount };

  const routeResults = report.routes ?? [];
  for (const routeResult of routeResults) {
    const routeFindings = routeResult.errors ?? [];
    for (const finding of routeFindings) {
      const stats = noiseByKey.get(`${routeResult.url}::${findingKey(finding)}`);
      if (!stats) continue;

      // Not enough history for this route, or the finding is too stable.
      const belowBar = stats.runs < minRuns || stats.score < flipThreshold;
      if (belowBar) continue;

      finding.noisy = true;
      finding.noiseScore = Math.round(stats.score * 100) / 100;
      const alreadyInfo = finding.severity === 'info';
      if (!alreadyInfo) {
        finding.originalSeverity = finding.severity;
        finding.severity = 'info';
      }
      noisyCount += 1;
    }
  }

  if (noisyCount > 0) {
    logger.info(`[ARGUS] Noise filter: ${noisyCount} flip-flopping finding(s) downgraded to info`);
  }
  return { noisyCount };
}