npm - argusqa-os - Versions diffs - 9.6.6 → 9.7.4 - Mend

argusqa-os 9.6.6 → 9.7.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

package/README.md +394 -384
package/glama.json +2 -2
package/package.json +77 -71
package/src/adapters/browser.js +11 -3
package/src/cli/chrome-launcher.js +175 -0
package/src/cli/doctor.js +133 -0
package/src/cli/pr-validate.js +25 -6
package/src/mcp-server.js +27 -9
package/src/orchestration/orchestrator.js +9 -7
package/src/orchestration/report-processor.js +33 -1
package/src/orchestration/watch-mode.js +20 -0
package/src/utils/a11y-deep-analyzer.js +1 -1
package/src/utils/contract-validator.js +27 -2
package/src/utils/design-fidelity-analyzer.js +1 -1
package/src/utils/flow-runner.js +16 -2
package/src/utils/font-analyzer.js +1 -1
package/src/utils/form-analyzer.js +1 -1
package/src/utils/har-recorder.js +1 -1
package/src/utils/issues-analyzer.js +12 -19
package/src/utils/mcp-parsers.js +20 -0
package/src/utils/motion-analyzer.js +1 -1
package/src/utils/noise-filter.js +159 -0
package/src/utils/pdf-exporter.js +146 -0
package/src/utils/pr-diff-analyzer.js +11 -2
package/src/utils/root-cause-linker.js +175 -0
package/src/utils/screen-recorder.js +250 -0
package/src/utils/security-analyzer.js +132 -1
package/src/utils/theme-analyzer.js +1 -1
package/src/utils/visual-diff-analyzer.js +1 -1
package/src/utils/web-vitals-analyzer.js +1 -1

package/src/mcp-server.js CHANGED Viewed

@@ -1,6 +1,6 @@
 #!/usr/bin/env node
 /**
- * Argus MCP Server (v9.6.6)
+ * Argus MCP Server
  *
  * Exposes Argus as an MCP server so Claude (or any MCP client) can call
  * argus_audit, argus_audit_full, argus_compare, argus_last_report, and
@@ -24,8 +24,11 @@ import {
 } from '@modelcontextprotocol/sdk/types.js';
 import fs   from 'fs';
 import path from 'path';
+import { createRequire } from 'module';
 import { createMcpClient }                    from './utils/mcp-client.js';
+import { childLogger }                        from './utils/logger.js';
+import { parseListPagesResponse }             from './utils/mcp-parsers.js';
 import { crawlRouteCheap, runCrawl }          from './orchestration/crawl-and-report.js';
 import { runComparison }                      from './orchestration/env-comparison.js';
 import { WatchSession }                       from './orchestration/watch-mode.js';
@@ -33,7 +36,14 @@ import { CdpBrowserAdapter }                  from './adapters/browser.js';
 import { getFigmaFrame }                      from './adapters/figma.js';
 import { analyzeDesignFidelity }             from './utils/design-fidelity-analyzer.js';
 import { analyzeVisualRegression }           from './utils/visual-diff-analyzer.js';
-import { parsePrUrl, fetchPrFiles, mapFilesToRoutes } from './utils/pr-diff-analyzer.js';
+import { fetchPrFiles, mapFilesToRoutes } from './utils/pr-diff-analyzer.js';
+const logger = childLogger('mcp-server');
+// Read version from package.json so the MCP server always self-reports the
+// published package version (a hardcoded string here drifted in the past).
+const require_ = createRequire(import.meta.url);
+const pkg = require_('../package.json');
 const REPORTS_DIR = path.resolve(process.cwd(), 'reports');
@@ -65,7 +75,7 @@ function cacheAudit(url, result) {
 const TOOLS = [
   {
     name: 'argus_audit',
-    description: 'Fast QA audit on a URL via Chrome DevTools Protocol. Runs 8 analyzers in one pass: JS errors, unhandled rejections, network failures (4xx/5xx), API frequency loops, CSS cascade issues, SEO violations, security header checks, and accessibility. Returns { findings: [{severity, type, message, url}], summary: {critical, warning, info} }. Use for CI smoke tests and pre-deploy gates. Pass cache: true to skip re-crawl on repeat calls to the same URL within a session — useful in tight fix loops. For Lighthouse scoring and memory leak detection, use argus_audit_full. Requires Chrome running with --remote-debugging-port=9222.',
+    description: 'Fast QA audit on a URL via Chrome DevTools Protocol. One-pass detection sweep: JS errors, unhandled rejections, network failures (4xx/5xx), CORS errors, API frequency loops, slow APIs and blocking third-party requests, API contract violations, sync XHR, document.write, long tasks, service worker failures, debugger statements, duplicate IDs, SEO violations, security header checks, content quality, Chrome DevTools Issues panel, and HTTPS enforcement. Returns { findings: [{severity, type, message, url}], summary: {critical, warning, info} }. Use for CI smoke tests and pre-deploy gates. Pass cache: true to skip re-crawl on repeat calls to the same URL within a session — useful in tight fix loops. For Lighthouse scoring, CSS analysis, responsive checks, and memory leak detection, use argus_audit_full. Requires Chrome running with --remote-debugging-port=9222.',
     inputSchema: {
       type: 'object',
       properties: {
@@ -181,6 +191,9 @@ async function withMcp(fn) {
 async function handleAudit({ url, critical = false, cache = false }) {
   if (cache && auditCache.has(url)) {
     const { result, ts } = auditCache.get(url);
+    // Refresh recency on read so eviction is true LRU, not insertion-order FIFO.
+    auditCache.delete(url);
+    auditCache.set(url, { result, ts });
     return { content: [{ type: 'text', text: JSON.stringify({ ...result, _cached: true, _cachedAt: new Date(ts).toISOString() }, null, 2) }] };
   }
   return withMcp(async (mcp) => {
@@ -243,12 +256,13 @@ async function handleGetContext({ url, snapshot_id: prevId, tabId } = {}) {
     const { findings, newConsole, newNetwork } = await session.poll();
     // List all open tabs so the caller can target a specific tab on the next call.
+    // list_pages returns markdown text ("## Pages\n1: <url> [selected]") — parse
+    // it like every other MCP response; treating it as a structured array left
+    // open_tabs permanently empty.
     let open_tabs = [];
     try {
-      const pages = await browser.listPages();
-      if (Array.isArray(pages)) {
-        open_tabs = pages.map(p => ({ id: p.id ?? p.pageId, url: p.url, title: p.title }));
-      }
+      const pages = parseListPagesResponse(await browser.listPages());
+      open_tabs = pages.map(p => ({ id: p.id, url: p.url, selected: p.selected }));
     } catch { /* list_pages not available in all Chrome configs — degrade gracefully */ }
     const newId = Date.now().toString(36) + Math.random().toString(36).slice(2, 6);
@@ -397,8 +411,12 @@ async function handlePrValidate({ prUrl, targetUrl, githubToken, blockOn } = {})
   const allFindings = [];
   const perRoute    = [];
+  // Preserve any path prefix in the target URL (e.g. http://host/app) — new URL()
+  // with a leading-slash path would drop it. Mirrors src/cli/pr-validate.js.
+  const baseUrl = String(base).replace(/\/$/, '');
   for (const route of affectedRoutes) {
-    const url = new URL(route.path, base).href;
+    const routePath = String(route.path ?? '/').startsWith('/') ? route.path : `/${route.path}`;
+    const url = `${baseUrl}${routePath}`;
     const res = await handleAudit({ url, critical: route.critical ?? false });
     const data = JSON.parse(res.content[0].text);
     allFindings.push(...(data.findings ?? []));
@@ -447,7 +465,7 @@ async function handleLastReport() {
 // ── Server bootstrap ──────────────────────────────────────────────────────────
 const server = new Server(
-  { name: 'argus', version: '9.6.6' },
+  { name: 'argus', version: pkg.version },
   { capabilities: { tools: {} } },
 );

package/src/orchestration/orchestrator.js CHANGED Viewed

@@ -20,10 +20,11 @@ import { SECURITY_ANALYSIS_SCRIPT, parseSecurityAnalysisResult, analyzeSecurityC
 import { CONTENT_ANALYSIS_SCRIPT, parseContentAnalysisResult }          from '../utils/content-analyzer.js';
 import { runLoginFlow, saveSession, restoreSession, hasSession, refreshSession } from '../utils/session-manager.js';
 import { mergeRunResults }                                               from '../utils/flakiness-detector.js';
-import { runAllFlows, normalizeArray, waitForSelector }                  from '../utils/flow-runner.js';
+import { runAllFlows, waitForSelector }                                  from '../utils/flow-runner.js';
 import { analyzeApiFrequency }                                           from '../utils/api-frequency.js';
 import { slugify }                                                       from '../utils/slug.js';
 import { unwrapEval, createMcpClient }                                   from '../utils/mcp-client.js';
+import { parseConsoleMsgResponse }                                       from '../utils/mcp-parsers.js';
 import { CdpBrowserAdapter }                                             from '../adapters/browser.js';
 import { getFigmaFrame }                                                 from '../adapters/figma.js';
 import { chunkArray }                                                    from '../utils/parallel-crawler.js';
@@ -435,9 +436,9 @@ export async function crawlRouteCheap(route, baseUrl, mcp) {
   const consoleBaseline = (await browser.listConsole().catch(() => [])).length;
   const baselineNetList = await browser.listNetwork().catch(() => []);
   const networkMaxReqId = baselineNetList.reduce((max, r) => Math.max(max, r._reqid ?? 0), 0);
-  // listConsoleRaw returns raw MCP response — normalizeArray required before .length
+  // listConsoleRaw returns markdown text ("msgid=N [issue] text") — parse like console messages
   const issuesBaselineRaw = await browser.listConsoleRaw({ types: ['issue'] }).catch(() => null);
-  const issuesBaseline    = normalizeArray(issuesBaselineRaw).length;
+  const issuesBaseline    = parseConsoleMsgResponse(issuesBaselineRaw).length;
   // 1. Navigate
   await browser.navigate(url);
@@ -710,11 +711,12 @@ export async function crawlRouteCheap(route, baseUrl, mcp) {
     logger.warn(`[ARGUS] Content analysis skipped for ${url}: ${err.message}`);
   }
-  // 9e. Chrome DevTools Issues panel
+  // 9e. Chrome DevTools Issues panel — same reset-per-navigation guard as console (D5)
   try {
-    const issueRaw = await browser.listConsoleRaw({ types: ['issue'] });
-    const issues   = normalizeArray(issueRaw).slice(issuesBaseline);
-    result.errors.push(...parseIssues(issues, url, route.critical));
+    const issueRaw  = await browser.listConsoleRaw({ types: ['issue'] });
+    const allIssues = parseConsoleMsgResponse(issueRaw);
+    const issuesSliceAt = allIssues.length > issuesBaseline ? issuesBaseline : 0;
+    result.errors.push(...parseIssues(allIssues.slice(issuesSliceAt), url, route.critical));
   } catch (err) {
     logger.warn(`[ARGUS] Issues analysis skipped for ${url}: ${err.message}`);
   }

package/src/orchestration/report-processor.js CHANGED Viewed

@@ -13,6 +13,8 @@ import path from 'path';
 import { childLogger } from '../utils/logger.js';
 import { applyOverrides }                                                  from '../utils/severity-overrides.js';
 import { loadBaseline, saveBaseline, applyBaseline, appendTrend, getCurrentBranch } from '../utils/baseline-manager.js';
+import { loadRunHistory, recordRunHistory, applyNoiseFilter }              from '../utils/noise-filter.js';
+import { getRecentChanges, linkRootCauses }                                from '../utils/root-cause-linker.js';
 const logger = childLogger('report-processor');
@@ -104,6 +106,29 @@ export async function processReport(report, { outputDir, severityOverrides }) {
     logger.info('[ARGUS] First run — no baseline to compare; all findings treated as new');
   }
+  // 3a. Intelligent baseline filtering — downgrade cross-run flip-flopping findings
+  //     to info. Best-effort; disable with ARGUS_NOISE_FILTER=0.
+  const historyPath = path.join(outputDir, 'baselines', `${safeBranch}-history.json`);
+  if (process.env.ARGUS_NOISE_FILTER !== '0') {
+    try {
+      const history = loadRunHistory(historyPath);
+      const { noisyCount } = applyNoiseFilter(report, history);
+      if (noisyCount > 0) rebuildSummary(report); // downgrades change severity counts
+    } catch (err) {
+      logger.warn(`[ARGUS] Noise filter skipped: ${err.message}`);
+    }
+  }
+  // 3b. Root cause linking — annotate new findings with recent git changes that
+  //     map to their route. Best-effort; disable with ARGUS_ROOT_CAUSE=0.
+  if (process.env.ARGUS_ROOT_CAUSE !== '0') {
+    try {
+      linkRootCauses(report, getRecentChanges());
+    } catch (err) {
+      logger.warn(`[ARGUS] Root cause linking skipped: ${err.message}`);
+    }
+  }
   // 4. Write JSON report
   const timestamp  = new Date().toISOString().replace(/[:.]/g, '-');
   const reportPath = path.join(outputDir, `error-report-${timestamp}.json`);
@@ -115,8 +140,15 @@ export async function processReport(report, { outputDir, severityOverrides }) {
   }
   logger.info(`[ARGUS] Report written: ${reportPath}`);
-  // 5. Persist baseline + append trend entry
+  // 5. Persist baseline + run history + append trend entry
   saveBaseline(baselinePath, report);
+  if (process.env.ARGUS_NOISE_FILTER !== '0') {
+    try {
+      recordRunHistory(historyPath, report);
+    } catch (err) {
+      logger.warn(`[ARGUS] Run history write skipped: ${err.message}`);
+    }
+  }
   appendTrend(trendsPath, {
     runAt:                report.generatedAt,
     baseUrl:              report.baseUrl,

package/src/orchestration/watch-mode.js CHANGED Viewed

@@ -298,6 +298,13 @@ function classifyNetworkReq(req, url) {
  * the interval-based runWatchMode() entry point.
  */
 export class WatchSession {
+  // Long-run safety caps: a watch session left running for hours against an app
+  // with cache-busted polling URLs would otherwise grow the dedup sets without
+  // bound. When a set exceeds its cap the oldest fifth is evicted (Sets iterate
+  // in insertion order) — worst case a very old message is re-reported once.
+  static MAX_SEEN_KEYS    = 5000;
+  static MAX_ALL_FINDINGS = 2000;
   constructor(browser, baseUrl) {
     this._browser     = browser;
     this._baseUrl     = baseUrl;
@@ -306,6 +313,14 @@ export class WatchSession {
     this._allFindings = [];
   }
+  /**
+   * Evict the oldest entries of a dedup set once it exceeds the cap.
+   *
+   * No-op while `set.size <= WatchSession.MAX_SEEN_KEYS`. Otherwise deletes
+   * the first `Math.floor(WatchSession.MAX_SEEN_KEYS / 5)` values yielded by
+   * `set.values()` — Sets iterate in insertion order, so these are the
+   * oldest keys. The set is mutated in place; nothing is returned.
+   *
+   * NOTE(review): the number of evicted keys is a fixed fifth of the cap,
+   * not a fifth of the current size. A set that overshoots the cap by more
+   * than that amount is still above the cap after one call — confirm that
+   * trimming once per poll is enough for the expected message volume.
+   *
+   * @param {Set<any>} set - Dedup set to trim
+   * @returns {void}
+   */
+  static _trimSeen(set) {
+    if (set.size <= WatchSession.MAX_SEEN_KEYS) return;
+    const drop = Math.floor(WatchSession.MAX_SEEN_KEYS / 5);
+    const it = set.values();
+    for (let i = 0; i < drop; i++) set.delete(it.next().value);
+  }
   /**
    * Run one poll cycle.
    *
@@ -350,6 +365,11 @@ export class WatchSession {
     findings.push(...analyzeSecurityNetwork(newNetwork, this._baseUrl));
     this._allFindings.push(...findings);
+    if (this._allFindings.length > WatchSession.MAX_ALL_FINDINGS) {
+      this._allFindings = this._allFindings.slice(-WatchSession.MAX_ALL_FINDINGS);
+    }
+    WatchSession._trimSeen(this._seenConsole);
+    WatchSession._trimSeen(this._seenNetwork);
     return { findings, newConsole, newNetwork };
   }

package/src/utils/a11y-deep-analyzer.js CHANGED Viewed

@@ -1,5 +1,5 @@
 /**
- * ARGUS Deep Accessibility Analyzer (Sprint 4 — A12)
+ * ARGUS Deep Accessibility Analyzer (A12)
  *
  * Extends Argus accessibility coverage via two mechanisms:
  *

package/src/utils/contract-validator.js CHANGED Viewed

@@ -125,6 +125,32 @@ function loadSchema(contract) {
   return null;
 }
+/**
+ * Extract and JSON-parse the response body from a get_network_request result.
+ *
+ * chrome-devtools-mcp returns the request detail as markdown text with the
+ * body under a "### Response Body" section — the dominant production shape.
+ * Structured shapes ({ responseBody } / { body }) are kept for legacy clients.
+ *
+ * Resolution order:
+ *   1. `null` / `undefined` input returns null.
+ *   2. Object input: reads `responseBody`, falling back to `body`. A string
+ *      value is passed through JSON.parse; a non-string value is returned
+ *      as-is without validation.
+ *   3. Anything else is coerced with String() and searched for a
+ *      "### Response Body" heading. The section runs up to the next line
+ *      starting with "## " or "### ", or to the end of the text, and is
+ *      trimmed before parsing. A missing heading or a blank section
+ *      returns null.
+ *
+ * NOTE(review): on the object path an empty-string body is not nullish, so
+ * it reaches JSON.parse('') and throws, whereas a blank markdown section
+ * returns null — confirm whether that asymmetry is intended.
+ *
+ * @param {any} raw - Raw value returned by browser.getNetworkRequest()
+ * @returns {any|null} Parsed JSON body, or null when absent
+ * @throws {SyntaxError} when a body section exists but is not valid JSON
+ */
+export function extractResponseBody(raw) {
+  if (raw == null) return null;
+  if (typeof raw === 'object') {
+    const text = raw.responseBody ?? raw.body ?? null;
+    if (text == null) return null;
+    return typeof text === 'string' ? JSON.parse(text) : text;
+  }
+  const text = String(raw);
+  const m = text.match(/### Response Body\s*\n([\s\S]*?)(?=\n###? |$)/);
+  if (!m) return null;
+  const section = m[1].trim();
+  if (!section) return null;
+  return JSON.parse(section);
+}
 /**
  * Validate captured network requests against apiContracts[].
  * For each request that matches a contract, fetches the response body via
@@ -153,8 +179,7 @@ export async function validateApiContracts(networkReqs, browser, contracts, page
       let body = null;
       try {
         const raw = await browser.getNetworkRequest(req.id ?? req.requestId);
-        const text = raw?.responseBody ?? raw?.body ?? null;
-        if (text) body = JSON.parse(text);
+        body = extractResponseBody(raw);
       } catch {
         continue; // body unavailable — skip validation for this request
       }

package/src/utils/design-fidelity-analyzer.js CHANGED Viewed

@@ -1,5 +1,5 @@
 /**
- * ARGUS Design Fidelity Analyzer (Sprint 2 — D9: Design Fidelity)
+ * ARGUS Design Fidelity Analyzer (D9: Design Fidelity)
  *
  * Compares a live page's computed CSS against every property extracted by
  * src/adapters/figma.js. Requires pre-fetched figmaData — analysis is skipped

package/src/utils/flow-runner.js CHANGED Viewed

@@ -102,12 +102,26 @@ export async function resolveUidForSelector(browser, selector) {
   const fence = text.match(/```(?:json|text)?\s*([\s\S]*?)\s*```/);
   if (fence) text = fence[1];
+  // Pass 1 — exact accessible-name match across ALL identifiers before any
+  // substring matching. Substring matches can hit unrelated nodes whose text
+  // merely mentions the identifier (e.g. a paragraph documenting "#drag-source"
+  // matches the id "drag-source" and wins over the real element's text node).
   for (const identifier of identifiers) {
     const esc = identifier.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
-    // Current snapshot format: "uid=N_M role "accessible name" [attrs]"
-    // uid precedes the role and accessible name; MCP tools expect just the N_M part (no "uid=" prefix).
     // Prefer interactive element lines (combobox, button, etc.) over StaticText label
     // nodes — both may share the same accessible name (e.g. a <label> and its <select>).
+    const e1 = text.match(new RegExp(`uid=([^\\s]+)\\s+(?!StaticText)[^\\n]*"${esc}"`, 'm'));
+    if (e1) return e1[1];
+    const e1b = text.match(new RegExp(`uid=([^\\s]+)[^\\n]*"${esc}"`, 'm'));
+    if (e1b) return e1b[1];
+  }
+  // Pass 2 — substring fallback (accessible names that embed the identifier,
+  // e.g. truncated textContent or label text with surrounding punctuation).
+  for (const identifier of identifiers) {
+    const esc = identifier.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+    // Current snapshot format: "uid=N_M role "accessible name" [attrs]"
+    // uid precedes the role and accessible name; MCP tools expect just the N_M part (no "uid=" prefix).
     const m1 = text.match(new RegExp(`uid=([^\\s]+)\\s+(?!StaticText)[^\\n]*"[^"]*${esc}`, 'm'));
     if (m1) return m1[1];
     // Fallback: accept StaticText nodes (e.g. draggable divs whose only a11y node is text)

package/src/utils/font-analyzer.js CHANGED Viewed

@@ -1,5 +1,5 @@
 /**
- * ARGUS Font Loading Analyzer (Sprint 5c — A10)
+ * ARGUS Font Loading Analyzer (A10)
  *
  * Detects web font performance and reliability issues that cause invisible
  * text (FOIT), layout shifts (FOUT/CLS), or deliver fonts in suboptimal formats.

package/src/utils/form-analyzer.js CHANGED Viewed

@@ -1,5 +1,5 @@
 /**
- * ARGUS Form Validation Analyzer (Sprint 5d — A11)
+ * ARGUS Form Validation Analyzer (A11)
  *
  * Detects accessibility and security gaps in HTML forms — one of the most
  * commonly broken areas in web apps.

package/src/utils/har-recorder.js CHANGED Viewed

@@ -1,5 +1,5 @@
 /**
- * ARGUS HAR Network Baseline Recorder (Sprint 5 — N1)
+ * ARGUS HAR Network Baseline Recorder (N1)
  *
  * Records all network requests made during a page load as a HAR-style
  * baseline. On first run, saves the baseline. On subsequent runs, diffs

package/src/utils/issues-analyzer.js CHANGED Viewed

@@ -23,7 +23,7 @@
  *   analyzeIssues(browser, url, isCritical) — standalone navigator for direct harness use.
  */
-import { normalizeArray } from './flow-runner.js';
+import { parseConsoleMsgResponse } from './mcp-parsers.js';
 // ── Issue classifiers ─────────────────────────────────────────────────────────
@@ -112,7 +112,8 @@ function classifyIssue(issue, url, isCritical) {
  * Parse a pre-fetched, already-baseline-sliced issues array into findings.
  * Pure function — used by crawlRouteCheap after the D5 baseline-slice.
  *
- * @param {object[]} issues    - Issues from list_console_messages({ types: ['issue'] })
+ * @param {object[]} issues    - Parsed issue objects ({ level, text }) from
+ *                               parseConsoleMsgResponse(list_console_messages({ types: ['issue'] }))
  * @param {string}   url       - Page URL (used as finding context)
  * @param {boolean}  isCritical
  * @returns {object[]}
@@ -127,11 +128,12 @@ export function parseIssues(issues, url, isCritical = false) {
 }
 /**
- * Standalone issues analyzer — navigates to a URL, baselines the current
- * Issues count, queries the panel after load, and returns findings.
+ * Standalone issues analyzer — navigates to a URL, queries the Issues
+ * panel after load, and returns findings.
  *
- * Used by the test harness and any standalone caller. Baselines before
- * navigation (D5 pattern) so pre-existing issues from prior pages are excluded.
+ * Used by the test harness and any standalone caller. No baseline slice is
+ * needed: list_console_messages resets per navigation, so the post-navigation
+ * response contains only the current page's issues.
  *
  * @param {object}  browser
  * @param {string}  url
@@ -141,14 +143,6 @@ export function parseIssues(issues, url, isCritical = false) {
 export async function analyzeIssues(browser, url, isCritical = false) {
   const findings = [];
-  let baseline = 0;
-  try {
-    const priorRaw = await browser.listConsoleRaw({ types: ['issue'], includePreservedMessages: true });
-    baseline = normalizeArray(priorRaw).length;
-  } catch {
-    // Issues API may not be available — baseline stays 0
-  }
   try {
     await browser.navigate(url);
     await new Promise(r => setTimeout(r, 1000));
@@ -157,11 +151,10 @@ export async function analyzeIssues(browser, url, isCritical = false) {
   }
   try {
-    const raw    = await browser.listConsoleRaw({
-      types: ['issue'],
-      includePreservedMessages: true,
-    });
-    const issues = normalizeArray(raw).slice(baseline);
+    // Response is markdown text ("msgid=N [issue] text") — same format as
+    // console messages. parseConsoleMsgResponse extracts { level, text }.
+    const raw    = await browser.listConsoleRaw({ types: ['issue'] });
+    const issues = parseConsoleMsgResponse(raw);
     findings.push(...parseIssues(issues, url, isCritical));
   } catch {
     // Issues API not available in this chrome-devtools-mcp build — silent skip

package/src/utils/mcp-parsers.js CHANGED Viewed

@@ -55,3 +55,23 @@ export function parseNetworkReqResponse(raw) {
   }
   return reqs;
 }
+/**
+ * Parse the text response from list_pages.
+ * Format: "## Pages\n1: http://host/page.html [selected]\n2: about:blank"
+ * The numeric prefix is the pageId that select_page expects (as a number).
+ *
+ * Input handling:
+ *   - Falsy input (null, undefined, '') returns [].
+ *   - An array is returned unchanged — its elements are NOT normalised to
+ *     the { id, url, selected } shape documented below.
+ *   - Any other non-string value returns [].
+ *   - For strings, only lines of the form "<digits>: <url>" with an optional
+ *     trailing "[selected]" marker produce an entry; the "## Pages" heading
+ *     and any other line are ignored. The url is the first run of
+ *     non-whitespace characters after the colon.
+ *
+ * NOTE(review): a wrapped MCP result object (e.g. { content: [...] }) falls
+ * into the "non-string" branch and yields [] — presumably the adapter has
+ * already unwrapped the text before calling this; verify against caller.
+ *
+ * @param {any} raw - Raw value returned by the MCP tool
+ * @returns {Array<{ id: number, url: string, selected: boolean }>}
+ */
+export function parseListPagesResponse(raw) {
+  if (!raw) return [];
+  if (Array.isArray(raw)) return raw;
+  if (typeof raw !== 'string') return [];
+  const pages = [];
+  const re = /^(\d+):\s+(\S+)(\s+\[selected\])?\s*$/gm;
+  let m;
+  while ((m = re.exec(raw)) !== null) {
+    pages.push({ id: Number(m[1]), url: m[2], selected: Boolean(m[3]) });
+  }
+  return pages;
+}

package/src/utils/motion-analyzer.js CHANGED Viewed

@@ -1,5 +1,5 @@
 /**
- * ARGUS Motion & Animation Accessibility Analyzer (Sprint 5b — A9)
+ * ARGUS Motion & Animation Accessibility Analyzer (A9)
  *
  * Detects pages that trigger motion/animation without respecting the user's
  * `prefers-reduced-motion` OS preference — a WCAG 2.1 SC 2.3.3 (AAA) violation

package/src/utils/noise-filter.js ADDED Viewed

@@ -0,0 +1,159 @@
+/**
+ * Intelligent Baseline Filtering — cross-run noise classifier.
+ *
+ * Pure algorithmic false-positive filter: no external API, no per-run cost.
+ * Tracks which finding keys appeared on which routes across the last N runs
+ * (reports/baselines/<branch>-history.json) and flags findings that flip-flop
+ * between present and absent as "noisy". Noisy findings are downgraded to
+ * severity "info" (never suppressed — visibility is kept) and annotated with
+ * `noisy: true`, `noiseScore`, and `originalSeverity`.
+ *
+ * Distinct from flakiness-detector.js (B4), which compares two crawls WITHIN
+ * one run. This module classifies across run HISTORY, catching findings that
+ * are stable within a run but unstable between runs (timing-dependent ads,
+ * third-party scripts, A/B-tested content).
+ *
+ * Disable with ARGUS_NOISE_FILTER=0.
+ */
+import fs   from 'fs';
+import path from 'path';
+import { findingKey }  from './flakiness-detector.js';
+import { childLogger } from './logger.js';
+const logger = childLogger('noise-filter');
+/** Minimum recorded runs for a route before its findings can be classified noisy. */
+export const NOISE_MIN_RUNS = 4;
+/** Presence-flip ratio (transitions / (runs - 1)) at or above which a finding is noisy. */
+export const NOISE_FLIP_THRESHOLD = 0.4;
+/** Maximum run entries kept in the history file. */
+export const MAX_HISTORY_RUNS = 20;
+/**
+ * Load run history from disk. Returns [] when the file is absent or corrupt.
+ *
+ * "Corrupt" covers both a read/parse failure (the error is deliberately
+ * swallowed so history problems never abort a run) and a file that parses
+ * as valid JSON but whose top-level value is not an array. Individual
+ * entries are not validated here.
+ *
+ * @param {string} historyFile
+ * @returns {Array<{ runAt: string, routes: Record<string, string[]> }>}
+ */
+export function loadRunHistory(historyFile) {
+  if (!fs.existsSync(historyFile)) return [];
+  try {
+    const parsed = JSON.parse(fs.readFileSync(historyFile, 'utf8'));
+    return Array.isArray(parsed) ? parsed : [];
+  } catch {
+    return [];
+  }
+}
+/**
+ * Append the current report's finding keys as one run entry, capped at maxRuns.
+ * Atomic write (tmp + rename) — same pattern as baseline-manager.
+ *
+ * Steps:
+ *   1. Creates the parent directory of historyFile (recursively) if missing.
+ *   2. Builds one entry: `runAt` is report.generatedAt, or the current time
+ *      as an ISO string when that is nullish; `routes` maps each route url to
+ *      the findingKey() of every item in its `errors` array.
+ *   3. Re-reads the existing history via loadRunHistory(), appends the entry,
+ *      and keeps only the last `maxRuns` entries.
+ *   4. Writes to a temp file named with the pid and a timestamp, then renames
+ *      it over historyFile.
+ *
+ * Routes are keyed by url, so if the report contains the same url twice the
+ * later route's keys overwrite the earlier one's.
+ *
+ * NOTE(review): if the write or rename throws, the temp file is not removed
+ * here and the error propagates to the caller — confirm callers treat this
+ * as best-effort.
+ *
+ * @param {string} historyFile
+ * @param {object} report  - { generatedAt, routes: [{ url, errors }] }
+ * @param {number} [maxRuns]
+ * @returns {void}
+ */
+export function recordRunHistory(historyFile, report, maxRuns = MAX_HISTORY_RUNS) {
+  const dir = path.dirname(historyFile);
+  if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
+  const entry = { runAt: report.generatedAt ?? new Date().toISOString(), routes: {} };
+  for (const routeResult of (report.routes ?? [])) {
+    entry.routes[routeResult.url] = (routeResult.errors ?? []).map(findingKey);
+  }
+  let history = loadRunHistory(historyFile);
+  history.push(entry);
+  if (history.length > maxRuns) history = history.slice(-maxRuns);
+  const tmp = `${historyFile}.${process.pid}.${Date.now()}.tmp`;
+  fs.writeFileSync(tmp, JSON.stringify(history, null, 2)); // lgtm[js/network-data-to-file] — intentional: Argus persists crawl history to a local baseline file by design
+  fs.renameSync(tmp, historyFile);
+}
+/**
+ * Compute per-finding noise scores from run history.
+ *
+ * For every route, builds a presence series per finding key across the runs in
+ * which that route was crawled, then scores `transitions / (runs - 1)` — 0 for
+ * a finding that is always present (or always absent), 1 for one that flips on
+ * every consecutive run pair.
+ *
+ * Edge cases:
+ *   - A non-array history, or one with fewer than 2 entries, yields an empty Map.
+ *   - A run whose `routes` is missing contributes nothing.
+ *   - A run that did not crawl a url is simply left out of that url's series;
+ *     it is not counted as "absent" for the url's findings.
+ *   - Routes with fewer than 2 recorded runs are skipped (no score entries).
+ *   - Only keys that appeared in at least one run of a route get an entry.
+ *
+ * @param {Array<{ routes: Record<string, string[]> }>} history
+ * @returns {Map<string, { score: number, runs: number, transitions: number }>}
+ *          keyed by `${url}::${findingKey}`
+ */
+export function computeNoiseScores(history) {
+  const scores = new Map();
+  if (!Array.isArray(history) || history.length < 2) return scores;
+  // url → array of Set(keys), one per run that crawled the url (run order preserved)
+  const routeSeries = new Map();
+  for (const run of history) {
+    for (const [url, keys] of Object.entries(run.routes ?? {})) {
+      if (!routeSeries.has(url)) routeSeries.set(url, []);
+      routeSeries.get(url).push(new Set(keys));
+    }
+  }
+  for (const [url, series] of routeSeries) {
+    if (series.length < 2) continue;
+    const allKeys = new Set();
+    for (const runKeys of series) for (const k of runKeys) allKeys.add(k);
+    for (const key of allKeys) {
+      let transitions = 0;
+      for (let i = 1; i < series.length; i++) {
+        if (series[i].has(key) !== series[i - 1].has(key)) transitions++;
+      }
+      scores.set(`${url}::${key}`, {
+        score: transitions / (series.length - 1),
+        runs: series.length,
+        transitions,
+      });
+    }
+  }
+  return scores;
+}
+/**
+ * Annotate and downgrade noisy findings in the report (mutates in place).
+ *
+ * A finding is noisy when its route has ≥ minRuns of history AND its presence
+ * flip ratio ≥ flipThreshold. Noisy findings get `noisy: true`, `noiseScore`,
+ * `originalSeverity`, and severity downgraded to "info". Caller is responsible
+ * for rebuilding report.summary afterwards.
+ *
+ * Details:
+ *   - Scores come from computeNoiseScores(history); when that is empty the
+ *     report is left untouched and noisyCount is 0.
+ *   - Findings are looked up by `${routeResult.url}::${findingKey(finding)}`;
+ *     a finding with no matching history entry is skipped.
+ *   - `noiseScore` is the flip ratio rounded to two decimal places.
+ *   - A finding whose severity is already "info" is still flagged and counted,
+ *     but gets no `originalSeverity`.
+ *   - Logs one info line when at least one finding was flagged.
+ *
+ * @param {object} report
+ * @param {Array}  history  - From loadRunHistory()
+ * @param {object} [opts]
+ * @param {number} [opts.minRuns]
+ * @param {number} [opts.flipThreshold]
+ * @returns {{ noisyCount: number }}
+ */
+export function applyNoiseFilter(report, history, { minRuns = NOISE_MIN_RUNS, flipThreshold = NOISE_FLIP_THRESHOLD } = {}) {
+  const scores = computeNoiseScores(history);
+  let noisyCount = 0;
+  if (scores.size === 0) return { noisyCount };
+  for (const routeResult of (report.routes ?? [])) {
+    for (const finding of (routeResult.errors ?? [])) {
+      const entry = scores.get(`${routeResult.url}::${findingKey(finding)}`);
+      if (!entry || entry.runs < minRuns || entry.score < flipThreshold) continue;
+      finding.noisy = true;
+      finding.noiseScore = Math.round(entry.score * 100) / 100;
+      if (finding.severity !== 'info') {
+        finding.originalSeverity = finding.severity;
+        finding.severity = 'info';
+      }
+      noisyCount++;
+    }
+  }
+  if (noisyCount > 0) {
+    logger.info(`[ARGUS] Noise filter: ${noisyCount} flip-flopping finding(s) downgraded to info`);
+  }
+  return { noisyCount };
+}