argusqa-os 9.7.5 → 9.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -9
- package/glama.json +2 -2
- package/package.json +12 -4
- package/src/adapters/browser.js +13 -1
- package/src/cli/pr-validate.js +275 -56
- package/src/mcp-server.js +142 -26
- package/src/orchestration/crawl-and-report.js +1 -1
- package/src/orchestration/orchestrator.js +64 -13
- package/src/utils/audit-depth.js +148 -0
- package/src/utils/deploy-preview.js +210 -0
- package/src/utils/github-api.js +242 -0
- package/src/utils/github-reporter.js +251 -39
- package/src/utils/html-reporter.js +283 -92
- package/src/utils/import-graph.js +290 -0
- package/src/utils/issues-analyzer.js +8 -2
- package/src/utils/lighthouse-checker.js +44 -4
- package/src/utils/parallel-crawler.js +202 -0
- package/src/utils/pr-baseline.js +230 -0
- package/src/utils/pr-diff-analyzer.js +378 -40
- package/src/utils/route-discoverer.js +25 -3
|
@@ -0,0 +1,290 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Argus — static ES/CJS import graph for framework-aware PR route mapping (PR_VALIDATOR C1).
|
|
3
|
+
*
|
|
4
|
+
* Builds a forward + reverse import adjacency over a source tree by statically parsing
|
|
5
|
+
* import / export-from / require / dynamic-import specifiers (regex — no AST, no execution)
|
|
6
|
+
* and resolving relative + tsconfig-alias specifiers to on-disk files. The PR Validator's
|
|
7
|
+
* framework-aware mapping uses the REVERSE graph to find which route entry files
|
|
8
|
+
* (transitively) import a changed component, so a PR touching `components/Foo.tsx` audits
|
|
9
|
+
* only the routes that render it instead of every route.
|
|
10
|
+
*
|
|
11
|
+
* Deliberately conservative + bounded:
|
|
12
|
+
* - bare package specifiers (react, lodash, next/link) are ignored — they resolve to
|
|
13
|
+
* node_modules, never an app route;
|
|
14
|
+
* - unresolvable specifiers (computed requires, aliases we can't read) are dropped;
|
|
15
|
+
* - stylesheet imports (`import './x.module.css'`) are tracked as LEAF nodes so a changed
|
|
16
|
+
* stylesheet attributes to its importing routes (PR_VALIDATOR C3); they are never parsed;
|
|
17
|
+
* - the walk skips node_modules/.git/build dirs and caps file count + size.
|
|
18
|
+
* The CALLER treats an INCOMPLETE resolution as "ambiguous → audit all routes", so any gap
|
|
19
|
+
* in this graph costs precision, never a missed regression.
|
|
20
|
+
*
|
|
21
|
+
* Pure filesystem + string parsing. No Chrome, no MCP, no network. This module is reachable
|
|
22
|
+
* from the MCP server (via pr-diff-analyzer), so it writes NOTHING to stdout — diagnostics
|
|
23
|
+
* go to stderr through childLogger.
|
|
24
|
+
*/
|
|
25
|
+
|
|
26
|
+
import fs from 'fs';
|
|
27
|
+
import path from 'path';
|
|
28
|
+
import { childLogger } from './logger.js';
|
|
29
|
+
|
|
30
|
+
const logger = childLogger('import-graph');
|
|
31
|
+
|
|
32
|
+
// Extensions of the source modules the graph parses and resolves. The array order doubles
// as the preference order when an extensionless import is matched to a file on disk.
export const SOURCE_EXTS = [
  '.ts',
  '.tsx',
  '.js',
  '.jsx',
  '.mjs',
  '.cjs',
];
const SOURCE_EXT_SET = new Set(SOURCE_EXTS);
const INDEX_BASENAMES = SOURCE_EXTS.map((ext) => 'index' + ext);

// Stylesheet (asset) extensions (PR_VALIDATOR C3). A stylesheet is a LEAF node: it must be
// attributable to the routes that import it, but it is never parsed for specifiers because
// nothing it imports matters to the route graph. Assets only resolve when the import spells
// out the extension (`import './x.module.css'`); they take no part in the extensionless
// inference applied to source modules.
export const ASSET_EXTS = [
  '.css',
  '.scss',
  '.sass',
  '.less',
];
const ASSET_EXT_SET = new Set(ASSET_EXTS);
const GRAPH_EXT_SET = new Set([...SOURCE_EXTS, ...ASSET_EXTS]);

// Directory names the walk never descends into (dependencies, VCS data, build output, caches).
const SKIP_DIRS = new Set([
  'node_modules',
  '.git',
  '.next',
  '.nuxt',
  '.svelte-kit',
  'dist',
  'build',
  'out',
  'coverage',
  '.turbo',
  '.cache',
  '.vercel',
]);

// Bounds that keep the walk cheap on a mega-repo.
const MAX_FILES = 5000;            // maximum number of files collected
const MAX_FILE_BYTES = 512 * 1024; // larger (generated / bundled) files are not parsed

// Static specifier extractors. One narrow pattern per syntax form (rather than a single
// mega-regex) keeps false matches down. Capture group 1 is always the specifier text.
const SPECIFIER_RES = [
  // import x from 'y' | import 'y'
  /\bimport\s+(?:[\w*${},\s]+\s+from\s+)?['"]([^'"]+)['"]/g,
  // export … from 'y'
  /\bexport\s+(?:[\w*${},\s]+\s+)?from\s+['"]([^'"]+)['"]/g,
  // require('y')
  /\brequire\(\s*['"]([^'"]+)['"]\s*\)/g,
  // dynamic import('y')
  /\bimport\(\s*['"]([^'"]+)['"]\s*\)/g,
];
|
|
62
|
+
|
|
63
|
+
/**
 * Collect the raw module specifiers a source text refers to.
 *
 * Every pattern in SPECIFIER_RES is run over the text and capture group 1 of each match is
 * recorded. The scan is purely static: template-string and computed specifiers are not
 * detected (they could not be resolved anyway, so they land in the caller's conservative
 * bucket).
 *
 * @param {string} src source text of one module
 * @returns {string[]} unique specifiers, in first-seen order (pattern by pattern);
 *   an empty array for a non-string or empty input
 */
export function parseImports(src) {
  const isUsableText = typeof src === 'string' && src.length > 0;
  if (!isUsableText) {
    return [];
  }

  const specifiers = new Set();
  SPECIFIER_RES.forEach((pattern) => {
    // The patterns are shared /g regexes: start every scan from the beginning.
    pattern.lastIndex = 0;
    for (const match of src.matchAll(pattern)) {
      const captured = match[1];
      if (captured) {
        specifiers.add(captured);
      }
    }
  });
  return Array.from(specifiers);
}
|
|
82
|
+
|
|
83
|
+
// TypeScript ESM projects write the *output* extension in import specifiers
// (`import './foo.js'`) while the file on disk is `foo.ts` / `foo.tsx`. Only counterparts
// that are themselves in SOURCE_EXTS are listed, so a hit is always a graph-eligible file.
const TS_SOURCE_EXTS_FOR_JS = new Map([
  ['.js', ['.ts', '.tsx']],
  ['.jsx', ['.tsx']],
]);

/**
 * Resolve a candidate path (with or without an extension, or a directory) to a concrete
 * file on disk. Tried in order:
 *   1. the exact path, when it carries a known source or asset extension;
 *   2. for an explicit `.js` / `.jsx` path that does not exist, the TypeScript source with
 *      the same stem (`foo.js` → `foo.ts`, `foo.tsx`; `foo.jsx` → `foo.tsx`);
 *   3. the path with each source extension appended (extensionless import);
 *   4. `index.<ext>` inside the path (directory import).
 *
 * @param {string} candidate absolute path derived from an import specifier
 * @returns {string|null} the matching file path, or null when nothing matches (including
 *   when `candidate` is not a string)
 */
function resolveFileCandidate(candidate) {
  try {
    // Exact path with a known source OR asset extension (asset = a C3 stylesheet leaf, e.g.
    // an explicit `import './x.module.css'`).
    const ext = path.extname(candidate);
    if (GRAPH_EXT_SET.has(ext) && isFile(candidate)) return candidate;

    // Explicit JS extension that is really a TS source on disk. Without this step the
    // specifier resolves to nothing and the import edge is silently lost.
    const tsExts = TS_SOURCE_EXTS_FOR_JS.get(ext);
    if (tsExts) {
      const stem = candidate.slice(0, candidate.length - ext.length);
      for (const tsExt of tsExts) {
        const swapped = stem + tsExt;
        if (isFile(swapped)) return swapped;
      }
    }

    // Extensionless import → try each source extension. (Stylesheets are always imported
    // with their extension, so they never participate in this inference.)
    for (const sourceExt of SOURCE_EXTS) {
      const withExt = candidate + sourceExt;
      if (isFile(withExt)) return withExt;
    }

    // Directory import → index.<ext>.
    for (const idx of INDEX_BASENAMES) {
      const indexed = path.join(candidate, idx);
      if (isFile(indexed)) return indexed;
    }
  } catch { /* e.g. a non-string candidate — fall through to null */ }
  return null;
}
|
|
107
|
+
|
|
108
|
+
/**
 * Report whether `p` names an existing regular file.
 *
 * @param {string} p path to probe
 * @returns {boolean} false when the path is missing, is not a regular file, or the stat
 *   call throws for any reason
 */
function isFile(p) {
  try {
    const stats = fs.statSync(p);
    return stats.isFile();
  } catch {
    return false;
  }
}
|
|
111
|
+
|
|
112
|
+
/**
 * Return the index just past the JSON string literal that opens at `text[start]`
 * (backslash escapes are skipped, so an escaped quote does not end the string).
 */
function endOfJsonString(text, start) {
  let i = start + 1;
  while (i < text.length && text[i] !== '"') {
    i += text[i] === '\\' ? 2 : 1;
  }
  return Math.min(i + 1, text.length);
}

/**
 * Remove `//` and `/* *\/` comments from JSONC text, leaving string literals untouched.
 * Being string-aware matters: alias keys and globs such as "@/*" or "**\/*.ts" contain the
 * comment delimiters and must not be mistaken for comments.
 */
function stripJsonComments(text) {
  let out = '';
  let i = 0;
  while (i < text.length) {
    const ch = text[i];
    if (ch === '"') {
      const end = endOfJsonString(text, i);
      out += text.slice(i, end);
      i = end;
    } else if (ch === '/' && text[i + 1] === '/') {
      while (i < text.length && text[i] !== '\n' && text[i] !== '\r') i += 1;
    } else if (ch === '/' && text[i + 1] === '*') {
      const close = text.indexOf('*/', i + 2);
      i = close === -1 ? text.length : close + 2;
    } else {
      out += ch;
      i += 1;
    }
  }
  return out;
}

/**
 * Drop commas that are followed (after optional whitespace) by `}` or `]`, again leaving
 * string literals untouched. Expects comments to have been removed already.
 */
function stripTrailingCommas(text) {
  let out = '';
  let i = 0;
  while (i < text.length) {
    const ch = text[i];
    if (ch === '"') {
      const end = endOfJsonString(text, i);
      out += text.slice(i, end);
      i = end;
      continue;
    }
    if (ch === ',') {
      let next = i + 1;
      while (next < text.length && /\s/.test(text[next])) next += 1;
      if (text[next] === '}' || text[next] === ']') {
        i += 1; // trailing comma: skip it
        continue;
      }
    }
    out += ch;
    i += 1;
  }
  return out;
}

/**
 * Read tsconfig.json / jsconfig.json `compilerOptions.paths` aliases (the common `@/*`
 * style). The first of the two files that exists under `rootDir` is used. Parsing is
 * lenient: a leading BOM, `//` and block comments, and trailing commas are removed before
 * JSON.parse, and the removal is string-aware so keys/globs like "@/*" and "**\/*.ts" are
 * never misread as comments. Any read/parse error is logged at debug level and yields [],
 * so a missing or exotic config never throws.
 *
 * Each `paths` key has one trailing `*` removed to form the prefix ("@/*" → "@/"); each
 * target has its trailing `*` removed and is resolved against `baseUrl` (default: rootDir).
 * Entries whose value is not a non-empty array are skipped.
 *
 * @param {string} rootDir directory expected to contain the config file
 * @returns {Array<{ prefix: string, targets: string[] }>} prefix→absolute-dir aliases,
 *   longest prefix first
 */
export function loadAliases(rootDir) {
  for (const name of ['tsconfig.json', 'jsconfig.json']) {
    const file = path.join(rootDir, name);
    if (!isFile(file)) continue;
    try {
      const raw = fs.readFileSync(file, 'utf8').replace(/^\uFEFF/, '');
      const cfg = JSON.parse(stripTrailingCommas(stripJsonComments(raw)));
      const co = cfg?.compilerOptions ?? {};
      const baseUrl = path.resolve(rootDir, co.baseUrl ?? '.');
      const paths = co.paths ?? {};
      const aliases = [];
      for (const [key, targets] of Object.entries(paths)) {
        if (!Array.isArray(targets) || targets.length === 0) continue;
        const prefix = key.replace(/\*$/, ''); // "@/*" → "@/"
        const absTargets = targets.map((t) =>
          path.resolve(baseUrl, String(t).replace(/\*$/, '')), // "src/*" → "<base>/src"
        );
        aliases.push({ prefix, targets: absTargets });
      }
      // Longer prefixes first so "@/components/" wins over "@/".
      aliases.sort((a, b) => b.prefix.length - a.prefix.length);
      return aliases;
    } catch (err) {
      logger.debug(`[ARGUS] C1: could not parse ${name} for path aliases — ${err.message}`);
      return [];
    }
  }
  return [];
}
|
|
153
|
+
|
|
154
|
+
/**
 * Map one import specifier, as written in `fromFileAbs`, to a file on disk.
 *
 * Relative specifiers are resolved against the importing file's directory. Anything else is
 * matched against the alias prefixes in the given order; for the matching aliases each
 * target directory is tried in turn and the first hit wins. A specifier that is neither
 * relative nor resolvable through an alias is treated as a bare package import.
 *
 * @param {string} spec           the import specifier (e.g. "../components/Foo", "@/lib/api")
 * @param {string} fromFileAbs    absolute path of the importing file
 * @param {Array<{prefix:string,targets:string[]}>} aliases  aliases from loadAliases()
 * @returns {string|null} resolved file path, or null for a bare package / unresolvable /
 *   non-string / empty specifier
 */
export function resolveSpecifier(spec, fromFileAbs, aliases = []) {
  if (typeof spec !== 'string' || spec.length === 0) return null;

  const isRelative =
    spec === '.' || spec === '..' || spec.startsWith('./') || spec.startsWith('../');
  if (isRelative) {
    const importerDir = path.dirname(fromFileAbs);
    return resolveFileCandidate(path.resolve(importerDir, spec));
  }

  // tsconfig path alias: strip the prefix and look for the remainder under each target.
  for (const { prefix, targets } of aliases) {
    if (!prefix || !spec.startsWith(prefix)) continue;
    const remainder = spec.slice(prefix.length);
    for (const targetDir of targets) {
      const resolved = resolveFileCandidate(path.join(targetDir, remainder));
      if (resolved) return resolved;
    }
  }

  // Bare package specifier (react, next/link, lodash/get, …) → not an app file.
  return null;
}
|
|
185
|
+
|
|
186
|
+
/**
 * Walk `dir` iteratively and gather every file whose extension is in GRAPH_EXT_SET —
 * source modules plus the C3 stylesheet leaves.
 *
 * Symbolic links are ignored (no symlink cycles), directories named in SKIP_DIRS are not
 * entered, unreadable directories are skipped, and the walk stops as soon as MAX_FILES
 * paths have been collected.
 *
 * @param {string} dir directory to start from
 * @returns {string[]} collected file paths (each is `dir` joined with the relative entry
 *   names), at most MAX_FILES of them
 */
function collectSourceFiles(dir) {
  const collected = [];
  const pending = [dir]; // LIFO work list of directories still to read

  while (pending.length > 0 && collected.length < MAX_FILES) {
    const current = pending.pop();

    let dirents;
    try {
      dirents = fs.readdirSync(current, { withFileTypes: true });
    } catch {
      continue; // unreadable directory: skip it
    }

    for (const dirent of dirents) {
      if (dirent.isSymbolicLink()) continue; // avoid symlink cycles

      const absolute = path.join(current, dirent.name);
      if (dirent.isDirectory()) {
        if (!SKIP_DIRS.has(dirent.name)) pending.push(absolute);
        continue;
      }

      if (!GRAPH_EXT_SET.has(path.extname(dirent.name))) continue;
      collected.push(absolute);
      if (collected.length >= MAX_FILES) break;
    }
  }

  return collected;
}
|
|
210
|
+
|
|
211
|
+
/**
 * Build the forward + reverse import adjacency for the source tree under `rootDir`.
 *
 * Every collected file becomes a node with a (possibly empty) forward set. Stylesheet
 * nodes are never read. Source files larger than MAX_FILE_BYTES, or that cannot be read,
 * keep an empty forward set. An edge is recorded only when the specifier resolves to a
 * file that was itself collected.
 *
 * @param {string} rootDir
 * @returns {{ files:     Set<string>,
 *             forward:   Map<string, Set<string>>,
 *             reverse:   Map<string, Set<string>>,
 *             truncated: boolean }}
 *   `forward[a]` = files a imports; `reverse[b]` = files that import b. Both are keyed by
 *   the paths returned from the walk. `truncated` is true when the MAX_FILES cap was
 *   reached (the graph may be incomplete → the caller must fall back conservatively).
 *   A non-string or non-directory `rootDir` yields an empty, non-truncated graph.
 */
export function buildImportGraph(rootDir) {
  const files = new Set();
  const forward = new Map();
  const reverse = new Map();

  if (typeof rootDir !== 'string' || !isDir(rootDir)) {
    return { files, forward, reverse, truncated: false };
  }

  const sourceFiles = collectSourceFiles(rootDir);
  const known = new Set(sourceFiles); // membership test for resolved targets
  const aliases = loadAliases(rootDir);

  for (const file of sourceFiles) {
    files.add(file);
    let outgoing = forward.get(file);
    if (!outgoing) {
      outgoing = new Set();
      forward.set(file, outgoing);
    }

    // C3: a stylesheet is a graph NODE (so it can carry reverse edges from its importers)
    // but imports nothing the route graph tracks — it is never parsed.
    if (ASSET_EXT_SET.has(path.extname(file))) continue;

    let text;
    try {
      const { size } = fs.statSync(file);
      if (size > MAX_FILE_BYTES) continue; // skip giant files
      text = fs.readFileSync(file, 'utf8');
    } catch {
      continue;
    }

    for (const spec of parseImports(text)) {
      const target = resolveSpecifier(spec, file, aliases);
      const inTree = Boolean(target) && known.has(target);
      if (!inTree) continue; // bare pkg or outside the tree

      outgoing.add(target);
      let importers = reverse.get(target);
      if (!importers) {
        importers = new Set();
        reverse.set(target, importers);
      }
      importers.add(file);
    }
  }

  return { files, forward, reverse, truncated: sourceFiles.length >= MAX_FILES };
}
|
|
261
|
+
|
|
262
|
+
/**
 * Report whether `p` names an existing directory.
 *
 * @param {string} p path to probe
 * @returns {boolean} false when the path is missing, is not a directory, or the stat call
 *   throws for any reason
 */
function isDir(p) {
  try {
    const stats = fs.statSync(p);
    return stats.isDirectory();
  } catch {
    return false;
  }
}
|
|
265
|
+
|
|
266
|
+
/**
 * Compute the transitive reverse closure of `seeds`: every file that imports a seed, either
 * directly or through a chain of importers. A seed appears in the result only when some
 * other file in the closure imports it.
 *
 * @param {Map<string, Set<string>>} reverse  reverse adjacency from buildImportGraph
 * @param {Iterable<string>} seeds            absolute paths of changed files
 * @returns {Set<string>} absolute paths of importing files
 */
export function findDependents(reverse, seeds) {
  const dependents = new Set();
  const pending = [...seeds];       // LIFO work list
  const seen = new Set(pending);    // everything ever queued, so cycles terminate

  while (pending.length > 0) {
    const current = pending.pop();
    const importers = reverse.get(current);
    if (!importers) continue;

    for (const importer of importers) {
      if (dependents.has(importer)) continue;
      dependents.add(importer);

      // A seed reached through an importer is reported but not queued a second time.
      if (seen.has(importer)) continue;
      seen.add(importer);
      pending.push(importer);
    }
  }

  return dependents;
}
|
|
@@ -31,7 +31,10 @@ const CLASSIFIERS = [
|
|
|
31
31
|
{
|
|
32
32
|
type: 'cors_violation',
|
|
33
33
|
issueTypePattern: /cors/i,
|
|
34
|
-
|
|
34
|
+
// Live Chrome 149 surfaces CorsIssue in the panel as e.g.
|
|
35
|
+
// "Ensure CORS response header values are valid" — the older phrase-specific
|
|
36
|
+
// patterns missed it, so the \bcors\b word-match anchors any CORS issue title.
|
|
37
|
+
textPattern: /cors policy|cross.origin.*blocked|access.control.allow.origin|\bcors\b/i,
|
|
35
38
|
severity: (isCritical) => isCritical ? 'critical' : 'warning',
|
|
36
39
|
},
|
|
37
40
|
{
|
|
@@ -49,7 +52,10 @@ const CLASSIFIERS = [
|
|
|
49
52
|
{
|
|
50
53
|
type: 'cookie_attribute_missing',
|
|
51
54
|
issueTypePattern: /cookie/i,
|
|
52
|
-
|
|
55
|
+
// Live Chrome 149 surfaces the SameSite=None-without-Secure cookie Issue as
|
|
56
|
+
// "Mark cross-site cookies as Secure to allow setting them in cross-site contexts"
|
|
57
|
+
// — neither "samesite" nor "secure attribute" appear, so match those phrasings too.
|
|
58
|
+
textPattern: /samesite|secure attribute|partitioned|cookie.*rejected|set-cookie.*blocked|cross-site cookies?|cookies? as secure/i,
|
|
53
59
|
severity: () => 'warning',
|
|
54
60
|
},
|
|
55
61
|
{
|
|
@@ -5,12 +5,49 @@
|
|
|
5
5
|
* checkLighthouse directly without pulling in the Slack-initialised orchestrator.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
+
import fs from 'node:fs';
|
|
8
9
|
import { registerExpensive } from '../registry.js';
|
|
9
10
|
import { thresholds } from '../config/targets.js';
|
|
10
11
|
import { childLogger } from './logger.js';
|
|
11
12
|
|
|
12
13
|
const logger = childLogger('lighthouse-checker');
|
|
13
14
|
|
|
15
|
+
/**
 * Turn a chrome-devtools-mcp `lighthouse_audit` response into the `{ categories, audits }`
 * shape consumed by this module.
 *
 * Strategy:
 *   1. Look for the first path in the text that ends in `report.json` (Windows drive path or
 *      a `/`-rooted path on a single line). If that file can be read and parsed, and the
 *      parsed object has `categories`, return its `categories` and `audits` (`{}` when
 *      `audits` is absent).
 *   2. Otherwise synthesize categories from the "### Category Scores" markdown block, whose
 *      lines look like "- Accessibility: 96 (accessibility)". The number is divided by 100
 *      so scores are on the same 0–1 scale as report.json; no audits are available here.
 *
 * Never throws; when nothing parses the result is `{ categories: {}, audits: {} }`.
 *
 * @param {string} responseText - raw lighthouse_audit response (markdown text)
 * @returns {{ categories: object, audits: object }}
 */
export function parseLighthouseReport(responseText) {
  const markdown = String(responseText ?? '');

  // Prefer the authoritative report.json (categories + auditRefs + per-audit detail).
  const reportPath = markdown.match(/([A-Za-z]:\\[^\r\n]*?report\.json|\/[^\r\n]*?report\.json)/)?.[1];
  if (reportPath !== undefined) {
    try {
      const report = JSON.parse(fs.readFileSync(reportPath.trim(), 'utf8'));
      const hasCategories = report && typeof report === 'object' && report.categories;
      if (hasCategories) {
        return { categories: report.categories, audits: report.audits ?? {} };
      }
    } catch { /* fall through to the markdown scores */ }
  }

  // Fallback: scores only, read from the markdown summary.
  const categories = {};
  const scoresBlock = markdown.match(/### Category Scores\s*\n([\s\S]*?)(?:\n###|\s*$)/);
  const scoreLines = scoresBlock
    ? scoresBlock[1].matchAll(/^\s*-\s+.+?:\s*([\d.]+)\s*\(([\w-]+)\)\s*$/gm)
    : [];
  for (const [, rawScore, id] of scoreLines) {
    categories[id] = { id, score: Number(rawScore) / 100 };
  }
  return { categories, audits: {} };
}
|
|
50
|
+
|
|
14
51
|
const LIGHTHOUSE_LABELS = {
|
|
15
52
|
accessibility: 'Accessibility',
|
|
16
53
|
performance: 'Performance',
|
|
@@ -39,13 +76,16 @@ export async function checkLighthouse(browser, url) {
|
|
|
39
76
|
const LIGHTHOUSE_TIMEOUT_MS = parseInt(process.env.ARGUS_LIGHTHOUSE_TIMEOUT ?? '120000', 10);
|
|
40
77
|
|
|
41
78
|
try {
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
}
|
|
79
|
+
// browser.lighthouse navigates to url + audits the current page. lighthouse_audit
|
|
80
|
+
// returns markdown referencing a full report.json — parseLighthouseReport reads that
|
|
81
|
+
// back into the { categories, audits } shape this function consumes. Performance is
|
|
82
|
+
// excluded by the tool (covered by web-vitals); thresholds.lighthouse.performance is
|
|
83
|
+
// simply skipped below when its category is absent.
|
|
84
|
+
const auditPromise = browser.lighthouse(url);
|
|
45
85
|
const timeoutPromise = new Promise((_, reject) =>
|
|
46
86
|
setTimeout(() => reject(new Error(`Lighthouse timed out after ${LIGHTHOUSE_TIMEOUT_MS / 1000}s`)), LIGHTHOUSE_TIMEOUT_MS)
|
|
47
87
|
);
|
|
48
|
-
const result = await Promise.race([auditPromise, timeoutPromise]);
|
|
88
|
+
const result = parseLighthouseReport(await Promise.race([auditPromise, timeoutPromise]));
|
|
49
89
|
|
|
50
90
|
const categories = result?.categories ?? {};
|
|
51
91
|
const audits = result?.audits ?? {};
|
|
@@ -26,3 +26,205 @@ export function chunkArray(arr, n) {
|
|
|
26
26
|
for (let i = 0; i < arr.length; i += size) chunks.push(arr.slice(i, i + size));
|
|
27
27
|
return chunks;
|
|
28
28
|
}
|
|
29
|
+
|
|
30
|
+
// NOTE: this module is imported by mcp-server.js (via the PR-validate path) and by the
|
|
31
|
+
// orchestrator. It MUST stay stdout-clean (no console.log) — stdout is reserved for JSON-RPC.
|
|
32
|
+
|
|
33
|
+
/**
 * Map `worker` over `items` with bounded concurrency, returning results in INPUT order
 * (results[i] ⟷ items[i]) regardless of completion order — so the output is identical to a
 * sequential `for…of` map. This is the safety property the PR Validator relies on: the
 * aggregate findings and the merge-block decision must not depend on how routes interleave.
 *
 * Spawns exactly `min(concurrency, items.length)` persistent lanes; each lane repeatedly
 * pulls the next item from a shared cursor (the read-then-increment `cursor++` has no await
 * between the read and the bump, so two lanes can never claim the same index). The lane
 * index (0…lanes-1) is passed to `worker` so a caller can pin a per-lane resource — e.g. one
 * Chrome client per lane — and be sure it is never used by two items at once.
 *
 * Errors: if a `worker` call rejects, its result slot is left `undefined`, the remaining
 * items still drain to completion, and the FIRST rejection is re-thrown after every lane
 * finishes (fail-loud). Failure is tracked with a separate flag rather than by inspecting
 * the captured value, so a rejection whose reason is `null` is re-thrown like any other
 * instead of being mistaken for "no error". Callers that need per-item error handling
 * should catch inside `worker` and return an error marker instead of throwing.
 *
 * @template T, R
 * @param {T[]} items
 * @param {number} concurrency  max parallel workers; effective lanes = min(concurrency, items.length)
 * @param {(item: T, index: number, lane: number) => Promise<R>} worker
 * @returns {Promise<R[]>} results in input order
 * @throws {TypeError}  when `items` is not an array or `worker` is not a function
 * @throws {RangeError} when `concurrency` is not a positive integer
 */
export async function mapWithConcurrency(items, concurrency, worker) {
  if (!Array.isArray(items)) throw new TypeError('mapWithConcurrency: items must be an array');
  if (typeof worker !== 'function') throw new TypeError('mapWithConcurrency: worker must be a function');
  if (!Number.isInteger(concurrency) || concurrency <= 0) {
    throw new RangeError('mapWithConcurrency: concurrency must be a positive integer');
  }

  const results = new Array(items.length);
  if (items.length === 0) return results;

  const lanes = Math.min(concurrency, items.length);
  let cursor = 0;
  let failed = false;   // explicit flag: the rejection reason itself may be null/undefined
  let firstError;

  async function runLane(lane) {
    for (let index = cursor++; index < items.length; index = cursor++) {
      try {
        results[index] = await worker(items[index], index, lane);
      } catch (err) {
        if (!failed) {
          failed = true;
          firstError = err;
        }
        // keep draining the remaining items so siblings finish; this slot stays undefined
      }
    }
  }

  await Promise.all(Array.from({ length: lanes }, (_unused, lane) => runLane(lane)));
  if (failed) throw firstError;
  return results;
}
|
|
87
|
+
|
|
88
|
+
/**
 * Crawl `routes` on several lanes at once, each lane bound to its OWN client, and hand back
 * the per-route results in INPUT (route) order — the same order a sequential crawl yields.
 *
 * Lane 0 uses `primaryClient`; every further lane gets a client from `createClient()`,
 * created one after another before any route is crawled. The clients created here are
 * closed in a `finally` — also when client creation or the crawl itself throws — through
 * `closeClient` (default: `client.close()` if present); errors raised while closing are
 * ignored. `primaryClient` belongs to the caller and is never closed here.
 *
 * A non-integer or non-positive `concurrency` is treated as 1.
 *
 * `crawlRoute` should handle its own per-route errors (catch + return a marker) so one
 * route's failure does not abort its siblings and the all-routes-failed guard still sees
 * every route.
 *
 * @param {Array} routes                             [] returns [] with no clients created
 * @param {object} opts
 * @param {number} opts.concurrency                  desired max parallel clients; lanes = min(concurrency, routes.length)
 * @param {*} opts.primaryClient                     the already-open client used by lane 0
 * @param {() => Promise<*>} opts.createClient       factory for lanes 1…n-1 (only called when lanes > 1)
 * @param {(route: any, client: any, meta: { index: number, lane: number }) => Promise<any>} opts.crawlRoute
 * @param {(client: any) => any} [opts.closeClient]  teardown for the extra clients (default: client.close())
 * @returns {Promise<Array>} results in route order
 * @throws {TypeError} when `routes` is not an array, `crawlRoute` is not a function, or
 *   more than one lane is needed and `createClient` is not a function
 */
export async function auditRoutesConcurrently(routes, { concurrency, primaryClient, createClient, crawlRoute, closeClient } = {}) {
  if (!Array.isArray(routes)) throw new TypeError('auditRoutesConcurrently: routes must be an array');
  if (typeof crawlRoute !== 'function') throw new TypeError('auditRoutesConcurrently: crawlRoute must be a function');
  if (routes.length === 0) return [];

  const requested = Number.isInteger(concurrency) && concurrency > 0 ? concurrency : 1;
  const laneCount = Math.min(requested, routes.length);
  const teardown = typeof closeClient === 'function' ? closeClient : (client) => client?.close?.();
  const spawned = []; // clients created here, and therefore closed here

  try {
    while (spawned.length < laneCount - 1) {
      if (typeof createClient !== 'function') {
        throw new TypeError('auditRoutesConcurrently: createClient is required when concurrency > 1');
      }
      spawned.push(await createClient());
    }

    const laneClients = [primaryClient, ...spawned];
    const crawlOnLane = (route, index, lane) => crawlRoute(route, laneClients[lane], { index, lane });
    return await mapWithConcurrency(routes, laneCount, crawlOnLane);
  } finally {
    for (const client of spawned) {
      try {
        await teardown(client);
      } catch { /* ignore teardown errors */ }
    }
  }
}
|
|
139
|
+
|
|
140
|
+
// ── D4 — per-route timeout / retry ────────────────────────────────────────────
|
|
141
|
+
// A hung or flaky route audit must surface as a REJECTION, never a silent zero-findings
|
|
142
|
+
// resolution — the caller records the rejection as a route ERROR, which feeds the
|
|
143
|
+
// all-routes-failed guard (src/cli/pr-validate.js allRoutesFailed). These helpers therefore
|
|
144
|
+
// never resolve on timeout; a timed-out audit can never become a false PASS.
|
|
145
|
+
|
|
146
|
+
/**
 * Race `work` against a timeout. If `ms` elapses before the work settles, the returned
 * promise REJECTS with a timeout Error — it NEVER resolves on timeout. This is the
 * load-bearing safety property of the PR Validator's per-route audit: a hung audit must
 * surface as a rejection (→ recorded as a route ERROR → fed to the all-routes-failed guard),
 * never as a silently passing zero-findings route (a false PASS).
 *
 * A non-finite or non-positive `ms` disables the bound (the work is awaited as-is). The
 * timer is cleared once the race settles so it never keeps the event loop alive; the
 * underlying work is NOT cancelled — the timer only stops US from waiting.
 *
 * Node stores a timer delay as a signed 32-bit integer and coerces anything larger than
 * 2147483647 ms to 1 ms, which would turn a very generous bound into an almost immediate
 * timeout. The delay handed to setTimeout is therefore clamped to that maximum (~24.8 days);
 * the timeout message still reports the requested `ms`.
 *
 * @template R
 * @param {Promise<R> | (() => Promise<R>)} work  a promise, or a thunk returning one (a
 *                                                thunk's synchronous throw is converted to
 *                                                a rejection)
 * @param {number} ms        timeout in milliseconds (<=0 / non-finite → unbounded)
 * @param {string} [label]   human-readable label used in the timeout message
 * @returns {Promise<R>}
 */
export function withTimeout(work, ms, label = 'operation') {
  // Wrapping a thunk in an async IIFE turns a synchronous throw into a rejection.
  const promise = typeof work === 'function' ? (async () => work())() : Promise.resolve(work);
  if (!Number.isFinite(ms) || ms <= 0) return promise;

  const MAX_TIMER_DELAY_MS = 2 ** 31 - 1; // largest delay setTimeout honours
  const delay = Math.min(ms, MAX_TIMER_DELAY_MS);

  let timer;
  const timeout = new Promise((_resolve, reject) => {
    timer = setTimeout(() => reject(new Error(`${label} timed out after ${ms}ms`)), delay);
  });
  return Promise.race([promise, timeout]).finally(() => clearTimeout(timer));
}
|
|
174
|
+
|
|
175
|
+
/**
 * Run one route audit under a per-attempt timeout, retrying a bounded number of times.
 * Resolves with the result of the first attempt that succeeds. If every attempt times out
 * or errors, the error of the LAST attempt is re-thrown (fail-loud), so the caller's
 * per-route catch records a route ERROR rather than a false PASS. With the defaults
 * (a single attempt, no timeout) the audit is simply run once through withTimeout.
 *
 * Retries are immediate — there is no delay between attempts. `onRetry` is invoked after a
 * failed attempt only when another attempt will follow; an exception thrown by `onRetry`
 * is swallowed so a logging callback can never break the retry loop.
 *
 * @template R
 * @param {() => Promise<R>} auditFn   the per-route audit, e.g. () => crawlRouteWithDepth(route, …)
 * @param {object} opts
 * @param {number} [opts.timeoutMs=0]  per-attempt timeout (<=0 / non-finite → unbounded)
 * @param {number} [opts.retries=0]    additional attempts after the first (total = retries + 1);
 *                                     a non-finite value counts as 0, fractions are floored
 * @param {string} [opts.label]        label used in the timeout message
 * @param {(attempt: number, err: Error) => void} [opts.onRetry]  invoked before each retry
 *                                     with the 1-based number of the attempt that just failed
 * @returns {Promise<R>}
 * @throws {TypeError} when `auditFn` is not a function
 */
export async function auditRouteWithRetry(auditFn, { timeoutMs = 0, retries = 0, label = 'route audit', onRetry } = {}) {
  if (typeof auditFn !== 'function') throw new TypeError('auditRouteWithRetry: auditFn must be a function');

  const extraAttempts = Math.floor(Number.isFinite(retries) ? retries : 0);
  const totalAttempts = Math.max(1, extraAttempts + 1);
  const notify = typeof onRetry === 'function' ? onRetry : null;

  let attempt = 0;
  let failure;
  while (attempt < totalAttempts) {
    attempt += 1;
    try {
      return await withTimeout(auditFn, timeoutMs, label);
    } catch (err) {
      failure = err;
      const willRetry = attempt < totalAttempts;
      if (willRetry && notify) {
        try {
          notify(attempt, err);
        } catch { /* a logging callback must never break the retry loop */ }
      }
    }
  }
  throw failure;
}
|
|
211
|
+
|
|
212
|
+
/**
 * Derive the per-route audit timeout + retry policy from environment variables. Both
 * PR-validate paths (CLI + the MCP tool) share this so they cannot diverge on the bound.
 *
 *   ARGUS_ROUTE_TIMEOUT_MS — per-route audit timeout in ms, read with parseInt (base 10).
 *       Unset or non-numeric → 120000 (the safe, bounded default); 0 or negative → 0,
 *       which means unbounded.
 *   ARGUS_ROUTE_RETRIES    — extra attempts after a failed/timed-out audit, read with
 *       parseInt (base 10). Unset or non-numeric → 0; otherwise clamped to 0–5.
 *
 * A timed-out audit is recorded as a route ERROR and feeds the all-routes-failed guard —
 * bounding a route can only BLOCK (the conservative direction), never produce a false PASS.
 *
 * @param {Record<string, string|undefined>} [env=process.env]
 * @returns {{ timeoutMs: number, retries: number }}
 */
export function routeResilienceFromEnv(env = process.env) {
  const parsedTimeout = parseInt(env.ARGUS_ROUTE_TIMEOUT_MS, 10);
  let timeoutMs;
  if (Number.isNaN(parsedTimeout)) {
    timeoutMs = 120000;
  } else {
    timeoutMs = parsedTimeout > 0 ? parsedTimeout : 0;
  }

  const parsedRetries = parseInt(env.ARGUS_ROUTE_RETRIES, 10);
  let retries = 0;
  if (!Number.isNaN(parsedRetries)) {
    retries = Math.min(5, Math.max(0, parsedRetries));
  }

  return { timeoutMs, retries };
}
|