argusqa-os 9.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.mcp.json +8 -0
- package/LICENSE +21 -0
- package/README.md +879 -0
- package/package.json +69 -0
- package/src/adapters/browser.js +82 -0
- package/src/argus.js +8 -0
- package/src/batch-runner.js +8 -0
- package/src/cli/init.js +314 -0
- package/src/config/schema.js +108 -0
- package/src/config/targets.js +309 -0
- package/src/domain/finding.js +25 -0
- package/src/mcp-server.js +156 -0
- package/src/orchestration/crawl-and-report.js +16 -0
- package/src/orchestration/dispatcher.js +263 -0
- package/src/orchestration/env-comparison.js +498 -0
- package/src/orchestration/orchestrator.js +1128 -0
- package/src/orchestration/report-processor.js +134 -0
- package/src/orchestration/slack-notifier.js +337 -0
- package/src/orchestration/watch-mode.js +316 -0
- package/src/registry.js +18 -0
- package/src/server/index.js +94 -0
- package/src/server/interaction-handler.js +126 -0
- package/src/server/slash-command-handler.js +185 -0
- package/src/utils/api-frequency.js +128 -0
- package/src/utils/baseline-manager.js +255 -0
- package/src/utils/codebase-analyzer.js +299 -0
- package/src/utils/content-analyzer.js +155 -0
- package/src/utils/contract-validator.js +178 -0
- package/src/utils/css-analyzer.js +407 -0
- package/src/utils/diff.js +189 -0
- package/src/utils/flakiness-detector.js +82 -0
- package/src/utils/flow-runner.js +572 -0
- package/src/utils/github-reporter.js +310 -0
- package/src/utils/hover-analyzer.js +214 -0
- package/src/utils/html-reporter.js +301 -0
- package/src/utils/issues-analyzer.js +171 -0
- package/src/utils/keyboard-analyzer.js +141 -0
- package/src/utils/lighthouse-checker.js +120 -0
- package/src/utils/logger.js +39 -0
- package/src/utils/login-orchestrator.js +99 -0
- package/src/utils/mcp-client.js +264 -0
- package/src/utils/mcp-parsers.js +57 -0
- package/src/utils/memory-analyzer.js +270 -0
- package/src/utils/network-timing-analyzer.js +76 -0
- package/src/utils/parallel-crawler.js +28 -0
- package/src/utils/responsive-analyzer.js +253 -0
- package/src/utils/retry.js +36 -0
- package/src/utils/route-discoverer.js +306 -0
- package/src/utils/security-analyzer.js +302 -0
- package/src/utils/seo-analyzer.js +164 -0
- package/src/utils/session-manager.js +12 -0
- package/src/utils/session-persistence.js +214 -0
- package/src/utils/severity-overrides.js +91 -0
- package/src/utils/slack-guard.js +18 -0
- package/src/utils/slug.js +8 -0
- package/src/utils/snapshot-analyzer.js +330 -0
- package/src/utils/telemetry.js +190 -0
|
@@ -0,0 +1,299 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ARGUS Phase C1: Codebase Cross-Reference Analysis
|
|
3
|
+
*
|
|
4
|
+
* Reads target app source files to surface issues that browser-only testing misses:
|
|
5
|
+
* C1.1 env_var_missing — process.env.X used in code but absent from all .env files
|
|
6
|
+
* C1.2 feature_flag_leakage — env var used in a conditional that is falsy/unset in .env
|
|
7
|
+
* C1.3 error_source_linked — console error stack trace parsed to file:line (info — enrichment)
|
|
8
|
+
* C1.4 dead_route — internal navigation link that returns HTTP 404
|
|
9
|
+
*
|
|
10
|
+
* All functions are pure (no MCP dependency) except detectDeadRoutes which does
|
|
11
|
+
* Node.js fetch() calls — it still requires no browser.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import fs from 'fs';
|
|
15
|
+
import path from 'path';
|
|
16
|
+
import { childLogger } from './logger.js';
|
|
17
|
+
|
|
18
|
+
const logger = childLogger('codebase-analyzer');
|
|
19
|
+
|
|
20
|
+
// ── File scanning ──────────────────────────────────────────────────────────────
|
|
21
|
+
|
|
22
|
+
/** File extensions that count as application source when scanning a project tree. */
const SOURCE_EXTENSIONS = new Set(['.js', '.mjs', '.cjs', '.ts', '.jsx', '.tsx', '.vue', '.svelte']);

// Node/OS built-in env names to skip in all checks
const BUILTIN_VARS = new Set([
  'NODE_ENV', 'PORT', 'HOST', 'PATH', 'HOME', 'USER', 'SHELL', 'PWD',
  'LANG', 'TZ', 'TERM', 'TMPDIR', 'TEMP', 'TMP', 'LOGNAME', 'UID',
  'COLORTERM', 'npm_package_version', 'npm_lifecycle_event',
]);

/**
 * Recursively read every source file below `sourceDir`.
 *
 * Dot-entries, dependency/build output directories and symlinks are skipped,
 * as are files larger than 1 MB. Unreadable directories and files are ignored.
 *
 * @param {string} sourceDir - Root directory to scan.
 * @returns {{ filePath: string, content: string }[]} One entry per readable source file.
 */
function collectSourceFiles(sourceDir) {
  const maxBytes = 1_000_000; // larger files are assumed to be minified bundles
  const skippedNames = new Set(['node_modules', 'dist', 'build', '.next']);
  const collected = [];

  // Symlinks are never followed, which rules out directory cycles.
  const shouldSkip = (entry) =>
    entry.name.startsWith('.') || skippedNames.has(entry.name) || entry.isSymbolicLink();

  const visit = (dir) => {
    let dirents;
    try {
      dirents = fs.readdirSync(dir, { withFileTypes: true });
    } catch {
      return; // unreadable directory — nothing to collect here
    }

    dirents
      .filter((entry) => !shouldSkip(entry))
      .forEach((entry) => {
        const absolute = path.join(dir, entry.name);
        if (entry.isDirectory()) {
          visit(absolute);
          return;
        }
        if (!SOURCE_EXTENSIONS.has(path.extname(entry.name))) return;
        try {
          if (fs.statSync(absolute).size > maxBytes) return;
          collected.push({ filePath: absolute, content: fs.readFileSync(absolute, 'utf8') });
        } catch {
          // unreadable file — skip it
        }
      });
  };

  visit(sourceDir);
  return collected;
}
|
|
53
|
+
|
|
54
|
+
/**
 * Parse a dotenv-style file into a plain key → value object.
 *
 * Blank lines, `#` comment lines and lines without a key before `=` are
 * ignored. A leading `export ` (shell-style dotenv files) is stripped so the
 * variable is stored under its real name. One leading and one trailing quote
 * character are removed from the value.
 *
 * @param {string} envFilePath - Path of the file to read.
 * @returns {Object<string, string>} Parsed variables; empty when the file cannot be read.
 */
function parseEnvFile(envFilePath) {
  const vars = {};
  let content;
  try {
    content = fs.readFileSync(envFilePath, 'utf8');
  } catch {
    return vars; // a missing or unreadable env file simply declares nothing
  }

  for (const raw of content.split(/\r?\n/)) {
    const trimmed = raw.trim();
    if (!trimmed || trimmed.startsWith('#')) continue;

    // `export FOO=bar` must declare FOO, not a key called "export FOO".
    const line = trimmed.replace(/^export\s+/, '');
    const eq = line.indexOf('=');
    if (eq < 1) continue;

    const key = line.slice(0, eq).trim();
    const val = line.slice(eq + 1).trim().replace(/^["']|["']$/g, '');
    if (key) vars[key] = val;
  }
  return vars;
}
|
|
69
|
+
|
|
70
|
+
/**
 * Build the set of environment variables considered "declared" for a project.
 *
 * Sources, merged in this order (later values overwrite earlier ones):
 * the explicit `envFile` — or `<sourceDir>/.env` when none is given — then
 * `.env.local`, `.env.example`, `.env.development`, `.env.production`, and
 * finally the environment of the running process.
 *
 * @param {string} sourceDir - Project root containing the `.env*` files.
 * @param {string|null|undefined} envFile - Optional explicit env file path.
 * @returns {Object<string, string>} Declared variable names mapped to values.
 */
function loadDeclaredVars(sourceDir, envFile) {
  const primary = envFile ? envFile : path.join(sourceDir, '.env');
  const secondary = ['.env.local', '.env.example', '.env.development', '.env.production']
    .map((name) => path.join(sourceDir, name));

  const declared = {};
  [primary, ...secondary].forEach((file) => {
    Object.assign(declared, parseEnvFile(file));
  });

  // Runtime env (the process running Argus) counts too — it may have vars set in CI
  for (const name of Object.keys(process.env)) {
    const runtimeValue = process.env[name];
    if (runtimeValue === undefined) continue;
    declared[name] = runtimeValue;
  }
  return declared;
}
|
|
89
|
+
|
|
90
|
+
// ── C1.1: Env variable audit ───────────────────────────────────────────────────
|
|
91
|
+
|
|
92
|
+
// Matches `process.env.NAME` where NAME is an upper-case / underscore identifier.
const ENV_REF_RE = /\bprocess\.env\.([A-Z_][A-Z0-9_]*)\b/g;

/**
 * C1.1 — report `process.env.X` references that no env source declares.
 *
 * @param {string} sourceDir - Root of the target app's source tree.
 * @param {string|null} [envFile] - Optional explicit env file path.
 * @returns {object[]} One `env_var_missing` finding per undeclared variable,
 *   in order of first appearance; empty when `sourceDir` is falsy.
 */
export function auditEnvVariables(sourceDir, envFile) {
  if (!sourceDir) return [];

  const sources = collectSourceFiles(sourceDir);
  const declared = loadDeclaredVars(sourceDir, envFile);

  // varName → set of relative paths referencing it (insertion order is kept)
  const usage = new Map();
  for (const { filePath, content } of sources) {
    const relPath = path.relative(sourceDir, filePath);
    for (const [, varName] of content.matchAll(ENV_REF_RE)) {
      if (BUILTIN_VARS.has(varName)) continue;
      if (!usage.has(varName)) usage.set(varName, new Set());
      usage.get(varName).add(relPath);
    }
  }

  const findings = [];
  for (const [varName, pathSet] of usage) {
    if (varName in declared) continue;
    const locations = [...pathSet];
    findings.push({
      type: 'env_var_missing',
      varName,
      referencedIn: locations.slice(0, 5),
      message: `process.env.${varName} referenced in source but not declared in any .env file (found in: ${locations.slice(0, 3).join(', ')})`,
      severity: 'warning',
      url: '',
    });
  }
  return findings;
}
|
|
124
|
+
|
|
125
|
+
// ── C1.2: Feature flag leakage ────────────────────────────────────────────────
|
|
126
|
+
// Detect env vars used in conditionals (if/&&/||/ternary) that are falsy in .env.
|
|
127
|
+
// A permanently-disabled code path is a dead-weight risk — it may also shadow bugs.
|
|
128
|
+
|
|
129
|
+
// Match env var on either side of a comparison / logical operator
|
|
130
|
+
// Group 1: env var preceded by `if (`, a logical operator, `?` or a comparison.
// Group 2: env var followed by a comparison, a logical operator, `?` or `:`.
const FLAG_RE = /(?:(?:if\s*\(|&&|\|\||[?]|===|!==|==|!=)\s*process\.env\.([A-Z_][A-Z0-9_]*)|process\.env\.([A-Z_][A-Z0-9_]*)\s*(?:===|!==|==|!=|&&|\|\||[?:]))/g;

/**
 * C1.2 — flag env vars used in conditionals whose value in the env file is
 * missing, empty, "false" or "0".
 *
 * Only the env file is consulted (`envFile`, or `<sourceDir>/.env`); the
 * runtime `process.env` is deliberately ignored so that flags absent from the
 * file are surfaced. Each variable is reported at most once per source file.
 *
 * @param {string} sourceDir - Root of the target app's source tree.
 * @param {string|null} [envFile] - Optional explicit env file path.
 * @returns {object[]} `feature_flag_leakage` findings; empty when `sourceDir` is falsy.
 */
export function detectFeatureFlagLeakage(sourceDir, envFile) {
  if (!sourceDir) return [];

  const sources = collectSourceFiles(sourceDir);
  const fileEnv = parseEnvFile(envFile ?? path.join(sourceDir, '.env'));

  const isDisabled = (v) => v === undefined || ['', 'false', '0'].includes(v);
  const reported = new Set();
  const leaks = [];

  for (const { filePath, content } of sources) {
    const relPath = path.relative(sourceDir, filePath);

    for (const match of content.matchAll(FLAG_RE)) {
      const flag = match[1] ?? match[2];
      if (!flag || BUILTIN_VARS.has(flag)) continue;

      // First sighting of a flag in a file decides; later ones are ignored.
      const key = `${flag}::${relPath}`;
      if (reported.has(key)) continue;
      reported.add(key);

      const current = fileEnv[flag]; // undefined if not in .env
      if (!isDisabled(current)) continue;

      const state = current === undefined ? 'not set in .env' : `"${current}" (falsy)`;
      leaks.push({
        type: 'feature_flag_leakage',
        varName: flag,
        value: current ?? '(not set)',
        file: relPath,
        message: `process.env.${flag} is used in a conditional in ${relPath} but is ${state} — that code branch is permanently disabled`,
        severity: 'warning',
        url: '',
      });
    }
  }
  return leaks;
}
|
|
170
|
+
|
|
171
|
+
// ── C1.3: Error-to-source linking ─────────────────────────────────────────────
|
|
172
|
+
// Parse stack traces from console error findings. Source maps are not resolved —
|
|
173
|
+
// we surface the bundle file:line as-is; that's already enough to grep.
|
|
174
|
+
|
|
175
|
+
// Chrome stack frame: " at FnName (http://host/bundle.js:1:4567)"
|
|
176
|
+
// Chrome anon: " at http://host/chunk.js:1:4567"
|
|
177
|
+
// Groups: 1 = function name (optional); 2/3/4 = file, line, column of a frame
// given as an http(s) URL; 5/6/7 = file, line, column of a bare `file:line:col`.
const FRAME_RE = /at\s+(?:([^\s(]+)\s+\()?(?:https?:\/\/[^)]+?\/([^/)\s]+\.(?:js|ts|jsx|tsx|mjs)):(\d+):(\d+)\)?|([^\s/]+\.(?:js|ts|jsx|tsx|mjs)):(\d+):(\d+))/g;

/**
 * C1.3 — turn console findings that carry a stack trace into
 * `error_source_linked` findings pointing at the top frame's file and line.
 *
 * Findings whose `type` is not `'console'`, whose text has no ` at ` marker,
 * or in which no frame can be parsed are skipped. At most five frames are
 * kept per finding and the original text is truncated to 200 characters.
 *
 * @param {object[]} consoleFindings - Findings exposing `type` and `message` or `text`.
 * @returns {object[]} One info-level finding per enrichable console finding.
 */
export function enrichErrorsWithSource(consoleFindings) {
  const maxFrames = 5;
  const toInt = (primary, fallback) => parseInt(primary ?? fallback ?? '0', 10);
  const linked = [];

  for (const entry of consoleFindings) {
    if (entry.type !== 'console') continue;

    const text = String(entry.message ?? entry.text ?? '');
    if (!text.includes(' at ')) continue;

    const stack = [];
    for (const hit of text.matchAll(FRAME_RE)) {
      if (stack.length >= maxFrames) break;
      stack.push({
        fn: hit[1] ?? '(anonymous)',
        file: hit[2] ?? hit[5] ?? '?',
        line: toInt(hit[3], hit[6]),
        col: toInt(hit[4], hit[7]),
      });
    }
    if (stack.length === 0) continue;

    const [first] = stack;
    linked.push({
      type: 'error_source_linked',
      originalMessage: text.slice(0, 200),
      stackFrames: stack,
      message: `Console error in ${first.file}:${first.line} (fn: ${first.fn})`,
      severity: 'info',
      url: '',
    });
  }
  return linked;
}
|
|
211
|
+
|
|
212
|
+
// ── C1.4: Dead route detection ────────────────────────────────────────────────
|
|
213
|
+
// HEAD-request each internal link discovered on crawled pages that was not already
|
|
214
|
+
// in the targeted route list. 404 responses are emitted as dead_route warnings.
|
|
215
|
+
|
|
216
|
+
// Uses getAttribute('href') so '#section' is caught before a.href resolves it to
|
|
217
|
+
// an absolute URL (a.href always returns a fully-qualified URL in browsers).
|
|
218
|
+
/**
 * Source text of a page-side function that lists same-origin link targets.
 *
 * The raw `href` attribute is inspected first so that fragment-only,
 * `mailto:`, `tel:` and `javascript:` links are dropped before the browser
 * resolves them; the remaining anchors are kept only when their resolved URL
 * shares the page origin. Each absolute URL is returned once, in document
 * order — a link repeated in several places is not reported repeatedly.
 * Any unexpected failure yields an empty array.
 */
const INTERNAL_LINKS_SCRIPT = `() => {
  try {
    var o = window.location.origin;
    var seen = Object.create(null);
    return Array.from(document.querySelectorAll('a[href]'))
      .filter(function(a){
        var raw = a.getAttribute('href') || '';
        if (!raw || raw.startsWith('#') || raw.startsWith('mailto:') || raw.startsWith('tel:') || raw.startsWith('javascript:')) return false;
        try { return new URL(a.href).origin === o; } catch { return false; }
      })
      .map(function(a){ return a.href; })
      .filter(function(href){
        if (seen[href]) return false;
        seen[href] = true;
        return true;
      });
  } catch(e) { return []; }
}`;
|
|
230
|
+
|
|
231
|
+
export { INTERNAL_LINKS_SCRIPT };
|
|
232
|
+
|
|
233
|
+
/**
 * C1.4 — HEAD-request discovered links and report those answering 404.
 *
 * Links are resolved against `baseUrl` and de-duplicated by pathname with a
 * trailing slash removed; paths listed in `alreadyTestedPaths` are not
 * requested. Requests run one after another, follow redirects and time out
 * after five seconds. Unparseable links and network errors are skipped.
 *
 * @param {string} baseUrl - Base used to resolve links; stored as `url` on findings.
 * @param {string[]} discoveredLinks - Link targets collected from crawled pages.
 * @param {string[]} [alreadyTestedPaths] - Paths already covered by the crawl.
 * @returns {Promise<object[]>} `dead_route` findings.
 */
export async function detectDeadRoutes(baseUrl, discoveredLinks, alreadyTestedPaths) {
  if (!discoveredLinks?.length) return [];

  const stripTrailingSlash = (p) => p.replace(/\/$/, '') || '/';
  const visited = new Set((alreadyTestedPaths ?? []).map((p) => stripTrailingSlash(p)));
  const deadRoutes = [];

  for (const link of discoveredLinks) {
    let routePath;
    try {
      routePath = stripTrailingSlash(new URL(link, baseUrl).pathname);
    } catch {
      continue; // not a resolvable URL
    }

    if (visited.has(routePath)) continue;
    visited.add(routePath);

    let status;
    try {
      const response = await fetch(new URL(link, baseUrl).href, {
        method: 'HEAD',
        signal: AbortSignal.timeout(5000),
        redirect: 'follow',
      });
      status = response.status;
    } catch {
      continue; // network error — skip
    }

    if (status !== 404) continue;
    deadRoutes.push({
      type: 'dead_route',
      path: routePath,
      status: 404,
      message: `Internal link ${routePath} returns 404`,
      severity: 'warning',
      url: baseUrl,
    });
  }
  return deadRoutes;
}
|
|
271
|
+
|
|
272
|
+
// ── Main export ───────────────────────────────────────────────────────────────
|
|
273
|
+
|
|
274
|
+
/**
 * C1 codebase analysis entry point — runs the three static checks that need
 * neither MCP nor a browser and concatenates their findings.
 * detectDeadRoutes is not part of this: it is invoked separately because it
 * needs the list of links discovered during the crawl.
 *
 * A check that throws is logged as skipped and does not affect the others.
 *
 * @param {object}   opts
 * @param {string}   opts.sourceDir          — abs path to target app source code
 * @param {string}   [opts.envFile]          — path to .env file (defaults to sourceDir/.env)
 * @param {object[]} [opts.consoleFindings]  — console findings from route crawl for enrichment
 * @returns {Promise<object[]>} findings (env_var_missing, feature_flag_leakage,
 *   error_source_linked); empty when no sourceDir is given
 */
export async function analyzeCodebase({ sourceDir, envFile = null, consoleFindings = [] } = {}) {
  if (!sourceDir) return [];

  // [label used in the "skipped" warning, check to run]
  const stages = [
    ['env audit', () => auditEnvVariables(sourceDir, envFile)],
    ['feature flag check', () => detectFeatureFlagLeakage(sourceDir, envFile)],
    ['error enrichment', () => enrichErrorsWithSource(consoleFindings)],
  ];

  const collected = [];
  for (const [label, run] of stages) {
    try {
      collected.push(...run());
    } catch (err) {
      logger.warn(`[ARGUS] C1: ${label} skipped: ${err.message}`);
    }
  }
  return collected;
}
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ARGUS Content Analyzer (v3 Phase A5)
|
|
3
|
+
*
|
|
4
|
+
* DOM-based content quality checks via evaluate_script:
|
|
5
|
+
* 1. undefined / null / NaN rendered as visible text
|
|
6
|
+
* 2. Placeholder text — "Lorem ipsum", "TODO", "FIXME", etc.
|
|
7
|
+
* 3. Broken images — <img> that loaded but has naturalWidth === 0
|
|
8
|
+
* 4. Empty data-oriented lists — <ul>/<ol> with a results/items/grid class but zero <li> children
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
/**
|
|
12
|
+
* Synchronous arrow function injected into the page via mcp.evaluate_script.
|
|
13
|
+
* Returns a JSON string consumed by parseContentAnalysisResult().
|
|
14
|
+
*/
|
|
15
|
+
import { childLogger } from './logger.js';
|
|
16
|
+
|
|
17
|
+
const logger = childLogger('content-analyzer');
|
|
18
|
+
|
|
19
|
+
/**
 * Source text of a synchronous page-side function that inspects the DOM for
 * content-quality problems and returns a JSON string with four arrays:
 *   - nullMatches:  distinct "undefined" / "null" / "NaN" words in the body text
 *   - placeholders: labels of placeholder phrases found in the body text
 *   - brokenImages: distinct src values (max 200 chars each) of completed
 *                   non-data: images whose naturalWidth is 0
 *   - emptyLists:   class names (max 100 chars each) of <ul>/<ol> elements with
 *                   a data-like class and no direct <li> child — one entry per list
 *
 * Broken image sources are de-duplicated so an image repeated on the page
 * does not produce one identical finding per occurrence.
 */
export const CONTENT_ANALYSIS_SCRIPT = `() => {
  var body = document.body || {};
  var bodyText = body.innerText || '';

  // 1. Standalone undefined / null / NaN in visible body text
  var nullMatches = [];
  var nullSet = {};
  var nullPat = /\\bundefined\\b|\\bnull\\b|\\bNaN\\b/g;
  var m;
  while ((m = nullPat.exec(bodyText)) !== null) {
    if (!nullSet[m[0]]) { nullSet[m[0]] = true; nullMatches.push(m[0]); }
  }

  // 2. Placeholder text patterns
  var placeholders = [];
  var phChecks = [
    ['lorem ipsum', /lorem ipsum/i],
    ['todo', /\\btodo\\b/i],
    ['fixme', /\\bfixme\\b/i],
    ['coming soon', /\\bcoming soon\\b/i],
    ['placeholder', /\\bplaceholder text\\b/i],
    ['sample text', /\\bsample text\\b/i],
    ['insert content', /\\binsert (content|text|copy) here\\b/i],
    ['hello world', /\\bhello[\\s-]world\\b/i],
    ['test user', /\\btest user\\b/i],
    ['foo bar', /\\bfoo bar\\b/i],
    ['dummy text', /\\bdummy (text|data|content)\\b/i],
    ['ipsa lore', /\\bipsa lore\\b/i],
  ];
  phChecks.forEach(function(pair) {
    if (pair[1].test(bodyText)) placeholders.push(pair[0]);
  });

  // 3. Broken images — loaded (complete) but naturalWidth === 0 (excludes data: URIs)
  var brokenImages = [];
  var brokenSeen = Object.create(null);
  var imgs = Array.prototype.slice.call(document.querySelectorAll('img[src]'));
  imgs.forEach(function(img) {
    if (img.complete && img.naturalWidth === 0 &&
        img.src && img.src.indexOf('data:') !== 0) {
      var src = img.src.slice(0, 200);
      // Report each broken source once, however many <img> elements use it.
      if (!brokenSeen[src]) { brokenSeen[src] = true; brokenImages.push(src); }
    }
  });

  // 4. Empty data-oriented lists (ul/ol with results/items/list/feed/grid class but 0 li children)
  var emptyLists = [];
  var listClassPat = /results|items|list|feed|grid|entries|collection/i;
  var lists = Array.prototype.slice.call(document.querySelectorAll('ul, ol'));
  lists.forEach(function(list) {
    // Use :scope > li to count only direct children, not nested <li> elements.
    // querySelectorAll('li') descends into nested lists and would miss genuinely empty parents.
    if (!list.querySelector(':scope > li') && listClassPat.test(list.className || '')) {
      emptyLists.push((list.className || 'unnamed').slice(0, 100));
    }
  });

  return JSON.stringify({
    nullMatches: nullMatches,
    placeholders: placeholders,
    brokenImages: brokenImages,
    emptyLists: emptyLists,
  });
}`;
|
|
81
|
+
|
|
82
|
+
/**
 * Convert the raw evaluate_script result from CONTENT_ANALYSIS_SCRIPT into
 * structured bug entries for the Argus report.
 *
 * Accepts the JSON string itself, an already-parsed object, or either of
 * those wrapped as `{ result: ... }`. Unparseable input is logged and yields
 * no findings.
 *
 * @param {object|string|null} rawResult
 * @param {string} url - Page URL for context (copied onto every finding)
 * @returns {object[]} warning-level findings: one `content_null_rendered`,
 *   one `content_placeholder_text`, one `content_broken_image` per image,
 *   and one `content_empty_list`, each only when applicable
 */
export function parseContentAnalysisResult(rawResult, url) {
  if (rawResult == null) return [];

  let data;
  try {
    // Unwrap the MCP { result: '...' } wrapper before parsing; otherwise the
    // parsed value is the wrapper itself and every field lookup below misses.
    let raw = rawResult;
    if (typeof raw === 'object' && !Array.isArray(raw) && raw.result !== undefined) {
      raw = raw.result;
    }
    // JSON.stringify throws on circular objects; that lands in the catch too.
    data = JSON.parse(typeof raw === 'string' ? raw : JSON.stringify(raw));
  } catch (e) {
    // Single pre-formatted string: the reason stays in the message regardless
    // of how the logger treats additional arguments.
    logger.warn(`[ARGUS] parseContentAnalysisResult: parse failed — ${e.message}`);
    return [];
  }

  if (!data || typeof data !== 'object') return [];

  const bugs = [];

  if (Array.isArray(data.nullMatches) && data.nullMatches.length > 0) {
    bugs.push({
      type: 'content_null_rendered',
      values: data.nullMatches,
      message: `Null-like value rendered as visible text: ${data.nullMatches.join(', ')}`,
      severity: 'warning',
      url,
    });
  }

  if (Array.isArray(data.placeholders) && data.placeholders.length > 0) {
    bugs.push({
      type: 'content_placeholder_text',
      placeholders: data.placeholders,
      message: `Placeholder text found in page body: ${data.placeholders.join(', ')}`,
      severity: 'warning',
      url,
    });
  }

  for (const src of (Array.isArray(data.brokenImages) ? data.brokenImages : [])) {
    bugs.push({
      type: 'content_broken_image',
      src,
      message: `Broken image (naturalWidth=0): ${src}`,
      severity: 'warning',
      url,
    });
  }

  if (Array.isArray(data.emptyLists) && data.emptyLists.length > 0) {
    bugs.push({
      type: 'content_empty_list',
      classes: data.emptyLists,
      message: `Empty data list detected (${data.emptyLists.length} list(s) with no items): ${data.emptyLists.join(', ')}`,
      severity: 'warning',
      url,
    });
  }

  return bugs;
}
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Argus D7.4 — API contract validation.
|
|
3
|
+
* Validates captured network response bodies against JSON Schema-like schemas
|
|
4
|
+
* defined in src/config/targets.js apiContracts[].
|
|
5
|
+
*
|
|
6
|
+
* Supported schema keywords: type, required, properties, items.
|
|
7
|
+
* URL matching: exact pathname or pathname-prefix; full URL for http(s) contracts.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import fs from 'fs';
|
|
11
|
+
import path from 'path';
|
|
12
|
+
import { childLogger } from './logger.js';
|
|
13
|
+
|
|
14
|
+
const logger = childLogger('contract-validator');
|
|
15
|
+
|
|
16
|
+
/**
 * Lightweight JSON Schema validator.
 * Supports: type, required, properties (recursive), items (first 5 elements).
 *
 * `type` may be a single name or an array of alternatives. Besides the
 * runtime-derived names ("array", "null" and the `typeof` results), the JSON
 * Schema name "integer" is accepted for numbers without a fractional part.
 * `required` and `properties` only consider the value's own keys.
 *
 * @param {any}    value  - Value to validate
 * @param {object} schema - Schema object
 * @param {string} path   - JSONPath prefix for error messages (internal)
 * @returns {string[]} Array of human-readable violation strings (empty = valid)
 */
export function validateSchema(value, schema, path = '') {
  const violations = [];
  if (!schema || typeof schema !== 'object') return violations;
  const label = path || 'root';
  const isPlainObject = typeof value === 'object' && value !== null && !Array.isArray(value);

  // Type check
  if (schema.type !== undefined) {
    const actual = Array.isArray(value) ? 'array' : value === null ? 'null' : typeof value;
    const allowed = Array.isArray(schema.type) ? schema.type : [schema.type];
    const matches = allowed.some(
      (expected) => expected === actual || (expected === 'integer' && Number.isInteger(value)),
    );
    if (!matches) {
      violations.push(`${label}: expected type "${allowed.join('|')}", got "${actual}"`);
      return violations; // no point descending if the type is wrong
    }
  }

  // Required fields (objects only)
  if (Array.isArray(schema.required) && isPlainObject) {
    for (const field of schema.required) {
      // Own keys only: inherited members such as "toString" must not count.
      if (!Object.hasOwn(value, field)) {
        violations.push(`${label}: missing required field "${field}"`);
      }
    }
  }

  // Properties (recursive, objects only)
  if (schema.properties && isPlainObject) {
    for (const [key, propSchema] of Object.entries(schema.properties)) {
      if (Object.hasOwn(value, key)) {
        violations.push(...validateSchema(value[key], propSchema, path ? `${path}.${key}` : key));
      }
    }
  }

  // Array items — validate first 5 elements; deduplicate identical violations
  if (schema.items && Array.isArray(value) && value.length > 0) {
    const seen = new Set();
    for (let i = 0; i < Math.min(value.length, 5); i++) {
      for (const v of validateSchema(value[i], schema.items, `${label}[${i}]`)) {
        // Violations differing only by element index are reported once.
        const norm = v.replace(/\[\d+\]/, '[*]');
        if (!seen.has(norm)) { seen.add(norm); violations.push(v); }
      }
    }
  }

  return violations;
}
|
|
70
|
+
|
|
71
|
+
/**
 * Decide whether a captured network request matches a contract definition.
 *
 * URL matching rules:
 *   - If contract.url starts with http(s):// → exact full-URL match
 *   - Otherwise                              → pathname exact-match or prefix-match
 *     on a path-segment boundary; trailing slashes on contract.url are ignored,
 *     so "/api/users/" behaves like "/api/users"
 *   - If reqUrl cannot be parsed as an absolute URL → substring match on contract.url
 *
 * Method matching: case-insensitive; no constraint when contract.method is falsy.
 * A missing request method is treated as GET.
 *
 * @param {string} reqUrl    - Full URL from list_network_requests
 * @param {string} reqMethod - HTTP method from list_network_requests
 * @param {object} contract  - Entry from apiContracts[]
 * @returns {boolean} false when the contract has no url or reqUrl is not a string
 */
export function matchesContract(reqUrl, reqMethod, contract) {
  if (!contract?.url || typeof reqUrl !== 'string') return false;

  const method = (reqMethod ?? 'GET').toUpperCase();
  if (contract.method && contract.method.toUpperCase() !== method) return false;

  if (contract.url.startsWith('http://') || contract.url.startsWith('https://')) {
    return reqUrl === contract.url;
  }

  // Path-based match. Without this normalisation a contract written as
  // "/api/" would be compared against the prefix "/api//" and never match.
  const contractPath = contract.url.replace(/\/+$/, '') || '/';
  try {
    const { pathname } = new URL(reqUrl);
    return pathname === contractPath || pathname.startsWith(`${contractPath}/`);
  } catch {
    return reqUrl.includes(contract.url);
  }
}
|
|
102
|
+
|
|
103
|
+
/**
 * Load a schema from a contract definition.
 * Prefers contract.schema (inline); falls back to contract.schemaFile (JSON file).
 *
 * The schema file must resolve to a location inside the current working
 * directory; anything else is rejected. Rejections and read/parse failures are
 * logged as warnings.
 *
 * @param {object} contract - Entry from apiContracts[]
 * @returns {object|null} The schema, or null if neither source is present or
 *   the file is rejected, unreadable or not valid JSON.
 */
function loadSchema(contract) {
  if (contract.schema) return contract.schema;
  if (!contract.schemaFile) return null;

  // Prevent path traversal — schemaFile must stay within the project directory
  const resolved = path.resolve(contract.schemaFile);
  const cwd = process.cwd();
  if (resolved !== cwd && !resolved.startsWith(cwd + path.sep)) {
    logger.warn(`[ARGUS] contract-validator: schemaFile outside project directory — skipping: ${contract.schemaFile}`);
    return null;
  }

  try {
    return JSON.parse(fs.readFileSync(resolved, 'utf8'));
  } catch (e) {
    // Still best-effort, but say why the contract ends up unvalidated.
    logger.warn(`[ARGUS] contract-validator: could not load schemaFile ${contract.schemaFile}: ${e.message}`);
    return null;
  }
}
|
|
126
|
+
|
|
127
|
+
/**
 * Validate captured network requests against apiContracts[].
 * For each request that matches a contract, fetches the response body via
 * browser.getNetworkRequest and validates the parsed JSON against the schema.
 *
 * Gracefully skips requests whose body cannot be fetched or parsed, and
 * contracts without a loadable schema. Each contract's schema is loaded at
 * most once per call, on its first matching request.
 *
 * @param {object[]} networkReqs - Route-sliced requests from list_network_requests()
 * @param {object}   browser     - CdpBrowserAdapter
 * @param {object[]} contracts   - apiContracts[] from targets.js
 * @param {string}   pageUrl     - Current page URL (stored on each finding)
 * @returns {Promise<object[]>}  api_contract_violation findings; empty when
 *   there are no contracts or no request list
 */
export async function validateApiContracts(networkReqs, browser, contracts, pageUrl) {
  if (!contracts?.length || networkReqs == null) return [];
  const findings = [];

  // contract → schema (or null). Avoids re-reading a schemaFile, and repeating
  // its warning, for every request that matches the same contract.
  const schemaCache = new Map();

  for (const req of networkReqs) {
    for (const contract of contracts) {
      if (!matchesContract(req.url, req.method, contract)) continue;

      if (!schemaCache.has(contract)) schemaCache.set(contract, loadSchema(contract));
      const schema = schemaCache.get(contract);
      if (!schema) continue;

      // Fetch response body — graceful: skip if unavailable or not JSON
      let body = null;
      try {
        const raw = await browser.getNetworkRequest(req.id ?? req.requestId);
        const text = raw?.responseBody ?? raw?.body ?? null;
        if (text) body = JSON.parse(text);
      } catch {
        continue; // body unavailable — skip validation for this request
      }

      if (body === null) continue;

      const method = req.method ?? 'GET';
      for (const violation of validateSchema(body, schema)) {
        findings.push({
          type: 'api_contract_violation',
          requestUrl: req.url,
          method,
          message: `API contract violation for ${method} ${req.url}: ${violation}`,
          severity: 'warning',
          url: pageUrl,
        });
      }
    }
  }

  return findings;
}
|