argusqa-os 9.7.6 → 9.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -9
- package/glama.json +2 -2
- package/package.json +6 -6
- package/src/cli/pr-validate.js +275 -56
- package/src/mcp-server.js +142 -26
- package/src/orchestration/crawl-and-report.js +1 -1
- package/src/orchestration/orchestrator.js +34 -0
- package/src/utils/audit-depth.js +148 -0
- package/src/utils/deploy-preview.js +210 -0
- package/src/utils/github-api.js +242 -0
- package/src/utils/github-reporter.js +251 -39
- package/src/utils/html-reporter.js +283 -92
- package/src/utils/import-graph.js +290 -0
- package/src/utils/parallel-crawler.js +202 -0
- package/src/utils/pr-baseline.js +230 -0
- package/src/utils/pr-diff-analyzer.js +378 -40
- package/src/utils/route-discoverer.js +25 -3
|
@@ -2,8 +2,32 @@
|
|
|
2
2
|
* PR Diff Analyzer — maps GitHub PR changed files to affected Argus routes.
|
|
3
3
|
*
|
|
4
4
|
* parsePrUrl(prUrl) → { owner, repo, prNumber }
|
|
5
|
-
* fetchPrFiles(prUrl, token) →
|
|
5
|
+
* fetchPrFiles(prUrl, token) → Array<{ filename, status, patch }> for changed files
|
|
6
6
|
* mapFilesToRoutes(files, routes) → Route[] subset likely affected by the diff
|
|
7
|
+
* (accepts string[] OR the fetchPrFiles object[] above)
|
|
8
|
+
* mapFilesToRoutesDeep(files, routes, { sourceDir })
|
|
9
|
+
* → framework-aware Route[] (C1): maps a changed component to
|
|
10
|
+
* only the routes whose page files import it via the static
|
|
11
|
+
* import graph; falls back to mapFilesToRoutes on any
|
|
12
|
+
* ambiguity (never narrows away a possible regression).
|
|
13
|
+
* Monorepo-aware (C2): when sourceDir points at a workspace
|
|
14
|
+
* package subdir (apps/web, packages/ui, …) and GitHub
|
|
15
|
+
* returns repo-root-relative paths, the workspace prefix is
|
|
16
|
+
* stripped so the file resolves into the package graph.
|
|
17
|
+
* Stylesheet-aware (C3): a changed non-global stylesheet
|
|
18
|
+
* (CSS/SCSS/Sass/Less) attributes to only the routes that
|
|
19
|
+
* import it via the same graph; global stylesheets
|
|
20
|
+
* (globals.css, …) keep the conservative all-routes fallback.
|
|
21
|
+
* stripWorkspacePrefix(filename) → drop a leading monorepo workspace prefix
|
|
22
|
+
* ("apps/<pkg>/", "packages/<pkg>/", …) so workspace dirs
|
|
23
|
+
* don't pollute slug matching (C2)
|
|
24
|
+
* packageRelativePath(root, file) → repo-root-relative file path re-based onto a package
|
|
25
|
+
* subdir `root`, or null when there is no prefix overlap (C2)
|
|
26
|
+
* firstAddedLine(patch) → new-file line number of the first added (+) line, or null
|
|
27
|
+
* resolveAnnotationTarget(routePath, prFiles)
|
|
28
|
+
* → { path, line } when a changed file SPECIFICALLY maps to
|
|
29
|
+
* the route AND has a real patch line; null otherwise
|
|
30
|
+
* (Phase A3 file:line annotations — never fabricates a line)
|
|
7
31
|
*
|
|
8
32
|
* Pure functions + one async fetch — no Chrome, no MCP, no AI verdict.
|
|
9
33
|
* AI verdict logic ships separately in the private argus-pro repo.
|
|
@@ -13,7 +37,11 @@
|
|
|
13
37
|
* entry point (src/cli/pr-validate.js), which owns stdout.
|
|
14
38
|
*/
|
|
15
39
|
|
|
40
|
+
import path from 'path';
|
|
16
41
|
import { childLogger } from './logger.js';
|
|
42
|
+
import { discoverNextJsRouteFiles } from './route-discoverer.js';
|
|
43
|
+
import { buildImportGraph, findDependents } from './import-graph.js';
|
|
44
|
+
import { githubFetch } from './github-api.js';
|
|
17
45
|
|
|
18
46
|
const logger = childLogger('pr-diff-analyzer');
|
|
19
47
|
|
|
@@ -36,11 +64,23 @@ export function parsePrUrl(prUrl) {
|
|
|
36
64
|
}
|
|
37
65
|
|
|
38
66
|
/**
|
|
39
|
-
* Fetch the
|
|
67
|
+
* Fetch the files changed by a GitHub pull request (up to 300 — 3 pages × 100).
|
|
68
|
+
*
|
|
69
|
+
* Returns the per-file `status` (added | modified | removed | renamed | …) and the
|
|
70
|
+
* unified-diff `patch` hunk text for each file, in addition to the filename. `status`
|
|
71
|
+
* + `patch` are consumed downstream by the PR Validator's line-level annotations
|
|
72
|
+
* (file:line from the patch hunks) and component→route mapping; callers that only need
|
|
73
|
+
* filenames derive them with `files.map(f => f.filename)`.
|
|
74
|
+
*
|
|
75
|
+
* GitHub omits `patch` for binary files and files whose diff is too large to inline —
|
|
76
|
+
* those come back as `patch: null` (never `undefined`).
|
|
77
|
+
*
|
|
78
|
+
* The REST host is `process.env.GITHUB_API_URL` (GitHub Actions sets this; GHES points it
|
|
79
|
+
* at the enterprise host) and defaults to `https://api.github.com` when unset.
|
|
40
80
|
*
|
|
41
81
|
* @param {string} prUrl - GitHub PR URL (any format accepted by parsePrUrl)
|
|
42
82
|
* @param {string} [githubToken] - GitHub token; omit for public repos
|
|
43
|
-
* @returns {Promise<
|
|
83
|
+
* @returns {Promise<Array<{ filename: string, status: string, patch: string|null }>>}
|
|
44
84
|
*/
|
|
45
85
|
export async function fetchPrFiles(prUrl, githubToken) {
|
|
46
86
|
const { owner, repo, prNumber } = parsePrUrl(prUrl);
|
|
@@ -51,18 +91,33 @@ export async function fetchPrFiles(prUrl, githubToken) {
|
|
|
51
91
|
...(githubToken ? { Authorization: `Bearer ${githubToken}` } : {}),
|
|
52
92
|
};
|
|
53
93
|
|
|
94
|
+
// REST API base host. Defaults to the public github.com API; honours GITHUB_API_URL —
|
|
95
|
+
// the env var GitHub Actions injects automatically (and that GitHub Enterprise Server
|
|
96
|
+
// sets to the enterprise API host), so the validator works on GHES unchanged and the
|
|
97
|
+
// CLI entry point is hermetically testable against a recorded fixture server. Unset →
|
|
98
|
+
// 'https://api.github.com' → byte-identical to before. Trailing slash trimmed so the
|
|
99
|
+
// path concatenation below never doubles a '/'.
|
|
100
|
+
const apiBase = String(process.env.GITHUB_API_URL || 'https://api.github.com').replace(/\/+$/, '');
|
|
101
|
+
|
|
54
102
|
const allFiles = [];
|
|
55
103
|
const MAX_PAGES = 3; // caps at 300 files; avoids runaway requests on mega-PRs
|
|
56
104
|
|
|
57
105
|
for (let page = 1; page <= MAX_PAGES; page++) {
|
|
58
|
-
const apiUrl =
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
106
|
+
const apiUrl = `${apiBase}/repos/${owner}/${repo}/pulls/${prNumber}/files?per_page=100&page=${page}`;
|
|
107
|
+
// E2: resilient — retries a rate-limit (403 primary / 429 secondary) + transient
|
|
108
|
+
// 5xx with backoff, and throws a structured, secret-free error on 404/422/etc.
|
|
109
|
+
// (the CLI surfaces err.message into a ::error:: annotation, so it must never leak
|
|
110
|
+
// the token). githubFetch returns the OK Response; this loop owns the pagination.
|
|
111
|
+
const res = await githubFetch(apiUrl, {
|
|
112
|
+
headers,
|
|
113
|
+
context: `GET repos/${owner}/${repo}/pulls/${prNumber}/files (page ${page})`,
|
|
114
|
+
});
|
|
64
115
|
const files = await res.json();
|
|
65
|
-
allFiles.push(...files.map(f =>
|
|
116
|
+
allFiles.push(...files.map(f => ({
|
|
117
|
+
filename: f.filename,
|
|
118
|
+
status: f.status,
|
|
119
|
+
patch: f.patch ?? null,
|
|
120
|
+
})));
|
|
66
121
|
if (files.length < 100) break; // last page reached
|
|
67
122
|
}
|
|
68
123
|
|
|
@@ -104,6 +159,116 @@ export const INFRA_PATTERNS = [
|
|
|
104
159
|
/package\.json$/i,
|
|
105
160
|
];
|
|
106
161
|
|
|
162
|
+
/**
 * Directory names treated as monorepo workspace roots. A path such as
 * "apps/web/components/Foo.tsx" or "packages/ui/src/Button.tsx" starts with two
 * segments (this root literal, then the package name) that identify the PACKAGE,
 * not the app route the file affects. (PR_VALIDATOR C2 — monorepo path awareness.)
 */
const WORKSPACE_ROOTS = new Set(['apps', 'packages', 'libs', 'services']);

/**
 * Drop a leading "<workspace-root>/<package>/" prefix from a repo-relative path.
 *
 * Used so the workspace-root literal and the package name do not end up as slug
 * tokens (e.g. "apps/web/checkout.tsx" → "checkout.tsx"). The root segment is
 * compared case-insensitively. The remainder is re-joined with forward slashes.
 *
 * The input is returned unchanged (as a string) when its first segment is not a
 * recognized workspace root, or when fewer than three segments are present — i.e.
 * stripping would leave nothing. Pure string operation. (PR_VALIDATOR C2.)
 *
 * @param {string} filename - repo-root-relative path (forward or back slashes)
 * @returns {string} the package-relative remainder, or the original path
 */
export function stripWorkspacePrefix(filename) {
  const original = String(filename);
  // First segment = candidate workspace root, second = package name (ignored),
  // everything after that = the package-relative remainder.
  const [workspaceRoot, , ...remainder] = original.split(/[/\\]+/).filter(Boolean);
  const underWorkspace =
    remainder.length > 0 && WORKSPACE_ROOTS.has(workspaceRoot.toLowerCase());
  return underWorkspace ? remainder.join('/') : original;
}
|
|
188
|
+
|
|
189
|
+
/**
 * Re-base a repo-root-relative file path onto a package-subdir `root`.
 *
 * `root` may point at a monorepo package (e.g. ".../apps/web") while `file` is
 * relative to the repository root ("apps/web/components/Foo.tsx"); resolving one
 * against the other directly would repeat the workspace prefix. This finds the
 * LONGEST run of leading `file` segments that equals the trailing segments of
 * `root` and returns what is left of `file` after that run
 * (".../apps/web" + "apps/web/components/Foo.tsx" → "components/Foo.tsx").
 *
 * Segments are compared case-insensitively, and at least one segment of `file`
 * is always left in the result. Returns null when no overlap exists — e.g. the
 * file is already package-relative or lives under a different package. Pure
 * string operation. (PR_VALIDATOR C2.)
 *
 * @param {string} root - path to the package subdir (sourceDir)
 * @param {string} file - repo-root-relative changed-file path
 * @returns {string|null} package-relative path (forward slashes), or null
 */
export function packageRelativePath(root, file) {
  const toSegments = (value) => String(value).split(/[/\\]+/).filter(Boolean);
  const fileParts = toSegments(file);
  const rootLower = toSegments(root).map((seg) => seg.toLowerCase());
  const fileLower = fileParts.map((seg) => seg.toLowerCase());

  // Never consume the final file segment: the result must keep ≥1 segment.
  const longestCandidate = Math.min(rootLower.length, fileParts.length - 1);

  // Scan from the longest possible overlap downwards, so the first hit is the longest.
  for (let overlap = longestCandidate; overlap >= 1; overlap--) {
    const rootOffset = rootLower.length - overlap;
    let same = true;
    for (let i = 0; i < overlap; i++) {
      if (rootLower[rootOffset + i] !== fileLower[i]) {
        same = false;
        break;
      }
    }
    if (same) return fileParts.slice(overlap).join('/');
  }
  return null;
}
|
|
218
|
+
|
|
219
|
+
/**
 * Build the set of lowercase slug tokens for a changed-file path
 * (e.g. "src/pages/checkout.tsx" → {"src", "pages", "checkout"}).
 *
 * Any monorepo workspace prefix is removed first (C2), the final file extension
 * is dropped, the rest is split on path separators, dots, underscores and
 * hyphens, and tokens of a single character are discarded. Both the route
 * mapper and the annotation resolver use this helper so their matching stays
 * in lock-step.
 *
 * @param {string} filename - changed-file path
 * @returns {Set<string>} lowercase tokens longer than one character
 */
function fileSlugTokens(filename) {
  const packageRelative = stripWorkspacePrefix(filename).toLowerCase();
  const withoutExtension = packageRelative.replace(/\.[^./\\]+$/, '');

  const tokens = new Set();
  for (const piece of withoutExtension.split(/[/\\._-]+/)) {
    if (piece.length > 1) tokens.add(piece);
  }
  return tokens;
}
|
|
234
|
+
|
|
235
|
+
/**
 * Split a route path into its meaningful lowercase segments
 * (e.g. "/checkout/review" → ["checkout", "review"]).
 *
 * Each "/"-separated piece is lowercased and stripped of every character outside
 * [a-z0-9]; pieces that end up one character or shorter are dropped. The root
 * path "/" (and a null/undefined path) therefore yields [].
 *
 * @param {string|null|undefined} routePath
 * @returns {string[]}
 */
function routePathSegments(routePath) {
  const segments = [];
  for (const piece of String(routePath ?? '').toLowerCase().split('/')) {
    const cleaned = piece.replace(/[^a-z0-9]/g, '');
    if (cleaned.length > 1) segments.push(cleaned);
  }
  return segments;
}
|
|
246
|
+
|
|
247
|
+
/**
 * Normalize and bucket a PR's changed files. Both route mappers
 * (mapFilesToRoutes and mapFilesToRoutesDeep) call this so they apply the same
 * excluded/infra short-circuits.
 *
 * @param {Array<string|{ filename: string }>} changedFiles - bare paths or
 *   objects carrying a `filename`; a non-array input is treated as empty
 * @returns {{ fileNames: string[], appFiles: string[], allExcluded: boolean, hasInfra: boolean }}
 *   - fileNames:   every non-empty string filename found in the input
 *   - appFiles:    fileNames that match none of EXCLUDED_PATTERNS
 *   - allExcluded: true when there were filenames but every one was excluded
 *   - hasInfra:    true when at least one appFile matches INFRA_PATTERNS
 */
function classifyChangedFiles(changedFiles) {
  const matchesAny = (patterns, name) => patterns.some(re => re.test(name));

  // Accept both shapes (string | { filename }) and drop anything unusable.
  const fileNames = [];
  if (Array.isArray(changedFiles)) {
    for (const entry of changedFiles) {
      const name = typeof entry === 'string' ? entry : entry?.filename;
      if (typeof name === 'string' && name.length > 0) fileNames.push(name);
    }
  }

  const appFiles = fileNames.filter(name => !matchesAny(EXCLUDED_PATTERNS, name));
  const allExcluded = fileNames.length > 0 && appFiles.length === 0;
  const hasInfra = appFiles.some(name => matchesAny(INFRA_PATTERNS, name));

  return { fileNames, appFiles, allExcluded, hasInfra };
}
|
|
271
|
+
|
|
107
272
|
/**
|
|
108
273
|
* Map a list of changed file paths to the subset of Argus route configs that
|
|
109
274
|
* are likely affected, using heuristic slug matching.
|
|
@@ -113,7 +278,9 @@ export const INFRA_PATTERNS = [
|
|
|
113
278
|
* 2. File path contains a slug that matches a route path segment → include that route
|
|
114
279
|
* 3. No matches → return ALL routes (conservative fallback — never miss a regression)
|
|
115
280
|
*
|
|
116
|
-
* @param {string
|
|
281
|
+
* @param {Array<string|{ filename: string }>} changedFiles - Filenames from fetchPrFiles;
|
|
282
|
+
* accepts either bare path strings or the `{ filename, status, patch }` objects that
|
|
283
|
+
* fetchPrFiles now returns (the filename is extracted from each).
|
|
117
284
|
* @param {Array<{ path: string, name: string }>} routes - Route configs from targets.js
|
|
118
285
|
* @returns {Array<{ path: string, name: string }>}
|
|
119
286
|
*/
|
|
@@ -121,41 +288,212 @@ export function mapFilesToRoutes(changedFiles, routes) {
|
|
|
121
288
|
if (!routes || routes.length === 0) return [];
|
|
122
289
|
if (!changedFiles || changedFiles.length === 0) return routes;
|
|
123
290
|
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
);
|
|
128
|
-
|
|
129
|
-
// README-only / CI-only PR — skip the audit entirely
|
|
130
|
-
if (appFiles.length === 0) return [];
|
|
131
|
-
|
|
132
|
-
// Infrastructure change → full audit
|
|
133
|
-
if (appFiles.some(f => INFRA_PATTERNS.some(re => re.test(f)))) {
|
|
134
|
-
return routes;
|
|
135
|
-
}
|
|
291
|
+
const { fileNames, appFiles, allExcluded, hasInfra } = classifyChangedFiles(changedFiles);
|
|
292
|
+
if (fileNames.length === 0) return routes; // nothing usable → conservative
|
|
293
|
+
if (allExcluded) return []; // README-only / CI-only PR — skip the audit
|
|
294
|
+
if (hasInfra) return routes; // infrastructure change → full audit
|
|
136
295
|
|
|
137
296
|
// Build a flat set of lowercase slugs from app-relevant changed files
|
|
138
|
-
const fileSlugs = new Set(
|
|
139
|
-
appFiles.flatMap(f =>
|
|
140
|
-
f.toLowerCase()
|
|
141
|
-
.replace(/\.[^./\\]+$/, '')
|
|
142
|
-
.split(/[/\\._-]+/)
|
|
143
|
-
.filter(s => s.length > 1),
|
|
144
|
-
),
|
|
145
|
-
);
|
|
146
|
-
|
|
147
|
-
// Extract meaningful segments from a route path (e.g. "/checkout/review" → ["checkout","review"])
|
|
148
|
-
const routeSegments = (route) =>
|
|
149
|
-
route.path
|
|
150
|
-
.toLowerCase()
|
|
151
|
-
.split('/')
|
|
152
|
-
.map(s => s.replace(/[^a-z0-9]/g, ''))
|
|
153
|
-
.filter(s => s.length > 1);
|
|
297
|
+
const fileSlugs = new Set(appFiles.flatMap(f => [...fileSlugTokens(f)]));
|
|
154
298
|
|
|
155
299
|
const matched = routes.filter(route =>
|
|
156
|
-
|
|
300
|
+
routePathSegments(route.path).some(seg => fileSlugs.has(seg)),
|
|
157
301
|
);
|
|
158
302
|
|
|
159
303
|
// Conservative fallback: if nothing matched, audit everything
|
|
160
304
|
return matched.length > 0 ? matched : routes;
|
|
161
305
|
}
|
|
306
|
+
|
|
307
|
+
/**
 * Normalize a route path so it begins with exactly one "/".
 * Any run of leading slashes is collapsed; the rest of the path (including a
 * trailing slash and letter case) is left untouched. null/undefined → "/".
 *
 * @param {string|null|undefined} p
 * @returns {string}
 */
function normalizeRoutePath(p) {
  const withoutLeadingSlashes = String(p ?? '').replace(/^\/+/, '');
  return `/${withoutLeadingSlashes}`;
}
|
|
311
|
+
|
|
312
|
+
/**
 * Framework-aware route mapping (PR_VALIDATOR C1) — the precise counterpart of
 * mapFilesToRoutes.
 *
 * With a `sourceDir`, each changed app file is resolved to the route paths it can
 * affect: its own route(s) when the file is itself a discovered route file, plus
 * the routes of every route file reported as a dependent by the import graph.
 * The result is the subset of the caller's `routes` whose normalized path matches
 * one of those resolved paths.
 *
 * SAFETY: the function only narrows when every changed app file resolves to at
 * least one route AND every resolved route path exists in `routes`. It defers to
 * mapFilesToRoutes (the slug heuristic) whenever:
 *   - `sourceDir` is not provided,
 *   - an infra file is among the changes,
 *   - no Next.js route files are discovered under `sourceDir`,
 *   - the import graph reports `truncated`,
 *   - a changed file is not a node of the graph (even after the C2 monorepo
 *     re-base via packageRelativePath),
 *   - a changed file maps to no route,
 *   - a resolved route path is missing from `routes`,
 *   - anything in the static analysis throws.
 * Docs/CI-only changes return [] and an empty/unusable file list returns `routes`,
 * matching the heuristic's short-circuits.
 *
 * NOTE(review): `graph.files` is used as a membership set keyed by absolute path
 * and `graph.reverse` is handed to findDependents as-is; both shapes come from
 * buildImportGraph, which is not visible here — confirm against import-graph.js.
 *
 * @param {Array<string|{ filename: string }>} changedFiles
 * @param {Array<{ path: string, name?: string }>} routes
 * @param {{ sourceDir?: string|null }} [options]
 * @returns {Array<{ path: string, name?: string }>}
 */
export function mapFilesToRoutesDeep(changedFiles, routes, { sourceDir } = {}) {
  // Single conservative escape hatch used for every ambiguous case below.
  const fallBackToHeuristic = () => mapFilesToRoutes(changedFiles, routes);

  if (!routes || routes.length === 0) return [];
  if (!changedFiles || changedFiles.length === 0) return routes;
  if (!sourceDir) return fallBackToHeuristic(); // opt-in feature: no source dir → heuristic only

  const { fileNames, appFiles, allExcluded, hasInfra } = classifyChangedFiles(changedFiles);
  if (fileNames.length === 0) return routes;
  if (allExcluded) return []; // docs/CI-only PR — nothing to audit
  if (hasInfra) return fallBackToHeuristic(); // infra change → let the heuristic return all routes

  try {
    const root = path.resolve(sourceDir);

    const routeFiles = discoverNextJsRouteFiles(root);
    if (!routeFiles || routeFiles.length === 0) return fallBackToHeuristic(); // not a Next.js tree

    const graph = buildImportGraph(root);
    if (graph.truncated) return fallBackToHeuristic(); // file cap hit → graph may be incomplete

    // Index: route file → Set of route paths that file defines.
    const pathsByRouteFile = new Map();
    for (const entry of routeFiles) {
      const known = pathsByRouteFile.get(entry.file);
      if (known) {
        known.add(entry.path);
      } else {
        pathsByRouteFile.set(entry.file, new Set([entry.path]));
      }
    }

    // Collect, file by file, every route path the change can reach.
    const affectedPaths = new Set();
    for (const changed of appFiles) {
      let node = path.resolve(root, changed);

      if (!graph.files.has(node)) {
        // C2 (monorepo): `changed` may be repo-root-relative while `root` is a
        // package subdir. Re-base it, but adopt the re-based path only if it is a
        // verified graph node, so a file from another package still falls through.
        const rebased = packageRelativePath(root, changed);
        const candidate = rebased ? path.resolve(root, rebased) : null;
        if (candidate !== null && graph.files.has(candidate)) node = candidate;
      }
      if (!graph.files.has(node)) return fallBackToHeuristic(); // unknown file → ambiguous

      // The file's own routes first (if it is a page), then routes of its dependents.
      const pathsForFile = new Set(pathsByRouteFile.get(node) ?? []);
      for (const dependent of findDependents(graph.reverse, [node])) {
        for (const routePath of pathsByRouteFile.get(dependent) ?? []) {
          pathsForFile.add(routePath);
        }
      }

      if (pathsForFile.size === 0) return fallBackToHeuristic(); // reaches no page → ambiguous
      for (const routePath of pathsForFile) affectedPaths.add(routePath);
    }

    // Group the caller's routes by normalized path (leading slash canonicalized).
    const routesByNorm = new Map();
    for (const route of routes) {
      const key = normalizeRoutePath(route.path);
      const bucket = routesByNorm.get(key);
      if (bucket) {
        bucket.push(route);
      } else {
        routesByNorm.set(key, [route]);
      }
    }

    // Every resolved path must be known to the caller, otherwise narrowing is unsafe.
    const resolvedKeys = [...affectedPaths].map(p => normalizeRoutePath(p));
    if (resolvedKeys.some(key => !routesByNorm.has(key))) return fallBackToHeuristic();

    // Flatten to route objects, de-duplicated by identity, first occurrence wins.
    const matched = [...new Set(resolvedKeys.flatMap(key => routesByNorm.get(key)))];
    if (matched.length === 0) return fallBackToHeuristic();

    logger.debug(`[ARGUS] C1: framework-aware mapping narrowed ${appFiles.length} file(s) to ${matched.length}/${routes.length} route(s)`);
    return matched;
  } catch (err) {
    // A failure in static analysis must never narrow the audit — fall back.
    logger.debug(`[ARGUS] C1: framework-aware mapping failed (${err.message}) — using slug heuristic`);
    return fallBackToHeuristic();
  }
}
|
|
429
|
+
|
|
430
|
+
/**
 * Parse the new-file line number of the FIRST added (`+`) line in a unified-diff patch.
 *
 * Walks the patch line by line. Each hunk header (`@@ -a,b +c,d @@`) resets the
 * new-file counter to `c`; inside a hunk, context lines advance the counter,
 * deletions (`-`) do not, and the first `+` line returns the current counter.
 *
 * The diff marker line `\ No newline at end of file` is metadata about the line
 * before it, not a line of either file, so it is skipped without advancing the
 * counter. (Counting it would shift the reported line by one whenever the marker
 * precedes the first added line — e.g. when the old file's last line, which had
 * no trailing newline, is modified.)
 *
 * Returns null when the patch is not a non-empty string (absent/binary →
 * `patch: null`) or contains no `+` line inside a hunk (deletion-only). Callers
 * must treat null as "no real line" and fall back to a route-level annotation.
 *
 * @param {string|null|undefined} patch - unified-diff hunk text from fetchPrFiles
 * @returns {number|null} 1-based new-file line number, or null
 */
export function firstAddedLine(patch) {
  if (typeof patch !== 'string' || patch.length === 0) return null;

  let newLineNo = null; // null until the first hunk header has been seen
  for (const line of patch.split('\n')) {
    const header = line.match(/^@@ -\d+(?:,\d+)? \+(\d+)(?:,\d+)? @@/);
    if (header) {
      newLineNo = Number.parseInt(header[1], 10);
      continue;
    }
    if (newLineNo === null) continue; // anything before the first hunk header

    if (line.startsWith('+')) return newLineNo; // first added line — done
    if (line.startsWith('-')) continue; // deletion — consumes no new-file line
    if (line.startsWith('\\')) continue; // "\ No newline at end of file" — metadata, not a file line
    newLineNo++; // context line advances the new-file counter
  }
  return null; // no added line found (deletion-only patch)
}
|
|
457
|
+
|
|
458
|
+
/**
 * Pick a file:line annotation target for an audited route from the PR's changed files.
 *
 * A file qualifies only when it is neither excluded (EXCLUDED_PATTERNS) nor infra
 * (INFRA_PATTERNS), when at least one of its slug tokens equals a segment of the
 * route path, and when firstAddedLine finds an added line in its patch. Files are
 * scanned in the order given and the first qualifying one wins.
 *
 * Returns null — meaning the caller should annotate at route level — when:
 *   - `prFiles` is not a non-empty array,
 *   - the route has no usable segments (e.g. the root route "/"),
 *   - no file slug-matches the route,
 *   - every slug-matching file lacks an added line (bare-string entry, missing
 *     patch, or deletion-only patch).
 *
 * The returned `line` always comes from firstAddedLine; no line is invented here.
 *
 * @param {string} routePath - the audited route path (e.g. "/checkout")
 * @param {Array<string|{ filename: string, patch?: string|null }>} prFiles - fetchPrFiles output
 * @returns {{ path: string, line: number } | null}
 */
export function resolveAnnotationTarget(routePath, prFiles) {
  if (!Array.isArray(prFiles) || prFiles.length === 0) return null;

  const routeSegs = routePathSegments(routePath);
  if (routeSegs.length === 0) return null; // root / unsegmented route — no specific cause

  const matchesAny = (patterns, name) => patterns.some(re => re.test(name));

  for (const entry of prFiles) {
    const isBarePath = typeof entry === 'string';
    const filename = isBarePath ? entry : entry?.filename;

    const usable = typeof filename === 'string' && filename.length > 0;
    // Excluded files are irrelevant; infra files map to ALL routes, so never a specific cause.
    if (!usable || matchesAny(EXCLUDED_PATTERNS, filename) || matchesAny(INFRA_PATTERNS, filename)) {
      continue;
    }

    const tokens = fileSlugTokens(filename);
    if (!routeSegs.some(seg => tokens.has(seg))) continue;

    // Bare-string entries carry no patch, so they can never yield a line.
    const line = firstAddedLine(isBarePath ? null : entry?.patch);
    if (line != null) return { path: filename, line };
    // Slug-matched but no real line — keep scanning for a better candidate.
  }
  return null;
}
|
|
@@ -141,6 +141,28 @@ function parseLocElements(xml, baseUrl) {
|
|
|
141
141
|
*/
|
|
142
142
|
export function discoverFromNextJs(sourceDir) {
  // Path-only projection of discoverNextJsRouteFiles: keep each route path once,
  // in first-seen order.
  const routePaths = discoverNextJsRouteFiles(sourceDir).map(entry => entry.path);
  return [...new Set(routePaths)];
}
|
|
149
|
+
|
|
150
|
+
/**
|
|
151
|
+
* Like discoverFromNextJs, but returns each route paired with the absolute path of the
|
|
152
|
+
* page/route source FILE that defines it. The PR Validator's framework-aware route mapping
|
|
153
|
+
* (PR_VALIDATOR C1) uses this to map a changed file back to the route(s) it renders:
|
|
154
|
+
* directly when the changed file IS a page, or transitively (via the import graph) when a
|
|
155
|
+
* changed component is imported by a page. discoverFromNextJs is the path-only projection
|
|
156
|
+
* of this function (so the two can never disagree on which files are routes).
|
|
157
|
+
*
|
|
158
|
+
* `file` is whatever absolute/relative form `path.join(sourceDir, …)` yields — callers that
|
|
159
|
+
* compare it against an import graph must build that graph from the SAME sourceDir.
|
|
160
|
+
*
|
|
161
|
+
* @param {string} sourceDir project root (contains pages/ and/or app/)
|
|
162
|
+
* @returns {Array<{ path: string, file: string }>}
|
|
163
|
+
*/
|
|
164
|
+
export function discoverNextJsRouteFiles(sourceDir) {
|
|
165
|
+
const routeFiles = [];
|
|
144
166
|
|
|
145
167
|
// ── pages/ ────────────────────────────────────────────────────────────────
|
|
146
168
|
const pagesDir = path.join(sourceDir, 'pages');
|
|
@@ -162,7 +184,7 @@ export function discoverFromNextJs(sourceDir) {
|
|
|
162
184
|
// Skip dynamic segments like [slug] — no concrete URL to crawl
|
|
163
185
|
if (urlParts.some(p => p.includes('['))) continue;
|
|
164
186
|
|
|
165
|
-
|
|
187
|
+
routeFiles.push({ path: urlParts.length === 0 ? '/' : '/' + urlParts.join('/'), file });
|
|
166
188
|
}
|
|
167
189
|
}
|
|
168
190
|
|
|
@@ -183,11 +205,11 @@ export function discoverFromNextJs(sourceDir) {
|
|
|
183
205
|
// Skip dynamic segments like [id] — no concrete URL to crawl
|
|
184
206
|
if (filtered.some(p => p.includes('['))) continue;
|
|
185
207
|
|
|
186
|
-
|
|
208
|
+
routeFiles.push({ path: filtered.length === 0 ? '/' : '/' + filtered.join('/'), file });
|
|
187
209
|
}
|
|
188
210
|
}
|
|
189
211
|
|
|
190
|
-
return
|
|
212
|
+
return routeFiles;
|
|
191
213
|
}
|
|
192
214
|
|
|
193
215
|
// ── C3.3: React Router route discovery ───────────────────────────────────────
|