argusqa-os 9.7.6 → 9.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,8 +2,32 @@
2
2
  * PR Diff Analyzer — maps GitHub PR changed files to affected Argus routes.
3
3
  *
4
4
  * parsePrUrl(prUrl) → { owner, repo, prNumber }
5
- * fetchPrFiles(prUrl, token) → string[] of changed file paths
5
+ * fetchPrFiles(prUrl, token) → Array<{ filename, status, patch }> for changed files
6
6
  * mapFilesToRoutes(files, routes) → Route[] subset likely affected by the diff
7
+ * (accepts string[] OR the fetchPrFiles object[] above)
8
+ * mapFilesToRoutesDeep(files, routes, { sourceDir })
9
+ * → framework-aware Route[] (C1): maps a changed component to
10
+ * only the routes whose page files import it via the static
11
+ * import graph; falls back to mapFilesToRoutes on any
12
+ * ambiguity (never narrows away a possible regression).
13
+ * Monorepo-aware (C2): when sourceDir points at a workspace
14
+ * package subdir (apps/web, packages/ui, …) and GitHub
15
+ * returns repo-root-relative paths, the workspace prefix is
16
+ * stripped so the file resolves into the package graph.
17
+ * Stylesheet-aware (C3): a changed non-global stylesheet
18
+ * (CSS/SCSS/Sass/Less) attributes to only the routes that
19
+ * import it via the same graph; global stylesheets
20
+ * (globals.css, …) keep the conservative all-routes fallback.
21
+ * stripWorkspacePrefix(filename) → drop a leading monorepo workspace prefix
22
+ * ("apps/<pkg>/", "packages/<pkg>/", …) so workspace dirs
23
+ * don't pollute slug matching (C2)
24
+ * packageRelativePath(root, file) → repo-root-relative file path re-based onto a package
25
+ * subdir `root`, or null when there is no prefix overlap (C2)
26
+ * firstAddedLine(patch) → new-file line number of the first added (+) line, or null
27
+ * resolveAnnotationTarget(route, prFiles)
28
+ * → { path, line } when a changed file SPECIFICALLY maps to
29
+ * the route AND has a real patch line; null otherwise
30
+ * (Phase A3 file:line annotations — never fabricates a line)
7
31
  *
8
32
  * Pure functions + one async fetch — no Chrome, no MCP, no AI verdict.
9
33
  * AI verdict logic ships separately in the private argus-pro repo.
@@ -13,7 +37,11 @@
13
37
  * entry point (src/cli/pr-validate.js), which owns stdout.
14
38
  */
15
39
 
40
+ import path from 'path';
16
41
  import { childLogger } from './logger.js';
42
+ import { discoverNextJsRouteFiles } from './route-discoverer.js';
43
+ import { buildImportGraph, findDependents } from './import-graph.js';
44
+ import { githubFetch } from './github-api.js';
17
45
 
18
46
  const logger = childLogger('pr-diff-analyzer');
19
47
 
@@ -36,11 +64,23 @@ export function parsePrUrl(prUrl) {
36
64
  }
37
65
 
38
66
  /**
39
- * Fetch the list of file paths changed by a GitHub pull request (up to 100 files).
67
+ * Fetch the files changed by a GitHub pull request (up to 300 — 3 pages × 100).
68
+ *
69
+ * Returns the per-file `status` (added | modified | removed | renamed | …) and the
70
+ * unified-diff `patch` hunk text for each file, in addition to the filename. `status`
71
+ * + `patch` are consumed downstream by the PR Validator's line-level annotations
72
+ * (file:line from the patch hunks) and component→route mapping; callers that only need
73
+ * filenames derive them with `files.map(f => f.filename)`.
74
+ *
75
+ * GitHub omits `patch` for binary files and files whose diff is too large to inline —
76
+ * those come back as `patch: null` (never `undefined`).
77
+ *
78
+ * The REST host is `process.env.GITHUB_API_URL` (GitHub Actions sets this; GHES points it
79
+ * at the enterprise host) and defaults to `https://api.github.com` when unset.
40
80
  *
41
81
  * @param {string} prUrl - GitHub PR URL (any format accepted by parsePrUrl)
42
82
  * @param {string} [githubToken] - GitHub token; omit for public repos
43
- * @returns {Promise<string[]>} - Changed file paths relative to the repo root
83
+ * @returns {Promise<Array<{ filename: string, status: string, patch: string|null }>>}
44
84
  */
45
85
  export async function fetchPrFiles(prUrl, githubToken) {
46
86
  const { owner, repo, prNumber } = parsePrUrl(prUrl);
@@ -51,18 +91,33 @@ export async function fetchPrFiles(prUrl, githubToken) {
51
91
  ...(githubToken ? { Authorization: `Bearer ${githubToken}` } : {}),
52
92
  };
53
93
 
94
+ // REST API base host. Defaults to the public github.com API; honours GITHUB_API_URL —
95
+ // the env var GitHub Actions injects automatically (and that GitHub Enterprise Server
96
+ // sets to the enterprise API host), so the validator works on GHES unchanged and the
97
+ // CLI entry point is hermetically testable against a recorded fixture server. Unset →
98
+ // 'https://api.github.com' → byte-identical to before. Trailing slash trimmed so the
99
+ // path concatenation below never doubles a '/'.
100
+ const apiBase = String(process.env.GITHUB_API_URL || 'https://api.github.com').replace(/\/+$/, '');
101
+
54
102
  const allFiles = [];
55
103
  const MAX_PAGES = 3; // caps at 300 files; avoids runaway requests on mega-PRs
56
104
 
57
105
  for (let page = 1; page <= MAX_PAGES; page++) {
58
- const apiUrl = `https://api.github.com/repos/${owner}/${repo}/pulls/${prNumber}/files?per_page=100&page=${page}`;
59
- const res = await fetch(apiUrl, { headers });
60
- if (!res.ok) {
61
- const body = await res.text().catch(() => '');
62
- throw new Error(`GitHub API ${res.status}: ${body || res.statusText}`);
63
- }
106
+ const apiUrl = `${apiBase}/repos/${owner}/${repo}/pulls/${prNumber}/files?per_page=100&page=${page}`;
107
+ // E2: resilient retries a rate-limit (403 primary / 429 secondary) + transient
108
+ // 5xx with backoff, and throws a structured, secret-free error on 404/422/etc.
109
+ // (the CLI surfaces err.message into a ::error:: annotation, so it must never leak
110
+ // the token). githubFetch returns the OK Response; this loop owns the pagination.
111
+ const res = await githubFetch(apiUrl, {
112
+ headers,
113
+ context: `GET repos/${owner}/${repo}/pulls/${prNumber}/files (page ${page})`,
114
+ });
64
115
  const files = await res.json();
65
- allFiles.push(...files.map(f => f.filename));
116
+ allFiles.push(...files.map(f => ({
117
+ filename: f.filename,
118
+ status: f.status,
119
+ patch: f.patch ?? null,
120
+ })));
66
121
  if (files.length < 100) break; // last page reached
67
122
  }
68
123
 
@@ -104,6 +159,116 @@ export const INFRA_PATTERNS = [
104
159
  /package\.json$/i,
105
160
  ];
106
161
 
162
/**
 * Top-level directory names treated as monorepo workspace roots. A path such as
 * "apps/web/components/Foo.tsx" or "packages/ui/src/Button.tsx" starts with two
 * segments (the root literal and the package name) that identify the PACKAGE a
 * file belongs to rather than the app ROUTE it affects. (PR_VALIDATOR C2.)
 */
const WORKSPACE_ROOTS = new Set(['apps', 'packages', 'libs', 'services']);

/**
 * Drop a leading "<workspace-root>/<package>/" prefix from a repo-relative path so
 * that neither the workspace directory nor the package name leaks into slug
 * tokenization (e.g. "apps/web/checkout.tsx" → "checkout.tsx").
 *
 * The input is returned untouched (as a string) when its first segment is not a
 * recognized workspace root, or when nothing would remain after stripping. When a
 * prefix IS stripped the remainder is re-joined with forward slashes. Pure string
 * operation. (PR_VALIDATOR C2.)
 *
 * @param {string} filename - a repo-root-relative path (forward or back slashes)
 * @returns {string}
 */
export function stripWorkspacePrefix(filename) {
  const original = String(filename);
  // Second element (the package name) is intentionally skipped.
  const [workspaceRoot, , ...remainder] = original.split(/[/\\]+/).filter(Boolean);

  const underWorkspace =
    workspaceRoot !== undefined && WORKSPACE_ROOTS.has(workspaceRoot.toLowerCase());

  // Need at least one segment left after "<root>/<package>/".
  if (!underWorkspace || remainder.length === 0) return original;
  return remainder.join('/');
}
188
+
189
/**
 * Re-base a repo-root-relative file path onto a package-subdir `root`.
 *
 * `sourceDir` may point at a monorepo package (e.g. ".../apps/web") while GitHub
 * reports changed files relative to the repository root
 * ("apps/web/components/Foo.tsx"); resolving one against the other naively would
 * count the workspace prefix twice. This finds the LONGEST run of leading file
 * segments that equals the trailing segments of `root` and returns what is left
 * of the file path after that run (".../apps/web" + "apps/web/components/Foo.tsx"
 * → "components/Foo.tsx").
 *
 * Returns null when there is no overlap at all — the file is already
 * package-relative or belongs to a different package. Segment comparison is
 * case-insensitive, and at least one file segment is always left over.
 * Pure string operation. (PR_VALIDATOR C2.)
 *
 * @param {string} root - absolute path to the package subdir (sourceDir)
 * @param {string} file - repo-root-relative changed-file path
 * @returns {string|null}
 */
export function packageRelativePath(root, file) {
  const toSegments = (p) => String(p).split(/[/\\]+/).filter(Boolean);
  const rootParts = toSegments(root).map((s) => s.toLowerCase());
  const fileParts = toSegments(file);

  // Never consume the whole file path: keep ≥1 segment as the remainder.
  const longest = Math.min(rootParts.length, fileParts.length - 1);

  // Try the longest candidate overlap first; the first hit is the best one.
  for (let overlap = longest; overlap >= 1; overlap -= 1) {
    const offset = rootParts.length - overlap;
    let matches = true;
    for (let i = 0; i < overlap; i += 1) {
      if (rootParts[offset + i] !== fileParts[i].toLowerCase()) {
        matches = false;
        break;
      }
    }
    if (matches) return fileParts.slice(overlap).join('/');
  }
  return null;
}
218
+
219
/**
 * Lowercase slug tokens extracted from a file path, with any monorepo workspace
 * prefix stripped first (C2) so workspace dirs don't pollute matching. Shared by
 * mapFilesToRoutes (route matching) and resolveAnnotationTarget (annotation file
 * matching) so the two cannot drift.
 *
 * Two kinds of token are produced for every path segment:
 *   1. the pieces obtained by splitting on ".", "_" and "-"
 *      ("src/pages/sign-in.tsx" → "src", "pages", "sign", "in"), and
 *   2. the segment with every non-alphanumeric character removed
 *      ("sign-in" → "signin").
 * The second form mirrors how routePathSegments normalizes a route segment
 * ("/sign-in" → "signin"). Without it a hyphenated/underscored page file can
 * never match its own route, and when some OTHER route does match, the affected
 * route is silently narrowed away.
 *
 * Tokens of length ≤ 1 are dropped. The final file extension is removed before
 * tokenizing.
 *
 * @param {string} filename - changed-file path (forward or back slashes)
 * @returns {Set<string>}
 */
function fileSlugTokens(filename) {
  const stem = stripWorkspacePrefix(filename)
    .toLowerCase()
    .replace(/\.[^./\\]+$/, ''); // drop only the trailing extension

  const tokens = new Set();
  for (const segment of stem.split(/[/\\]+/)) {
    // Fine-grained pieces (the historical behaviour — kept as-is).
    for (const part of segment.split(/[._-]+/)) {
      if (part.length > 1) tokens.add(part);
    }
    // Collapsed form, normalized exactly like a route segment.
    const collapsed = segment.replace(/[^a-z0-9]/g, '');
    if (collapsed.length > 1) tokens.add(collapsed);
  }
  return tokens;
}
234
+
235
/**
 * Break a route path into its meaningful lowercase segments
 * (e.g. "/checkout/review" → ["checkout", "review"]).
 *
 * Each "/"-delimited piece has every character outside [a-z0-9] removed; pieces
 * that end up one character or shorter are discarded, so the root path "/" (and a
 * null/undefined path) yields [].
 *
 * @param {string|null|undefined} routePath
 * @returns {string[]}
 */
function routePathSegments(routePath) {
  const segments = [];
  for (const raw of String(routePath ?? '').toLowerCase().split('/')) {
    const cleaned = raw.replace(/[^a-z0-9]/g, '');
    if (cleaned.length > 1) segments.push(cleaned);
  }
  return segments;
}
246
+
247
/**
 * Normalize a changed-file list and sort it into the buckets that every route
 * mapper consumes, so the slug heuristic (mapFilesToRoutes) and the
 * framework-aware mapper (mapFilesToRoutesDeep) share identical excluded/infra
 * short-circuits.
 *
 * @param {Array<string|{ filename: string }>} changedFiles
 * @returns {{ fileNames: string[], appFiles: string[], allExcluded: boolean, hasInfra: boolean }}
 *   - fileNames:   every non-empty filename, taken from a bare string or from
 *                  an object's `filename`; anything else is dropped
 *   - appFiles:    fileNames that match none of EXCLUDED_PATTERNS
 *   - allExcluded: true when there were filenames but every one was excluded
 *   - hasInfra:    true when at least one appFile matches INFRA_PATTERNS
 */
function classifyChangedFiles(changedFiles) {
  const matchesAny = (patterns, name) => patterns.some(re => re.test(name));

  // A non-array input is treated as "no files".
  const fileNames = [];
  for (const entry of Array.isArray(changedFiles) ? changedFiles : []) {
    const name = typeof entry === 'string' ? entry : entry?.filename;
    if (typeof name === 'string' && name.length > 0) fileNames.push(name);
  }

  const appFiles = fileNames.filter(name => !matchesAny(EXCLUDED_PATTERNS, name));
  const allExcluded = fileNames.length > 0 && appFiles.length === 0;
  const hasInfra = appFiles.some(name => matchesAny(INFRA_PATTERNS, name));

  return { fileNames, appFiles, allExcluded, hasInfra };
}
271
+
107
272
  /**
108
273
  * Map a list of changed file paths to the subset of Argus route configs that
109
274
  * are likely affected, using heuristic slug matching.
@@ -113,7 +278,9 @@ export const INFRA_PATTERNS = [
113
278
  * 2. File path contains a slug that matches a route path segment → include that route
114
279
  * 3. No matches → return ALL routes (conservative fallback — never miss a regression)
115
280
  *
116
- * @param {string[]} changedFiles - Relative file paths from fetchPrFiles
281
+ * @param {Array<string|{ filename: string }>} changedFiles - Filenames from fetchPrFiles;
282
+ * accepts either bare path strings or the `{ filename, status, patch }` objects that
283
+ * fetchPrFiles now returns (the filename is extracted from each).
117
284
  * @param {Array<{ path: string, name: string }>} routes - Route configs from targets.js
118
285
  * @returns {Array<{ path: string, name: string }>}
119
286
  */
@@ -121,41 +288,212 @@ export function mapFilesToRoutes(changedFiles, routes) {
121
288
  if (!routes || routes.length === 0) return [];
122
289
  if (!changedFiles || changedFiles.length === 0) return routes;
123
290
 
124
- // Strip files that are never app-route-relevant (CI configs, docs, repo metadata)
125
- const appFiles = changedFiles.filter(
126
- f => !EXCLUDED_PATTERNS.some(re => re.test(f)),
127
- );
128
-
129
- // README-only / CI-only PR — skip the audit entirely
130
- if (appFiles.length === 0) return [];
131
-
132
- // Infrastructure change → full audit
133
- if (appFiles.some(f => INFRA_PATTERNS.some(re => re.test(f)))) {
134
- return routes;
135
- }
291
+ const { fileNames, appFiles, allExcluded, hasInfra } = classifyChangedFiles(changedFiles);
292
+ if (fileNames.length === 0) return routes; // nothing usable → conservative
293
+ if (allExcluded) return []; // README-only / CI-only PR — skip the audit
294
+ if (hasInfra) return routes; // infrastructure change → full audit
136
295
 
137
296
  // Build a flat set of lowercase slugs from app-relevant changed files
138
- const fileSlugs = new Set(
139
- appFiles.flatMap(f =>
140
- f.toLowerCase()
141
- .replace(/\.[^./\\]+$/, '')
142
- .split(/[/\\._-]+/)
143
- .filter(s => s.length > 1),
144
- ),
145
- );
146
-
147
- // Extract meaningful segments from a route path (e.g. "/checkout/review" → ["checkout","review"])
148
- const routeSegments = (route) =>
149
- route.path
150
- .toLowerCase()
151
- .split('/')
152
- .map(s => s.replace(/[^a-z0-9]/g, ''))
153
- .filter(s => s.length > 1);
297
+ const fileSlugs = new Set(appFiles.flatMap(f => [...fileSlugTokens(f)]));
154
298
 
155
299
  const matched = routes.filter(route =>
156
- routeSegments(route).some(seg => fileSlugs.has(seg)),
300
+ routePathSegments(route.path).some(seg => fileSlugs.has(seg)),
157
301
  );
158
302
 
159
303
  // Conservative fallback: if nothing matched, audit everything
160
304
  return matched.length > 0 ? matched : routes;
161
305
  }
306
+
307
/**
 * Canonicalize a route path so it begins with exactly one "/": any run of leading
 * slashes is collapsed and a missing one is added. Null/undefined become "/".
 * Nothing after the leading slash is altered.
 *
 * @param {string|null|undefined} p
 * @returns {string}
 */
function normalizeRoutePath(p) {
  const withoutLeadingSlashes = String(p ?? '').replace(/^\/+/, '');
  return `/${withoutLeadingSlashes}`;
}
311
+
312
/**
 * Framework-aware route mapping (PR_VALIDATOR C1) — the precise sibling of mapFilesToRoutes.
 *
 * When `sourceDir` points at the app's source tree, a changed COMPONENT or STYLESHEET file is
 * mapped to ONLY the routes whose page files import it, using the Next.js route-file list
 * (discoverNextJsRouteFiles) plus the static import graph (buildImportGraph / findDependents)
 * instead of the slug heuristic. A changed PAGE file maps to its own route. Files matching
 * INFRA_PATTERNS keep the conservative all-routes behaviour by deferring to mapFilesToRoutes.
 *
 * SAFETY (never miss a regression): the result is only ever NARROWED when EVERY changed app
 * file resolves to ≥1 concrete route that exists in the caller's `routes` list. Everything
 * else defers to mapFilesToRoutes:
 *   - no `sourceDir` (opt-in — default behaviour is unchanged),
 *   - no discoverable route files (not a Next.js tree),
 *   - the import graph reports `truncated` (possibly incomplete),
 *   - a changed file is not a node in the graph (even after the C2 monorepo re-base),
 *   - a changed file is imported by no known page,
 *   - a dependent of a changed file lives under a dynamic segment ("[slug]", "[...all]"):
 *     discoverNextJsRouteFiles omits dynamic pages, so the routes they render are invisible
 *     here and narrowing to the static importers alone could drop an affected route,
 *   - a resolved route path is absent from the caller's `routes` list,
 *   - any exception thrown by the static analysis.
 *
 * NOTE(review): dependents that are shared shells (app/layout, pages/_app, …) are presumably
 * not in the route-file list either — confirm against route-discoverer and, if so, treat them
 * like the dynamic-segment case.
 *
 * @param {Array<string|{ filename: string }>} changedFiles
 * @param {Array<{ path: string, name?: string }>} routes
 * @param {{ sourceDir?: string|null }} [options]
 * @returns {Array<{ path: string, name?: string }>}
 */
export function mapFilesToRoutesDeep(changedFiles, routes, { sourceDir } = {}) {
  const heuristic = () => mapFilesToRoutes(changedFiles, routes);

  if (!routes || routes.length === 0) return [];
  if (!changedFiles || changedFiles.length === 0) return routes;
  if (!sourceDir) return heuristic(); // opt-in: no source dir → pure heuristic (unchanged)

  const { fileNames, appFiles, allExcluded, hasInfra } = classifyChangedFiles(changedFiles);
  if (fileNames.length === 0) return routes;
  if (allExcluded) return []; // docs/CI-only PR — skip (same as the heuristic)
  if (hasInfra) return heuristic(); // infra change → all routes (defer to the heuristic)

  try {
    const root = path.resolve(sourceDir);
    const routeFiles = discoverNextJsRouteFiles(root);
    if (!routeFiles || routeFiles.length === 0) return heuristic(); // not a Next.js tree

    const graph = buildImportGraph(root);
    if (graph.truncated) return heuristic(); // file cap hit → graph may be incomplete

    // route file (abs) → set of route paths it defines
    const routeFileToPaths = new Map();
    for (const { path: rp, file } of routeFiles) {
      if (!routeFileToPaths.has(file)) routeFileToPaths.set(file, new Set());
      routeFileToPaths.get(file).add(rp);
    }

    // True when a file sits under a Next.js dynamic segment, judged on the path RELATIVE to
    // `root` so a bracket in the checkout location itself is never mistaken for one.
    const isUnderDynamicSegment = (absFile) =>
      path.relative(root, absFile).split(/[/\\]+/).some(seg => seg.includes('['));

    // Resolve every changed app file to the route paths it can affect.
    const resolvedPaths = new Set();
    for (const f of appFiles) {
      let abs = path.resolve(root, f);
      if (!graph.files.has(abs)) {
        // C2 (monorepo): `root` may be a package subdir (".../apps/web") while `f` is
        // repo-root-relative ("apps/web/components/Foo.tsx"). Re-base onto the package and
        // retry — but ONLY adopt the result when it is a VERIFIED graph node, so a
        // foreign-package file still falls through to the conservative fallback below.
        const rel = packageRelativePath(root, f);
        if (rel) {
          const monoAbs = path.resolve(root, rel);
          if (graph.files.has(monoAbs)) abs = monoAbs;
        }
      }
      if (!graph.files.has(abs)) return heuristic(); // not a node in the tree → ambiguous

      const dependents = [...findDependents(graph.reverse, [abs])];
      // A dynamic page (or something colocated with one) depends on this file, but dynamic
      // pages have no entry in routeFileToPaths — we cannot name the routes they render.
      if (dependents.some(isUnderDynamicSegment)) return heuristic();

      const pathsForFile = new Set(routeFileToPaths.get(abs) ?? []); // the file IS a page
      for (const dep of dependents) {
        const depPaths = routeFileToPaths.get(dep); // a page that imports the file
        if (depPaths) for (const p of depPaths) pathsForFile.add(p);
      }

      if (pathsForFile.size === 0) return heuristic(); // imported by no page → ambiguous
      for (const p of pathsForFile) resolvedPaths.add(p);
    }

    // Index the caller's routes by normalized path (tolerant of a missing/duplicated
    // leading slash); several route configs may share one path.
    const routesByNorm = new Map();
    for (const r of routes) {
      const n = normalizeRoutePath(r.path);
      if (!routesByNorm.has(n)) routesByNorm.set(n, []);
      routesByNorm.get(n).push(r);
    }

    // Intersect. Any resolved path missing from the caller's list → cannot confidently
    // narrow, so nothing collected so far is used.
    const seen = new Set();
    const matched = [];
    for (const p of resolvedPaths) {
      const candidates = routesByNorm.get(normalizeRoutePath(p));
      if (!candidates) return heuristic();
      for (const r of candidates) {
        if (!seen.has(r)) {
          seen.add(r);
          matched.push(r);
        }
      }
    }
    if (matched.length === 0) return heuristic();

    logger.debug(`[ARGUS] C1: framework-aware mapping narrowed ${appFiles.length} file(s) to ${matched.length}/${routes.length} route(s)`);
    return matched;
  } catch (err) {
    // Any unexpected failure in static analysis must never narrow the audit — fall back.
    logger.debug(`[ARGUS] C1: framework-aware mapping failed (${err.message}) — using slug heuristic`);
    return heuristic();
  }
}
429
+
430
/**
 * Parse the new-file line number of the FIRST added (`+`) line in a unified-diff patch.
 *
 * Walks the hunks in order, counting new-file lines from each hunk header's `+start`, and
 * returns as soon as an added line is met. Returns null when the patch is not a non-empty
 * string (absent/binary → `patch: null`) or contains no `+` line inside a hunk
 * (deletion-only) — the caller MUST treat null as "no real line".
 *
 * Line accounting inside a hunk body:
 *   - `+…`  added line      → this is the answer
 *   - `-…`  removed line    → exists only in the old file, counter unchanged
 *   - `\…`  diff marker     → "\ No newline at end of file" describes the PREVIOUS line
 *                             and is not a file line at all, counter unchanged
 *   - anything else         → context line, advances the new-file counter
 * Skipping the `\` marker matters: counting it as context would report a line number one
 * past the real one (possibly a line that does not exist in the file).
 *
 * @param {string|null|undefined} patch - unified-diff hunk text from fetchPrFiles
 * @returns {number|null}
 */
export function firstAddedLine(patch) {
  if (typeof patch !== 'string' || patch.length === 0) return null;

  let newLineNo = null; // null until we are inside a hunk body
  for (const line of patch.split('\n')) {
    const header = line.match(/^@@ -\d+(?:,\d+)? \+(\d+)(?:,\d+)? @@/);
    if (header) {
      newLineNo = Number.parseInt(header[1], 10);
      continue;
    }
    if (newLineNo === null) continue; // pre-hunk lines (---/+++ file headers)

    if (line.startsWith('+')) return newLineNo; // first added line — done
    if (line.startsWith('-')) continue; // deletion — no new-file line consumed
    if (line.startsWith('\\')) continue; // "\ No newline at end of file" — not a file line
    newLineNo += 1; // context line advances the new-file counter
  }
  return null; // no added line found (deletion-only patch)
}
457
+
458
/**
 * Pick a file:line annotation target for a route out of the PR's changed files.
 *
 * A `{ path, line }` pair is returned ONLY when a changed file shares a slug token
 * with one of the route's segments AND firstAddedLine finds an added line in that
 * file's patch. In every other situation the result is null, so the caller can
 * fall back to a route-level annotation:
 *   - `prFiles` is not a non-empty array
 *   - the route has no usable segment (e.g. the root route "/")
 *   - the file matches EXCLUDED_PATTERNS or INFRA_PATTERNS
 *   - the file has no slug overlap with the route
 *   - the file overlaps but has no added line (bare string entry, null patch,
 *     deletion-only patch)
 * Files are examined in the order given; the first one that yields a line wins.
 * The returned `line` always comes straight from firstAddedLine.
 *
 * @param {string} routePath - the audited route path (e.g. "/checkout")
 * @param {Array<string|{ filename: string, patch?: string|null }>} prFiles - fetchPrFiles output
 * @returns {{ path: string, line: number } | null}
 */
export function resolveAnnotationTarget(routePath, prFiles) {
  if (!Array.isArray(prFiles) || prFiles.length === 0) return null;

  const routeSegments = routePathSegments(routePath);
  if (routeSegments.length === 0) return null; // root / unsegmented route — no specific cause

  const matchesAny = (patterns, name) => patterns.some(re => re.test(name));

  for (const entry of prFiles) {
    const isBarePath = typeof entry === 'string';
    const filename = isBarePath ? entry : entry?.filename;
    if (typeof filename !== 'string' || filename.length === 0) continue;

    // Docs/CI metadata never cause a route failure; infra files map to ALL routes,
    // so neither can be blamed for one route specifically.
    if (matchesAny(EXCLUDED_PATTERNS, filename) || matchesAny(INFRA_PATTERNS, filename)) continue;

    const tokens = fileSlugTokens(filename);
    if (routeSegments.every(seg => !tokens.has(seg))) continue;

    // A bare path string carries no patch, hence no line.
    const line = firstAddedLine(isBarePath ? null : entry?.patch);
    if (line != null) return { path: filename, line };
    // Overlap but no real line — keep looking for a better file.
  }
  return null;
}
@@ -141,6 +141,28 @@ function parseLocElements(xml, baseUrl) {
141
141
  */
142
142
  export function discoverFromNextJs(sourceDir) {
143
143
  const discovered = new Set();
144
+ for (const { path: routePath } of discoverNextJsRouteFiles(sourceDir)) {
145
+ discovered.add(routePath);
146
+ }
147
+ return [...discovered];
148
+ }
149
+
150
+ /**
151
+ * Like discoverFromNextJs, but returns each route paired with the absolute path of the
152
+ * page/route source FILE that defines it. The PR Validator's framework-aware route mapping
153
+ * (PR_VALIDATOR C1) uses this to map a changed file back to the route(s) it renders:
154
+ * directly when the changed file IS a page, or transitively (via the import graph) when a
155
+ * changed component is imported by a page. discoverFromNextJs is the path-only projection
156
+ * of this function (so the two can never disagree on which files are routes).
157
+ *
158
+ * `file` is whatever absolute/relative form `path.join(sourceDir, …)` yields — callers that
159
+ * compare it against an import graph must build that graph from the SAME sourceDir.
160
+ *
161
+ * @param {string} sourceDir project root (contains pages/ and/or app/)
162
+ * @returns {Array<{ path: string, file: string }>}
163
+ */
164
+ export function discoverNextJsRouteFiles(sourceDir) {
165
+ const routeFiles = [];
144
166
 
145
167
  // ── pages/ ────────────────────────────────────────────────────────────────
146
168
  const pagesDir = path.join(sourceDir, 'pages');
@@ -162,7 +184,7 @@ export function discoverFromNextJs(sourceDir) {
162
184
  // Skip dynamic segments like [slug] — no concrete URL to crawl
163
185
  if (urlParts.some(p => p.includes('['))) continue;
164
186
 
165
- discovered.add(urlParts.length === 0 ? '/' : '/' + urlParts.join('/'));
187
+ routeFiles.push({ path: urlParts.length === 0 ? '/' : '/' + urlParts.join('/'), file });
166
188
  }
167
189
  }
168
190
 
@@ -183,11 +205,11 @@ export function discoverFromNextJs(sourceDir) {
183
205
  // Skip dynamic segments like [id] — no concrete URL to crawl
184
206
  if (filtered.some(p => p.includes('['))) continue;
185
207
 
186
- discovered.add(filtered.length === 0 ? '/' : '/' + filtered.join('/'));
208
+ routeFiles.push({ path: filtered.length === 0 ? '/' : '/' + filtered.join('/'), file });
187
209
  }
188
210
  }
189
211
 
190
- return [...discovered];
212
+ return routeFiles;
191
213
  }
192
214
 
193
215
  // ── C3.3: React Router route discovery ───────────────────────────────────────