code-review-forge 2.0.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. code_forge/__init__.py +14 -0
  2. code_forge/__main__.py +8 -0
  3. code_forge/autofix.py +78 -0
  4. code_forge/baseline.py +216 -0
  5. code_forge/cli.py +983 -0
  6. code_forge/delta.py +65 -0
  7. code_forge/diagnose.py +109 -0
  8. code_forge/diff.py +82 -0
  9. code_forge/disposition.py +32 -0
  10. code_forge/e2e_check.py +641 -0
  11. code_forge/env_resolver.py +91 -0
  12. code_forge/errors.py +34 -0
  13. code_forge/exit_codes.py +37 -0
  14. code_forge/factories.py +191 -0
  15. code_forge/falsify.py +85 -0
  16. code_forge/gate_check.py +466 -0
  17. code_forge/git.py +351 -0
  18. code_forge/hold.py +126 -0
  19. code_forge/install_hooks.py +331 -0
  20. code_forge/lock.py +162 -0
  21. code_forge/machine.py +792 -0
  22. code_forge/mode_resolver.py +60 -0
  23. code_forge/mutation.py +380 -0
  24. code_forge/parsers/__init__.py +56 -0
  25. code_forge/parsers/_sarif.py +77 -0
  26. code_forge/parsers/base.py +65 -0
  27. code_forge/parsers/checkpatch.py +66 -0
  28. code_forge/parsers/clippy.py +85 -0
  29. code_forge/parsers/non_ascii.py +47 -0
  30. code_forge/parsers/ruff.py +18 -0
  31. code_forge/parsers/semgrep.py +18 -0
  32. code_forge/parsers/shellcheck.py +56 -0
  33. code_forge/registry.py +153 -0
  34. code_forge/reporter.py +133 -0
  35. code_forge/runner.py +205 -0
  36. code_forge/sarif.py +226 -0
  37. code_forge/skills/adversarial-qe/SKILL.md +272 -0
  38. code_forge/skills/code-forge/SKILL.md +1193 -0
  39. code_forge/skills/code-review-expert/SKILL.md +162 -0
  40. code_forge/skills/code-review-expert/references/code-quality-checklist.md +130 -0
  41. code_forge/skills/code-review-expert/references/removal-plan.md +52 -0
  42. code_forge/skills/code-review-expert/references/security-checklist.md +118 -0
  43. code_forge/skills/code-review-expert/references/solid-checklist.md +65 -0
  44. code_forge/skills/kernel-fp-verify/SKILL.md +101 -0
  45. code_forge/skills/qodo-review/SKILL.md +135 -0
  46. code_forge/skills/smoke-test/SKILL.md +253 -0
  47. code_forge/skills/smoke-test/references/boundary-cases.md +114 -0
  48. code_forge/skills/smoke-test/references/concurrency-patterns.md +306 -0
  49. code_forge/skills/smoke-test/references/injection-payloads.md +124 -0
  50. code_forge/skills/smoke-test/test-library/shell/README.md +271 -0
  51. code_forge/skills/smoke-test/test-library/shell/primitives.sh +352 -0
  52. code_forge/skills/smoke-test/test-library/shell/primitives_test.sh +324 -0
  53. code_forge/snapshot.py +196 -0
  54. code_forge/source.py +64 -0
  55. code_forge/state.py +246 -0
  56. code_forge/verdict.py +43 -0
  57. code_review_forge-2.0.0a1.dist-info/METADATA +237 -0
  58. code_review_forge-2.0.0a1.dist-info/RECORD +62 -0
  59. code_review_forge-2.0.0a1.dist-info/WHEEL +5 -0
  60. code_review_forge-2.0.0a1.dist-info/entry_points.txt +2 -0
  61. code_review_forge-2.0.0a1.dist-info/licenses/LICENSE +179 -0
  62. code_review_forge-2.0.0a1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,641 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # Copyright (c) 2026, Minxi Hou <houminxi@gmail.com>
3
+ """E2E coverage heuristic for forge (R3).
4
+
5
+ Layer 1 (heuristic, no config): diff touches >=2 source groups AND modifies
6
+ a function signature/return type -> non-blocking checklist finding.
7
+ Layer 2 (explicit, opt-in): .code-forge/components.yaml defines components, hubs,
8
+ data paths, and e2e artifact patterns. Co-occurrence trigger -> P2 finding.
9
+
10
+ No subprocess or git calls. diff_text is provided by caller via git module.
11
+ Uses unidiff directly (diff.py does not expose Hunk.section_header).
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import hashlib
17
+ import re
18
+ from fnmatch import fnmatch
19
+ from pathlib import Path
20
+ from typing import Optional
21
+
22
+ import unidiff
23
+ import yaml
24
+
25
+ from .diff import get_changed_files
26
+ from .disposition import Disposition
27
+ from .errors import ComponentsConfigError
28
+ from .state import StateFinding
29
+
30
+ # ---------------------------------------------------------------------------
31
+ # Signature-detection patterns (Python + shell; C detection not implemented).
32
+ # Compiled once at module level to avoid per-call overhead.
33
+ # ---------------------------------------------------------------------------
34
+
35
+ # Python: matches "def foo(" or "async def foo(" lines (added lines).
36
+ _PY_DEF_RE = re.compile(
37
+ r"^\s*(async\s+)?def\s+[A-Za-z_]\w*\s*\("
38
+ )
39
+
40
+ # Python: matches a return-type annotation "-> <type> :" at end of line.
41
+ _PY_RETURN_RE = re.compile(
42
+ r"->\s*\S+.*:\s*$"
43
+ )
44
+
45
+ # Shell: matches a function definition line.
46
+ _SH_FUNC_RE = re.compile(
47
+ r"^\s*(function\s+)?[A-Za-z_]\w*\s*\(\s*\)\s*\{?\s*$"
48
+ )
49
+
50
+ # Arm 2: matches a def/function pattern inside a section_header string.
51
+ # git emits section_header such as "def parse(self, ..." or "foo() {".
52
+ SECTION_HEADER_DEF_RE = re.compile(
53
+ r"(?:(?:async\s+)?def\s+[A-Za-z_]\w*\s*\(|"
54
+ r"(?:function\s+)?[A-Za-z_]\w*\s*\(\s*\)\s*\{?)"
55
+ )
56
+
57
+ # All added-line signature patterns as a flat list.
58
+ _SIG_PATTERNS = [_PY_DEF_RE, _PY_RETURN_RE, _SH_FUNC_RE]
59
+
60
+ # Test directory first-segment names excluded from source grouping by default.
61
+ _TEST_DIRS: frozenset[str] = frozenset({"tests", "test", "spec"})
62
+
63
+ # Default e2e artifact patterns when e2e_patterns absent from components.yaml.
64
+ _DEFAULT_E2E_PATTERNS = ["tests/e2e/**", "test_*integration*"]
65
+
66
+
67
def detect_signature_changes(diff_text: str) -> set[str]:
    """Collect the paths of files whose diff adds or touches a function signature.

    A file is reported when at least one of its hunks satisfies either arm:
      - Arm 1: an added line matches one of the patterns in _SIG_PATTERNS.
      - Arm 2: the hunk's section_header matches SECTION_HEADER_DEF_RE.

    Hunks with an empty section_header (for example flat shell code outside
    any function) can only be caught by Arm 1; that is expected.

    Args:
        diff_text: unified diff text.

    Returns:
        Set of target paths. Empty for blank input, for a diff that unidiff
        cannot parse, and when no hunk matches. Removed files are skipped.
    """
    if not (diff_text and diff_text.strip()):
        return set()

    try:
        parsed = unidiff.PatchSet(diff_text)
    except unidiff.errors.UnidiffParseError:
        return set()

    def _added_line_hits(hunk) -> bool:
        # Arm 1: any added line that looks like a signature.
        return any(
            pattern.search(getattr(entry, "value", ""))
            for entry in hunk
            if entry.is_added
            for pattern in _SIG_PATTERNS
        )

    def _header_hits(hunk) -> bool:
        # Arm 2: the enclosing-function header git attached to the hunk.
        header = getattr(hunk, "section_header", "") or ""
        return bool(header and SECTION_HEADER_DEF_RE.search(header))

    flagged: set[str] = set()
    for file_patch in parsed:
        if file_patch.is_removed_file:
            continue
        if any(_added_line_hits(h) or _header_hits(h) for h in file_patch):
            flagged.add(file_patch.path)
    return flagged
107
+
108
+
109
def group_source_files(
    files: list[str],
    components: Optional[dict] = None,
    exclude_test_dirs: bool = True,
) -> dict[str, list[str]]:
    """Bucket changed file paths into source groups.

    Args:
        files: file paths taken from the diff ("/"-separated).
        components: optional {component_name: [glob_patterns]} mapping. A file
            goes to the first component (in mapping order) that has a pattern
            matching it via fnmatch; a file no component claims is grouped by
            its first path segment.
        exclude_test_dirs: when True (default), files whose first path segment
            is one of _TEST_DIRS are dropped before grouping.

    Returns:
        {group_name: sorted file list}; groups that received no file do not
        appear.

    A top-level file (no "/" in the path) forms a group named after the file
    itself rather than sharing an "" group, so several top-level files never
    collapse into one pseudo-group.
    """
    buckets: dict[str, list[str]] = {}

    for path in files:
        # For a top-level file the "first segment" is the whole filename.
        head = path.partition("/")[0]

        if exclude_test_dirs and head in _TEST_DIRS:
            continue

        owner = None
        if components is not None:
            for name, globs in components.items():
                # None doubles as the "unclaimed" marker, so a None key can
                # never claim a file.
                if any(fnmatch(path, glob) for glob in globs) and name is not None:
                    owner = name
                    break

        key = head if owner is None else owner
        if key in buckets:
            buckets[key].append(path)
        else:
            buckets[key] = [path]

    # Sorted members keep the output deterministic.
    return {key: sorted(members) for key, members in buckets.items()}
160
+
161
+
162
+ # ---------------------------------------------------------------------------
163
+ # components.yaml loader and schema validation
164
+ # ---------------------------------------------------------------------------
165
+
166
def load_components_yaml(repo_root: Path) -> Optional[dict]:
    """Load and validate .code-forge/components.yaml.

    Args:
        repo_root: repository root path.

    Returns:
        Validated dict with e2e_patterns defaulted, or None when the file
        does not exist (Layer 2 is opt-in; absence is normal, not an error).

    Raises:
        ComponentsConfigError: when the file is present but cannot be read,
            is not valid YAML, or fails schema validation. Every message
            starts "components.yaml: " and names the offending key.
    """
    config_path = repo_root / ".code-forge" / "components.yaml"
    if not config_path.exists():
        return None

    try:
        with open(config_path, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f)
    except yaml.YAMLError as e:
        raise ComponentsConfigError(
            "components.yaml: YAML parse error: %s" % e
        ) from e
    except (OSError, UnicodeDecodeError) as e:
        # A present-but-unreadable file is a configuration problem too; report
        # it through the same channel instead of leaking a low-level error.
        raise ComponentsConfigError(
            "components.yaml: cannot read file: %s" % e
        ) from e

    if not isinstance(data, dict):
        raise ComponentsConfigError(
            "components.yaml: top-level value must be a mapping"
        )

    # (a) version check
    version = data.get("version")
    if version != 1:
        raise ComponentsConfigError(
            "components.yaml: version: expected 1, got %r" % (version,)
        )

    # (b) components must be a dict; each value has a paths list of strings.
    raw_components = data.get("components")
    if not isinstance(raw_components, dict):
        raise ComponentsConfigError(
            "components.yaml: 'components' must be a mapping"
        )
    for name, info in raw_components.items():
        if not isinstance(info, dict) or "paths" not in info:
            raise ComponentsConfigError(
                "components.yaml: component %r: missing 'paths' list" % (name,)
            )
        if not isinstance(info["paths"], list):
            raise ComponentsConfigError(
                "components.yaml: component %r: 'paths' must be a list" % (name,)
            )
        # Patterns are later handed to fnmatch, which rejects non-strings.
        if not all(isinstance(pat, str) for pat in info["paths"]):
            raise ComponentsConfigError(
                "components.yaml: component %r: 'paths' entries must be strings"
                % (name,)
            )

    component_names = set(raw_components.keys())

    def _is_defined(candidate: object) -> bool:
        # YAML can yield unhashable values (lists, mappings) where a component
        # name is expected; those are simply "not a defined component".
        try:
            return candidate in component_names
        except TypeError:
            return False

    # (c) depends_on targets must exist; (d) no self-reference.
    for name, info in raw_components.items():
        if "depends_on" not in info:
            continue
        depends_on = info["depends_on"]
        # A bare string would be iterated character by character and a YAML
        # null would fail on iteration, here and in every later consumer.
        if not isinstance(depends_on, list):
            raise ComponentsConfigError(
                "components.yaml: component %r: 'depends_on' must be a list"
                % (name,)
            )
        for target in depends_on:
            if target == name:
                raise ComponentsConfigError(
                    "components.yaml: self-reference '%s' -> '%s'"
                    % (name, name)
                )
            if not _is_defined(target):
                raise ComponentsConfigError(
                    "components.yaml: depends_on '%s' (from '%s') is undefined"
                    % (target, name)
                )

    # (e) cycle detection via DFS.
    _detect_cycles(raw_components)

    # (f) e2e_absent_ok entries: each .component must exist.
    absent_ok_raw = data.get("e2e_absent_ok", [])
    if not isinstance(absent_ok_raw, list):
        raise ComponentsConfigError(
            "components.yaml: 'e2e_absent_ok' must be a list"
        )
    for entry in absent_ok_raw:
        if not isinstance(entry, dict):
            raise ComponentsConfigError(
                "components.yaml: each e2e_absent_ok entry must be a mapping"
            )
        comp = entry.get("component", "")
        if not _is_defined(comp):
            raise ComponentsConfigError(
                "components.yaml: e2e_absent_ok component %r is undefined"
                % (comp,)
            )

    # (g) data_paths: each entry is a list of exactly 2 elements; each name
    # must exist.
    data_paths_raw = data.get("data_paths", [])
    if not isinstance(data_paths_raw, list):
        raise ComponentsConfigError(
            "components.yaml: 'data_paths' must be a list"
        )
    for entry in data_paths_raw:
        if not isinstance(entry, list) or len(entry) != 2:
            raise ComponentsConfigError(
                "components.yaml: data_paths entry %r must be length 2, got %d"
                % (entry, len(entry) if isinstance(entry, list) else -1)
            )
        for comp in entry:
            if not _is_defined(comp):
                raise ComponentsConfigError(
                    "components.yaml: data_paths component %r is undefined"
                    % (comp,)
                )

    # (h) e2e_patterns: default when absent or empty, otherwise it must be a
    # list of strings (a bare string would be globbed one character at a time).
    patterns = data.get("e2e_patterns")
    if not patterns:
        data["e2e_patterns"] = list(_DEFAULT_E2E_PATTERNS)
    elif not isinstance(patterns, list) or not all(
        isinstance(pat, str) for pat in patterns
    ):
        raise ComponentsConfigError(
            "components.yaml: 'e2e_patterns' must be a list of strings"
        )

    return data
283
+
284
+
285
+ def _detect_cycles(raw_components: dict) -> None:
286
+ """Raise ComponentsConfigError if depends_on forms a cycle.
287
+
288
+ Uses DFS with three-color marking (white/gray/black).
289
+ """
290
+ WHITE, GRAY, BLACK = 0, 1, 2
291
+ color: dict[str, int] = {name: WHITE for name in raw_components}
292
+ path: list[str] = []
293
+
294
+ def dfs(node: str) -> None:
295
+ color[node] = GRAY
296
+ path.append(node)
297
+ for neighbor in raw_components[node].get("depends_on", []):
298
+ if neighbor not in color:
299
+ # Undefined references are caught by the caller before this
300
+ # function runs; hitting this branch indicates a call-order bug.
301
+ raise AssertionError(
302
+ "depends_on target %r not in component set; "
303
+ "validate before calling _detect_cycles" % neighbor
304
+ )
305
+ if color[neighbor] == GRAY:
306
+ # cycle: reconstruct the cycle segment from path
307
+ cycle_start = path.index(neighbor)
308
+ cycle_nodes = path[cycle_start:] + [neighbor]
309
+ raise ComponentsConfigError(
310
+ "components.yaml: cycle detected: %s"
311
+ % " -> ".join(cycle_nodes)
312
+ )
313
+ if color[neighbor] == WHITE:
314
+ dfs(neighbor)
315
+ path.pop()
316
+ color[node] = BLACK
317
+
318
+ for node in list(raw_components.keys()):
319
+ if color[node] == WHITE:
320
+ dfs(node)
321
+
322
+
323
+ # ---------------------------------------------------------------------------
324
+ # Layer 2 co-occurrence detection and e2e artifact matching
325
+ # ---------------------------------------------------------------------------
326
+
327
def sorted_pair_hash(a: str, b: str) -> str:
    """Return an order-independent 16-hex-char sha256 id for two component names.

    The names are put in sorted order, joined with "|", UTF-8 encoded and
    hashed; the first 16 hex digits of the digest are returned, so
    sorted_pair_hash(x, y) == sorted_pair_hash(y, x).

    Layer 1 fingerprints are truncated the same way (sha256, 16 hex chars);
    keep the two in step if either changes.
    """
    # Swap only when b sorts strictly first, matching a stable sort of [a, b].
    low, high = (b, a) if b < a else (a, b)
    digest = hashlib.sha256("|".join((low, high)).encode("utf-8"))
    return digest.hexdigest()[:16]
335
+
336
+
337
def find_e2e_artifacts(repo_root: Path, patterns: list[str]) -> set[str]:
    """Return repo-relative POSIX paths of files matching any e2e pattern.

    Uses pathlib.glob (not fnmatch) because patterns may contain **
    (recursive glob). Each hit is converted via
    Path.relative_to(repo_root).as_posix() before insertion, so the result
    only ever contains str values.

    Args:
        repo_root: repository root path.
        patterns: list of glob patterns relative to repo_root (may include
            **). Entries that are empty or not strings are ignored.

    Returns:
        set[str] of repo-relative forward-slash paths. Only regular files are
        returned; directories matched by a pattern are dropped.
    """
    artifacts: set[str] = set()
    for pattern in patterns:
        if not isinstance(pattern, str) or not pattern:
            continue

        # Before Python 3.13, Path.glob yields only directories for a pattern
        # whose last segment is "**", so "tests/e2e/**" would find no files.
        # "**/*" selects every entry below the prefix on all versions.
        segments = pattern.rstrip("/").split("/")
        if segments[-1] == "**":
            pattern = "/".join(segments) + "/*"

        try:
            for candidate in repo_root.glob(pattern):
                if candidate.is_file():
                    artifacts.add(candidate.relative_to(repo_root).as_posix())
        except (OSError, ValueError, NotImplementedError):
            # Bad pattern, permission problem, or an absolute pattern (which
            # Path.glob rejects with NotImplementedError): non-fatal, skip it.
            continue
    return artifacts
361
+
362
+
363
+ def _artifact_satisfies_pair(
364
+ artifacts: set[str],
365
+ component_paths: list[str],
366
+ ) -> bool:
367
+ """Return True iff at least one artifact lies within the component's paths.
368
+
369
+ Uses fnmatch for component path globs; pathlib.glob is not needed here
370
+ because component paths do not require recursive ** expansion.
371
+
372
+ Args:
373
+ artifacts: set[str] of repo-relative POSIX artifact paths.
374
+ component_paths: list of glob patterns from the component's 'paths'.
375
+
376
+ Returns:
377
+ True on first match found; False if no artifact matches any pattern.
378
+ """
379
+ for artifact in artifacts:
380
+ for pattern in component_paths:
381
+ if fnmatch(artifact, pattern):
382
+ return True
383
+ return False
384
+
385
+
386
def check_layer_2(
    diff_text: str,
    repo_root: Path,
    components: Optional[dict] = None,
) -> list[StateFinding]:
    """Layer 2 co-occurrence trigger.

    Emits a finding for each component pair that is changed together in the
    diff while no e2e artifact covers it. Two kinds of pair are considered:
      - hub + dependent: D lists H in its depends_on and both are touched;
        covered when an e2e artifact matches one of D's path globs.
      - peer data path: a declared data_paths pair with both ends touched;
        covered when an e2e artifact matches a path glob of either end.
    A pair is skipped when either of its components is listed in
    e2e_absent_ok.

    Args:
        diff_text: unified diff text.
        repo_root: repository root path (for glob-based artifact search).
        components: validated dict from load_components_yaml, or None.
            When None, returns [] (Layer 2 is opt-in).

    Returns:
        list[StateFinding] with source="E2E_CHECK", disposition=UNCERTAIN,
        id="e2e-layer2", file="", line_range=[], fingerprint "e2e-l2:<hash>".
        At most one finding is emitted per unordered component pair.
    """
    if components is None:
        return []

    component_defs = components["components"]

    # group_source_files expects {name: [patterns]}, not the full YAML dict
    # whose structural keys ("version", "data_paths", "e2e_patterns") it would
    # otherwise iterate over.
    component_paths_map = {
        name: info["paths"] for name, info in component_defs.items()
    }

    changed = get_changed_files(diff_text)
    groups = group_source_files(changed, component_paths_map)

    # A group key counts as a touched component only when one of its files
    # really matches that component's patterns. Checking the key alone is not
    # enough: an unclaimed file falls back to a first-path-segment group, and
    # that segment may happen to equal a component name.
    touched_components: set[str] = {
        key
        for key, members in groups.items()
        if key in component_paths_map
        and any(
            fnmatch(path, pattern)
            for path in members
            for pattern in component_paths_map[key]
        )
    }
    if not touched_components:
        # Nothing can co-occur; skip the filesystem scan entirely.
        return []

    artifacts = find_e2e_artifacts(repo_root, components["e2e_patterns"])
    absent_ok: set[str] = {
        entry["component"]
        for entry in components.get("e2e_absent_ok", [])
    }

    # Reverse depends_on into hub -> [dependents]. A component is a hub when
    # at least one other component lists it in depends_on.
    dependents_of: dict[str, list[str]] = {}
    for d_name, d_info in component_defs.items():
        for target in d_info.get("depends_on", []):
            dependents_of.setdefault(target, []).append(d_name)

    findings: list[StateFinding] = []
    seen_fingerprints: set[str] = set()

    def _emit_p2(a: str, b: str, description: str) -> None:
        """Emit a P2 finding for the (a, b) pair if not already emitted."""
        fp = "e2e-l2:" + sorted_pair_hash(a, b)
        if fp in seen_fingerprints:
            return
        seen_fingerprints.add(fp)
        findings.append(
            StateFinding(
                id="e2e-layer2",
                fingerprint=fp,
                source="E2E_CHECK",
                disposition=Disposition.UNCERTAIN,
                file="",
                line_range=[],
                description=description,
            )
        )

    # HUB+DEPENDENT arm (one-level only; co-occurrence, not blast-radius).
    # Iterating component_defs keeps hubs in declaration order.
    for h_name in component_defs:
        if h_name not in dependents_of or h_name not in touched_components:
            # Not a hub, or hub not touched (co-occurrence requires H).
            continue
        for d_name in dependents_of[h_name]:
            if d_name not in touched_components:
                # Dependent not touched -> no co-occurrence; Layer 1 handles
                # hub-only changes.
                continue
            # Escape hatch: e2e_absent_ok suppresses P2s for either endpoint.
            if d_name in absent_ok or h_name in absent_ok:
                continue
            # PER-PAIR: artifact must be within the dependent's paths.
            if _artifact_satisfies_pair(artifacts, component_defs[d_name]["paths"]):
                continue
            desc = (
                "cross-component change: hub '%s' + dependent '%s' both "
                "touched; no e2e artifact under '%s' paths matches e2e_patterns"
                % (h_name, d_name, d_name)
            )
            _emit_p2(h_name, d_name, desc)

    # PEER data_path arm (symmetric: both endpoints must be touched).
    for pair in components.get("data_paths", []):
        a, b = pair[0], pair[1]
        if a not in touched_components or b not in touched_components:
            continue
        # Escape hatch: e2e_absent_ok suppresses P2s for either endpoint.
        if a in absent_ok or b in absent_ok:
            continue
        # EITHER endpoint's component paths satisfies the pair.
        if _artifact_satisfies_pair(
            artifacts, component_defs[a]["paths"]
        ) or _artifact_satisfies_pair(artifacts, component_defs[b]["paths"]):
            continue
        desc = (
            "cross-component change: peer pair ('%s', '%s') both touched; "
            "no e2e artifact under either component's paths matches e2e_patterns"
            % (a, b)
        )
        _emit_p2(a, b, desc)

    return findings
515
+
516
+
517
def check_layer_1(
    diff_text: str,
    components: Optional[dict] = None,
) -> list[StateFinding]:
    """Layer 1 heuristic: cross-component change with a signature modification.

    Fires only when:
      - group_source_files yields >=2 distinct source groups, AND
      - detect_signature_changes reports a signature change in at least one
        file that belongs to those groups.

    Signature hits in files that grouping leaves out (test directories by
    default) do not count: a new test function is not a changed source
    interface, and reporting it would name a file outside the listed groups.

    Args:
        diff_text: unified diff text.
        components: optional {component_name: [glob_patterns]} mapping passed
            through to group_source_files; None selects default grouping.

    Returns:
        At most ONE finding, disposition=DISMISSED (advisory, never blocks).
        Fingerprint is deterministic: sha256 of the canonical
        "<groups>::<signature files>" string, truncated to 16 hex chars and
        prefixed "e2e-l1:".
    """
    sig_files = detect_signature_changes(diff_text)
    if not sig_files:
        return []

    changed = get_changed_files(diff_text)
    groups = group_source_files(changed, components)

    if len(groups) < 2:
        return []

    # Keep only signature hits anchored in a grouped source file. This also
    # covers the defensive case of sig_files being disjoint from the changed
    # files, since every grouped file comes from `changed`.
    grouped_files = {path for members in groups.values() for path in members}
    source_sig_files = sig_files & grouped_files
    if not source_sig_files:
        return []

    group_names = sorted(groups)
    sig_names = sorted(source_sig_files)

    fp_input = ("|".join(group_names) + "::" + "|".join(sig_names)).encode("utf-8")
    fingerprint = "e2e-l1:" + hashlib.sha256(fp_input).hexdigest()[:16]

    description = (
        "cross-component change spans groups {%s}; "
        "signature changed in {%s}; "
        "is there an e2e test for the joined path?"
        % (", ".join(group_names), ", ".join(sig_names))
    )

    finding = StateFinding(
        id="e2e-layer1",
        fingerprint=fingerprint,
        source="E2E_CHECK",
        disposition=Disposition.DISMISSED,
        file="",
        line_range=[],
        description=description,
    )
    return [finding]
570
+
571
+
572
+ # ---------------------------------------------------------------------------
573
+ # Orchestration: load config, run both layers, deduplicate findings
574
+ # ---------------------------------------------------------------------------
575
+
576
def run_e2e_check(
    diff_text: str,
    repo_root: Path,
) -> tuple[list[StateFinding], list[str]]:
    """Run the Layer 1 and Layer 2 e2e coverage checks and merge their output.

    Args:
        diff_text: unified diff text (from caller via git module).
        repo_root: repository root path (used to locate components.yaml and,
            in Layer 2, to search for e2e artifacts).

    Returns:
        (findings, infra_errors). findings are StateFinding objects with
        source="E2E_CHECK"; infra_errors is a list of error strings. Any
        unexpected exception is turned into ([], [str(exc)]) so that a
        malformed diff cannot crash the review pipeline.

    Behaviour:
        - An invalid components.yaml is reported as one UNCERTAIN finding
          (fingerprint "e2e-config-error"); Layer 2 is then skipped and
          Layer 1 runs with default grouping.
        - When Layer 2 produces any finding, the Layer 1 finding is dropped
          for the whole diff, even if Layer 2 covers only part of the change.
        - Result order: Layer 1, then Layer 2, then the config-error finding.
    """
    infra_errors: list[str] = []
    try:
        config_findings: list[StateFinding] = []

        # None means "no usable Layer 2 config" (file absent or invalid).
        config: Optional[dict]
        try:
            config = load_components_yaml(repo_root)
        except ComponentsConfigError as cfg_err:
            config = None
            # Make the broken config visible to humans as a finding.
            config_findings.append(
                StateFinding(
                    id="e2e-layer2",
                    fingerprint="e2e-config-error",
                    source="E2E_CHECK",
                    disposition=Disposition.UNCERTAIN,
                    file="",
                    line_range=[],
                    description=str(cfg_err),
                )
            )

        # Layer 1 groups files via {name: [patterns]}; handing it the whole
        # YAML dict would make it iterate "version", "data_paths", etc.
        paths_by_component: Optional[dict] = None
        if config is not None:
            paths_by_component = {
                name: info["paths"]
                for name, info in config["components"].items()
            }

        layer1 = check_layer_1(diff_text, components=paths_by_component)
        layer2 = check_layer_2(diff_text, repo_root, components=config)

        # Layer 2 is the stronger signal; its presence silences Layer 1.
        if layer2:
            layer1 = []

        return (layer1 + layer2 + config_findings, infra_errors)

    except Exception as exc:  # noqa: BLE001
        infra_errors.append(str(exc))
        return ([], infra_errors)