code-review-forge 2.0.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_forge/__init__.py +14 -0
- code_forge/__main__.py +8 -0
- code_forge/autofix.py +78 -0
- code_forge/baseline.py +216 -0
- code_forge/cli.py +983 -0
- code_forge/delta.py +65 -0
- code_forge/diagnose.py +109 -0
- code_forge/diff.py +82 -0
- code_forge/disposition.py +32 -0
- code_forge/e2e_check.py +641 -0
- code_forge/env_resolver.py +91 -0
- code_forge/errors.py +34 -0
- code_forge/exit_codes.py +37 -0
- code_forge/factories.py +191 -0
- code_forge/falsify.py +85 -0
- code_forge/gate_check.py +466 -0
- code_forge/git.py +351 -0
- code_forge/hold.py +126 -0
- code_forge/install_hooks.py +331 -0
- code_forge/lock.py +162 -0
- code_forge/machine.py +792 -0
- code_forge/mode_resolver.py +60 -0
- code_forge/mutation.py +380 -0
- code_forge/parsers/__init__.py +56 -0
- code_forge/parsers/_sarif.py +77 -0
- code_forge/parsers/base.py +65 -0
- code_forge/parsers/checkpatch.py +66 -0
- code_forge/parsers/clippy.py +85 -0
- code_forge/parsers/non_ascii.py +47 -0
- code_forge/parsers/ruff.py +18 -0
- code_forge/parsers/semgrep.py +18 -0
- code_forge/parsers/shellcheck.py +56 -0
- code_forge/registry.py +153 -0
- code_forge/reporter.py +133 -0
- code_forge/runner.py +205 -0
- code_forge/sarif.py +226 -0
- code_forge/skills/adversarial-qe/SKILL.md +272 -0
- code_forge/skills/code-forge/SKILL.md +1193 -0
- code_forge/skills/code-review-expert/SKILL.md +162 -0
- code_forge/skills/code-review-expert/references/code-quality-checklist.md +130 -0
- code_forge/skills/code-review-expert/references/removal-plan.md +52 -0
- code_forge/skills/code-review-expert/references/security-checklist.md +118 -0
- code_forge/skills/code-review-expert/references/solid-checklist.md +65 -0
- code_forge/skills/kernel-fp-verify/SKILL.md +101 -0
- code_forge/skills/qodo-review/SKILL.md +135 -0
- code_forge/skills/smoke-test/SKILL.md +253 -0
- code_forge/skills/smoke-test/references/boundary-cases.md +114 -0
- code_forge/skills/smoke-test/references/concurrency-patterns.md +306 -0
- code_forge/skills/smoke-test/references/injection-payloads.md +124 -0
- code_forge/skills/smoke-test/test-library/shell/README.md +271 -0
- code_forge/skills/smoke-test/test-library/shell/primitives.sh +352 -0
- code_forge/skills/smoke-test/test-library/shell/primitives_test.sh +324 -0
- code_forge/snapshot.py +196 -0
- code_forge/source.py +64 -0
- code_forge/state.py +246 -0
- code_forge/verdict.py +43 -0
- code_review_forge-2.0.0a1.dist-info/METADATA +237 -0
- code_review_forge-2.0.0a1.dist-info/RECORD +62 -0
- code_review_forge-2.0.0a1.dist-info/WHEEL +5 -0
- code_review_forge-2.0.0a1.dist-info/entry_points.txt +2 -0
- code_review_forge-2.0.0a1.dist-info/licenses/LICENSE +179 -0
- code_review_forge-2.0.0a1.dist-info/top_level.txt +1 -0
code_forge/e2e_check.py
ADDED
|
@@ -0,0 +1,641 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
# Copyright (c) 2026, Minxi Hou <houminxi@gmail.com>
|
|
3
|
+
"""E2E coverage heuristic for forge (R3).
|
|
4
|
+
|
|
5
|
+
Layer 1 (heuristic, no config): diff touches >=2 source groups AND modifies
|
|
6
|
+
a function signature/return type -> non-blocking checklist finding.
|
|
7
|
+
Layer 2 (explicit, opt-in): .code-forge/components.yaml defines components, hubs,
|
|
8
|
+
data paths, and e2e artifact patterns. Co-occurrence trigger -> P2 finding.
|
|
9
|
+
|
|
10
|
+
No subprocess or git calls. diff_text is provided by caller via git module.
|
|
11
|
+
Uses unidiff directly (diff.py does not expose Hunk.section_header).
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import hashlib
|
|
17
|
+
import re
|
|
18
|
+
from fnmatch import fnmatch
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
from typing import Optional
|
|
21
|
+
|
|
22
|
+
import unidiff
|
|
23
|
+
import yaml
|
|
24
|
+
|
|
25
|
+
from .diff import get_changed_files
|
|
26
|
+
from .disposition import Disposition
|
|
27
|
+
from .errors import ComponentsConfigError
|
|
28
|
+
from .state import StateFinding
|
|
29
|
+
|
|
30
|
+
# ---------------------------------------------------------------------------
|
|
31
|
+
# Signature-detection patterns (Python + shell; C detection not implemented).
|
|
32
|
+
# Compiled once at module level to avoid per-call overhead.
|
|
33
|
+
# ---------------------------------------------------------------------------
|
|
34
|
+
|
|
35
|
+
# Added-line pattern: a Python function header, "def name(" or
# "async def name(", at any indentation.
_PY_DEF_RE = re.compile(r"^\s*(async\s+)?def\s+[A-Za-z_]\w*\s*\(")

# Added-line pattern: a Python return annotation, "-> <type>" followed by a
# colon that ends the line.
_PY_RETURN_RE = re.compile(r"->\s*\S+.*:\s*$")

# Added-line pattern: a shell function header, "name()" or "function name()",
# optionally followed by an opening brace.
_SH_FUNC_RE = re.compile(r"^\s*(function\s+)?[A-Za-z_]\w*\s*\(\s*\)\s*\{?\s*$")

# Arm 2 pattern: searched inside a hunk's section_header text, which git
# fills with context such as "def parse(self, ..." or "foo() {".
SECTION_HEADER_DEF_RE = re.compile(
    r"(?:(?:async\s+)?def\s+[A-Za-z_]\w*\s*\(|"
    r"(?:function\s+)?[A-Za-z_]\w*\s*\(\s*\)\s*\{?)"
)

# Every added-line signature pattern, in the order they are tried.
_SIG_PATTERNS = [
    _PY_DEF_RE,
    _PY_RETURN_RE,
    _SH_FUNC_RE,
]

# First path segments treated as test directories and left out of source
# grouping by default.
_TEST_DIRS: frozenset[str] = frozenset(("tests", "test", "spec"))

# e2e artifact patterns used when components.yaml gives no e2e_patterns.
_DEFAULT_E2E_PATTERNS = [
    "tests/e2e/**",
    "test_*integration*",
]
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
# File suffixes whose added lines are Python source. The shell-function
# pattern is skipped for these because a bare Python call such as "main()"
# is textually identical to a shell function header.
_PYTHON_SUFFIXES = (".py", ".pyi")


def _hunk_touches_signature(hunk, patterns) -> bool:
    """Return True when either detection arm fires for a single hunk."""
    # Arm 1: any added line matching one of the signature patterns.
    for line in hunk:
        if line.is_added and any(pat.search(line.value) for pat in patterns):
            return True
    # Arm 2: the hunk's section header names an enclosing def/function.
    section_hdr = getattr(hunk, "section_header", "") or ""
    return bool(section_hdr and SECTION_HEADER_DEF_RE.search(section_hdr))


def detect_signature_changes(diff_text: str) -> set[str]:
    """Return set of file paths whose diff adds or modifies a function signature.

    Two detection arms combined with logical OR:
      - Arm 1 (added-lines regex): an added line matches a signature pattern.
        For ``.py``/``.pyi`` files the shell-function pattern is not applied,
        so an added bare call like ``main()`` is not mistaken for a shell
        function definition. All other files are checked against every
        pattern in ``_SIG_PATTERNS``.
      - Arm 2 (section_header): hunk.section_header matches
        SECTION_HEADER_DEF_RE.

    When section_header is empty (flat shell without a function wrapper), only
    Arm 1 contributes. That is the documented fallback, not an error.

    Scanning of a file stops at the first hunk that fires; further hunks
    cannot change the result for that file.

    Args:
        diff_text: unified diff text.

    Returns:
        Set of paths (as reported by unidiff) with a detected signature
        change. Empty set for empty diff, unparseable diff, or no signature
        found. Removed files are never reported.
    """
    if not diff_text or not diff_text.strip():
        return set()

    try:
        patchset = unidiff.PatchSet(diff_text)
    except unidiff.errors.UnidiffParseError:
        return set()

    python_patterns = [pat for pat in _SIG_PATTERNS if pat is not _SH_FUNC_RE]

    sig_files: set[str] = set()
    for patched_file in patchset:
        if patched_file.is_removed_file:
            continue
        filepath = patched_file.path
        if Path(filepath).suffix.lower() in _PYTHON_SUFFIXES:
            patterns = python_patterns
        else:
            patterns = _SIG_PATTERNS
        # any() short-circuits, so remaining hunks are skipped after a hit.
        if any(_hunk_touches_signature(hunk, patterns) for hunk in patched_file):
            sig_files.add(filepath)

    return sig_files
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def group_source_files(
    files: list[str],
    components: Optional[dict] = None,
    exclude_test_dirs: bool = True,
) -> dict[str, list[str]]:
    """Bucket diff file paths by the source component they belong to.

    Args:
        files: file paths taken from the diff.
        components: optional {component_name: [glob_patterns]} mapping. When
            given, a file goes to the first component (in mapping order) that
            has a pattern matching it; a file matching none is bucketed by
            its first path segment instead.
        exclude_test_dirs: when True (the default), files whose first path
            segment is one of {"tests", "test", "spec"} are dropped.

    Returns:
        {group_name: sorted file list}. Groups only exist for files that were
        kept, so no group is ever empty.

    A top-level file (no "/" in its path) is keyed by its OWN filename, never
    by "": collapsing every top-level file into one pseudo-group would make
    Layer 1 fire spuriously.
    """
    buckets: dict[str, list[str]] = {}

    for path in files:
        segments = path.split("/")
        head = segments[0]

        # Test directories are recognised by their first segment only.
        if exclude_test_dirs and head in _TEST_DIRS:
            continue

        if components is None:
            # No config: directory name, or the filename for top-level files.
            key = path if len(segments) <= 1 else head
        else:
            # First component owning a matching glob wins; otherwise fall
            # back to the first path segment.
            owner = next(
                (
                    comp_name
                    for comp_name, globs in components.items()
                    if any(fnmatch(path, glob) for glob in globs)
                    and comp_name is not None
                ),
                None,
            )
            key = head if owner is None else owner

        if key not in buckets:
            buckets[key] = []
        buckets[key].append(path)

    # Sorted member lists keep the output deterministic.
    result: dict[str, list[str]] = {}
    for key, members in buckets.items():
        result[key] = sorted(members)
    return result
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
# ---------------------------------------------------------------------------
|
|
163
|
+
# components.yaml loader and schema validation
|
|
164
|
+
# ---------------------------------------------------------------------------
|
|
165
|
+
|
|
166
|
+
def _is_str_list(value: object) -> bool:
    """Return True when *value* is a list whose items are all strings."""
    return isinstance(value, list) and all(
        isinstance(item, str) for item in value
    )


def load_components_yaml(repo_root: Path) -> Optional[dict]:
    """Load and validate .code-forge/components.yaml.

    Validation is strict about value types so that malformed configuration is
    reported as a ComponentsConfigError here rather than surfacing later as a
    TypeError (for example ``depends_on: null``, a string where a list is
    expected, or a non-string component name).

    Args:
        repo_root: repository root path.

    Returns:
        Validated dict with e2e_patterns defaulted, or None when the file
        does not exist (Layer 2 is opt-in; absence is normal, not an error).

    Raises:
        ComponentsConfigError: when the file is present but fails schema
            validation. Every message starts "components.yaml: " and names
            the offending key.
    """
    config_path = repo_root / ".code-forge" / "components.yaml"
    if not config_path.exists():
        return None

    try:
        with open(config_path, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f)
    except yaml.YAMLError as e:
        raise ComponentsConfigError(
            "components.yaml: YAML parse error: %s" % e
        ) from e
    except UnicodeDecodeError as e:
        # Undecodable bytes are a content problem, not an infrastructure one.
        raise ComponentsConfigError(
            "components.yaml: file is not valid UTF-8: %s" % e
        ) from e

    if not isinstance(data, dict):
        raise ComponentsConfigError(
            "components.yaml: top-level value must be a mapping"
        )

    # (a) version check
    version = data.get("version")
    if version != 1:
        raise ComponentsConfigError(
            "components.yaml: version: expected 1, got %r" % (version,)
        )

    # (b) components must be a mapping of string names; each value carries a
    # 'paths' list of glob strings and, optionally, a 'depends_on' list of
    # component names.
    raw_components = data.get("components")
    if not isinstance(raw_components, dict):
        raise ComponentsConfigError(
            "components.yaml: 'components' must be a mapping"
        )
    for name, info in raw_components.items():
        if not isinstance(name, str):
            # Names are later joined into fingerprints and messages.
            raise ComponentsConfigError(
                "components.yaml: component name %r must be a string" % (name,)
            )
        if not isinstance(info, dict) or "paths" not in info:
            raise ComponentsConfigError(
                "components.yaml: component %r: missing 'paths' list" % (name,)
            )
        if not isinstance(info["paths"], list):
            raise ComponentsConfigError(
                "components.yaml: component %r: 'paths' must be a list" % (name,)
            )
        if not _is_str_list(info["paths"]):
            raise ComponentsConfigError(
                "components.yaml: component %r: 'paths' entries must be strings"
                % (name,)
            )
        if "depends_on" in info and not _is_str_list(info["depends_on"]):
            # Rejects null, a bare string (which would otherwise be iterated
            # character by character) and unhashable entries.
            raise ComponentsConfigError(
                "components.yaml: component %r: 'depends_on' must be a list "
                "of component names" % (name,)
            )

    component_names = set(raw_components)

    # (c) depends_on targets must exist; (d) no self-reference.
    for name, info in raw_components.items():
        for target in info.get("depends_on", []):
            if target == name:
                raise ComponentsConfigError(
                    "components.yaml: self-reference '%s' -> '%s'"
                    % (name, name)
                )
            if target not in component_names:
                raise ComponentsConfigError(
                    "components.yaml: depends_on '%s' (from '%s') is undefined"
                    % (target, name)
                )

    # (e) cycle detection via DFS.
    _detect_cycles(raw_components)

    # (f) e2e_absent_ok entries: each .component must exist.
    absent_ok_raw = data.get("e2e_absent_ok", [])
    if not isinstance(absent_ok_raw, list):
        raise ComponentsConfigError(
            "components.yaml: 'e2e_absent_ok' must be a list"
        )
    for entry in absent_ok_raw:
        if not isinstance(entry, dict):
            raise ComponentsConfigError(
                "components.yaml: each e2e_absent_ok entry must be a mapping"
            )
        comp = entry.get("component", "")
        # The isinstance guard keeps unhashable values away from the set test.
        if not isinstance(comp, str) or comp not in component_names:
            raise ComponentsConfigError(
                "components.yaml: e2e_absent_ok component %r is undefined"
                % (comp,)
            )

    # (g) data_paths: each entry is a list of exactly 2 elements; each name
    # must exist.
    data_paths_raw = data.get("data_paths", [])
    if not isinstance(data_paths_raw, list):
        raise ComponentsConfigError(
            "components.yaml: 'data_paths' must be a list"
        )
    for entry in data_paths_raw:
        if not isinstance(entry, list) or len(entry) != 2:
            raise ComponentsConfigError(
                "components.yaml: data_paths entry %r must be length 2, got %d"
                % (entry, len(entry) if isinstance(entry, list) else -1)
            )
        for comp in entry:
            if not isinstance(comp, str) or comp not in component_names:
                raise ComponentsConfigError(
                    "components.yaml: data_paths component %r is undefined"
                    % (comp,)
                )

    # (h) e2e_patterns: default when absent or empty, otherwise it must be a
    # list of glob strings (a bare string would be globbed per character).
    patterns = data.get("e2e_patterns")
    if not patterns:
        data["e2e_patterns"] = list(_DEFAULT_E2E_PATTERNS)
    elif not _is_str_list(patterns):
        raise ComponentsConfigError(
            "components.yaml: 'e2e_patterns' must be a list of glob strings"
        )

    return data
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
def _detect_cycles(raw_components: dict) -> None:
    """Reject a components mapping whose depends_on edges contain a cycle.

    Walks the dependency graph depth-first, tracking for each component
    whether it is unvisited, on the current walk, or fully explored.

    Args:
        raw_components: {name: info} mapping; info may carry a 'depends_on'
            list naming other components.

    Raises:
        ComponentsConfigError: when a dependency leads back to a component
            on the current walk; the message spells out the cycle.
        AssertionError: when a depends_on target is not a known component,
            which means the caller skipped its own validation.
    """
    UNVISITED, IN_PROGRESS, DONE = 0, 1, 2
    state: dict[str, int] = dict.fromkeys(raw_components, UNVISITED)
    trail: list[str] = []

    def visit(name: str) -> None:
        state[name] = IN_PROGRESS
        trail.append(name)
        for dep in raw_components[name].get("depends_on", []):
            if dep not in state:
                # The caller rejects undefined references before invoking
                # this function, so reaching here is a call-order bug.
                raise AssertionError(
                    "depends_on target %r not in component set; "
                    "validate before calling _detect_cycles" % dep
                )
            status = state[dep]
            if status == IN_PROGRESS:
                # dep is already on the trail: slice out the loop and close
                # it by repeating dep at the end.
                loop = trail[trail.index(dep):]
                loop.append(dep)
                raise ComponentsConfigError(
                    "components.yaml: cycle detected: %s"
                    % " -> ".join(loop)
                )
            if status == UNVISITED:
                visit(dep)
        trail.pop()
        state[name] = DONE

    for name in list(raw_components):
        if state[name] == UNVISITED:
            visit(name)
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
# ---------------------------------------------------------------------------
|
|
324
|
+
# Layer 2 co-occurrence detection and e2e artifact matching
|
|
325
|
+
# ---------------------------------------------------------------------------
|
|
326
|
+
|
|
327
|
+
def sorted_pair_hash(a: str, b: str) -> str:
    """Return an order-independent 16-hex-char sha256 id for two names.

    The two names are sorted, joined with "|", UTF-8 encoded and hashed, so
    (a, b) and (b, a) yield the same value. The Layer 1 fingerprint uses the
    same truncated-sha256 scheme; keep the two in sync if either changes.
    """
    ordered = [a, b]
    ordered.sort()
    payload = "|".join(ordered).encode("utf-8")
    digest = hashlib.sha256(payload).hexdigest()
    return digest[:16]
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
def find_e2e_artifacts(repo_root: Path, patterns: list[str]) -> set[str]:
    """Return repo-relative POSIX paths of files matching any e2e pattern.

    Uses pathlib.glob (not fnmatch) because patterns may contain **
    (recursive glob). Each path is converted via Path.relative_to(repo_root)
    .as_posix() before insertion -- never mix Path and str in the returned set.

    A pattern whose last component is "**" (such as the default
    "tests/e2e/**") is additionally globbed as "<pattern>/*". Before Python
    3.13, pathlib yields only directories for a trailing "**", so without the
    extra glob no file would ever be found for such a pattern. On newer
    versions both globs overlap and the set removes the duplicates.

    Args:
        repo_root: repository root path.
        patterns: list of glob patterns (may include **).

    Returns:
        set[str] of repo-relative forward-slash paths. Patterns that pathlib
        rejects (empty, malformed, or non-relative) and filesystem errors
        contribute nothing; they never raise.
    """
    artifacts: set[str] = set()
    for pattern in patterns:
        candidates = [pattern]
        if pattern == "**" or pattern.endswith("/**"):
            candidates.append(pattern + "/*")
        for candidate in candidates:
            try:
                for match in repo_root.glob(candidate):
                    if match.is_file():
                        artifacts.add(match.relative_to(repo_root).as_posix())
            except (OSError, ValueError, NotImplementedError):
                # Glob errors are non-fatal: OSError for filesystem problems,
                # ValueError for a bad pattern, NotImplementedError for an
                # absolute (non-relative) pattern. Skip this pattern.
                continue
    return artifacts
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
def _artifact_satisfies_pair(
    artifacts: set[str],
    component_paths: list[str],
) -> bool:
    """Tell whether any e2e artifact falls under a component's path globs.

    Matching is done with fnmatch rather than pathlib.glob: the artifacts are
    already concrete paths, so each one only needs to be tested against the
    component's patterns.

    Args:
        artifacts: repo-relative POSIX artifact paths.
        component_paths: glob patterns from the component's 'paths' entry.

    Returns:
        True as soon as one artifact matches one pattern, otherwise False
        (including when either collection is empty).
    """
    return any(
        fnmatch(candidate, glob)
        for candidate in artifacts
        for glob in component_paths
    )
|
|
384
|
+
|
|
385
|
+
|
|
386
|
+
def check_layer_2(
    diff_text: str,
    repo_root: Path,
    components: Optional[dict] = None,
) -> list[StateFinding]:
    """Layer 2 co-occurrence trigger.

    A component counts as touched only when at least one changed file matches
    one of its own path patterns. Group keys from group_source_files are not
    trusted on their own: a file that matches no component falls back to its
    first path segment, and that segment can equal a component name.

    Args:
        diff_text: unified diff text.
        repo_root: repository root path (for glob-based artifact search).
        components: validated dict from load_components_yaml, or None.
            When None, returns [] (Layer 2 is opt-in).

    Returns:
        list[StateFinding] with source="E2E_CHECK", disposition=UNCERTAIN,
        id="e2e-layer2", file="", line_range=[], fingerprint "e2e-l2:<hash>".
        At most one finding is emitted per unordered component pair.
    """
    if components is None:
        return []

    component_defs = components["components"]

    # group_source_files expects {name: [patterns]}, not the full YAML dict
    # whose structural keys ("version", "data_paths", "e2e_patterns") it
    # would otherwise iterate over.
    component_paths_map = {
        name: info["paths"] for name, info in component_defs.items()
    }

    changed = get_changed_files(diff_text)
    groups = group_source_files(changed, component_paths_map)

    # Keep a group only if it is a real component AND one of its members
    # matches that component's patterns. Fallback members matched no
    # component at all, so they cannot pass this test.
    touched_components: set[str] = set()
    for key, members in groups.items():
        own_patterns = component_paths_map.get(key)
        if own_patterns is None:
            continue
        if any(fnmatch(member, pat) for member in members for pat in own_patterns):
            touched_components.add(key)

    if not touched_components:
        # Both arms need touched components; skip the filesystem glob.
        return []

    artifacts = find_e2e_artifacts(repo_root, components["e2e_patterns"])
    absent_ok: set[str] = {
        entry["component"] for entry in components.get("e2e_absent_ok", [])
    }

    # hub -> dependents, built in one pass over depends_on. A component is a
    # hub when other components list it in their depends_on.
    dependents_of: dict[str, list[str]] = {}
    for name, info in component_defs.items():
        for target in info.get("depends_on", []):
            dependents_of.setdefault(target, []).append(name)

    findings: list[StateFinding] = []
    seen_fingerprints: set[str] = set()

    def _emit_p2(a: str, b: str, description: str) -> None:
        """Emit a P2 finding for the (a, b) pair if not already emitted."""
        fp = "e2e-l2:" + sorted_pair_hash(a, b)
        if fp in seen_fingerprints:
            return
        seen_fingerprints.add(fp)
        findings.append(
            StateFinding(
                id="e2e-layer2",
                fingerprint=fp,
                source="E2E_CHECK",
                disposition=Disposition.UNCERTAIN,
                file="",
                line_range=[],
                description=description,
            )
        )

    # HUB+DEPENDENT arm (one-level only; co-occurrence, not blast-radius).
    # Iterating component_defs keeps findings in configuration order.
    for h_name in component_defs:
        if h_name not in touched_components:
            # Hub not touched in this diff; co-occurrence requires it.
            continue
        for d_name in dependents_of.get(h_name, ()):
            if d_name not in touched_components:
                # Dependent not touched -> no co-occurrence; Layer 1 handles
                # hub-only changes.
                continue
            # Escape hatch: e2e_absent_ok suppresses P2s for either endpoint.
            if d_name in absent_ok or h_name in absent_ok:
                continue
            # PER-PAIR: artifact must be within the dependent's paths.
            if _artifact_satisfies_pair(artifacts, component_defs[d_name]["paths"]):
                continue
            desc = (
                "cross-component change: hub '%s' + dependent '%s' both "
                "touched; no e2e artifact under '%s' paths matches e2e_patterns"
                % (h_name, d_name, d_name)
            )
            _emit_p2(h_name, d_name, desc)

    # PEER data_path arm (symmetric: both endpoints must be touched).
    for pair in components.get("data_paths", []):
        a, b = pair[0], pair[1]
        if a not in touched_components or b not in touched_components:
            continue
        # Escape hatch: e2e_absent_ok suppresses P2s for either endpoint.
        if a in absent_ok or b in absent_ok:
            continue
        # An artifact under EITHER endpoint's paths satisfies the pair.
        satisfied = _artifact_satisfies_pair(
            artifacts, component_defs[a]["paths"]
        ) or _artifact_satisfies_pair(
            artifacts, component_defs[b]["paths"]
        )
        if satisfied:
            continue
        desc = (
            "cross-component change: peer pair ('%s', '%s') both touched; "
            "no e2e artifact under either component's paths matches e2e_patterns"
            % (a, b)
        )
        _emit_p2(a, b, desc)

    return findings
|
|
515
|
+
|
|
516
|
+
|
|
517
|
+
def check_layer_1(
    diff_text: str,
    components: Optional[dict] = None,
) -> list[StateFinding]:
    """Layer 1 heuristic: cross-component change with a signature modification.

    Fires only when:
      - detect_signature_changes finds at least one changed signature, AND
      - group_source_files yields >=2 distinct source groups, AND
      - at least one signature change is in a file belonging to those groups.

    The last condition keeps signature hits in excluded test directories
    (for example a newly added test function) from triggering or labelling
    the advisory; only source files anchor the finding.

    Args:
        diff_text: unified diff text.
        components: optional {component_name: [glob_patterns]} mapping passed
            through to group_source_files; None selects default grouping.

    Returns:
        At most ONE finding, disposition=DISMISSED (advisory, never blocks).
        Fingerprint is deterministic: sha256 of the canonical
        "<groups>::<signature files>" string, truncated to 16 hex chars,
        prefixed "e2e-l1:".
    """
    sig_files = detect_signature_changes(diff_text)
    if not sig_files:
        return []

    changed = get_changed_files(diff_text)
    groups = group_source_files(changed, components)
    if len(groups) < 2:
        return []

    # Only signature changes inside the grouped source files count. This
    # also covers the defensive case where sig_files and the changed-file
    # list do not overlap: no anchor in the diff means no finding.
    grouped_files = {path for members in groups.values() for path in members}
    anchored_sig_files = sig_files & grouped_files
    if not anchored_sig_files:
        return []

    group_names = sorted(groups)
    sig_names = sorted(anchored_sig_files)

    fp_input = ("|".join(group_names) + "::" + "|".join(sig_names)).encode("utf-8")
    fingerprint = "e2e-l1:" + hashlib.sha256(fp_input).hexdigest()[:16]

    description = (
        "cross-component change spans groups {%s}; "
        "signature changed in {%s}; "
        "is there an e2e test for the joined path?"
        % (", ".join(group_names), ", ".join(sig_names))
    )

    finding = StateFinding(
        id="e2e-layer1",
        fingerprint=fingerprint,
        source="E2E_CHECK",
        disposition=Disposition.DISMISSED,
        file="",
        line_range=[],
        description=description,
    )
    return [finding]
|
|
570
|
+
|
|
571
|
+
|
|
572
|
+
# ---------------------------------------------------------------------------
|
|
573
|
+
# Orchestration: load config, run both layers, deduplicate findings
|
|
574
|
+
# ---------------------------------------------------------------------------
|
|
575
|
+
|
|
576
|
+
def run_e2e_check(
    diff_text: str,
    repo_root: Path,
) -> tuple[list[StateFinding], list[str]]:
    """Run both e2e coverage layers and merge their findings.

    Args:
        diff_text: unified diff text (from caller via git module).
        repo_root: repository root path; used to locate components.yaml and
            by Layer 2 for path resolution.

    Returns:
        (findings, infra_errors). findings are StateFinding objects with
        source="E2E_CHECK"; infra_errors is a list of error strings. Any
        unexpected exception yields ([], [str(e)]) so that a malformed diff
        never crashes the review pipeline.

    A components.yaml that fails validation is reported as one UNCERTAIN
    finding; Layer 2 is then skipped and Layer 1 runs with default grouping.

    Dedup: when Layer 2 produces anything, the Layer 1 finding is dropped.
    Layer 2 is the stronger, opt-in signal, and the suppression is
    whole-diff: even a partial Layer 2 match removes the Layer 1 finding.
    """
    infra_errors: list[str] = []
    try:
        config_findings: list[StateFinding] = []

        # None means Layer 2 is not configured (or its config is invalid).
        try:
            loaded = load_components_yaml(repo_root)
        except ComponentsConfigError as cfg_err:
            loaded = None
            # Make the broken config visible to humans as a finding.
            config_findings.append(
                StateFinding(
                    id="e2e-layer2",
                    fingerprint="e2e-config-error",
                    source="E2E_CHECK",
                    disposition=Disposition.UNCERTAIN,
                    file="",
                    line_range=[],
                    description=str(cfg_err),
                )
            )

        # Layer 1 groups by {name: [patterns]}; handing it the full YAML dict
        # would make it iterate structural keys such as "version".
        paths_by_component: Optional[dict] = None
        if loaded is not None:
            paths_by_component = {}
            for name, info in loaded["components"].items():
                paths_by_component[name] = info["paths"]

        layer1 = check_layer_1(diff_text, components=paths_by_component)
        layer2 = check_layer_2(diff_text, repo_root, components=loaded)

        # Layer 2 supersedes Layer 1 whenever it reports anything.
        if layer2:
            layer1 = []

        return (layer1 + layer2 + config_findings, infra_errors)

    except Exception as exc:  # noqa: BLE001
        infra_errors.append(str(exc))
        return ([], infra_errors)
|