rein-engine 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rein/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """rein: agent-agnostic guardrails that keep AI-written code clean and safe."""
2
+
3
+ __version__ = "0.1.1"
rein/cli/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """Command-line adapter around rein.core."""
rein/cli/__main__.py ADDED
@@ -0,0 +1,108 @@
1
+ """`rein` command line.
2
+
3
+ This is a thin adapter: it gathers input (files, git state), calls the pure
4
+ checks in :mod:`rein.core`, renders the resulting findings, and chooses an
5
+ exit code. All real logic lives in core so the MCP server and git hook can
6
+ reuse it unchanged.
7
+
8
+ Subcommands:
9
+ rein scan [PATH ...] Scan files/dirs for leaked secrets.
10
+ rein commit-check [-m MSG] Check a commit message + staged files.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import argparse
16
+ import sys
17
+ from typing import Any
18
+
19
+ from .. import __version__
20
+ from .commands import (
21
+ cmd_baseline,
22
+ cmd_commit_check,
23
+ cmd_drift,
24
+ cmd_learn,
25
+ cmd_lint,
26
+ cmd_scan,
27
+ cmd_review,
28
+ cmd_security,
29
+ )
30
+
31
+
32
def _format_parent() -> argparse.ArgumentParser:
    """Build the parent parser that contributes the shared ``--format`` option.

    The parser is created with ``add_help=False`` so it can be handed to
    subcommands through ``parents=[...]`` without bringing its own ``-h``.
    """
    shared = argparse.ArgumentParser(add_help=False)
    format_option = {
        "choices": ["text", "json", "sarif"],
        "default": "text",
        "help": "Output format (default: text).",
    }
    shared.add_argument("--format", **format_option)
    return shared
39
+
40
+
41
def _add_basic_parsers(sub: Any, fmt_parent: argparse.ArgumentParser) -> None:
    """Register the ``scan``, ``commit-check``, ``lint`` and ``security`` subcommands.

    Each subcommand inherits ``--format`` from *fmt_parent* and stores its
    handler under the ``func`` default so the caller can dispatch on it.
    """
    scan = sub.add_parser("scan", help="Scan files/dirs for leaked secrets.", parents=[fmt_parent])
    scan.add_argument("paths", nargs="*", help="Files or directories (default: .).")
    scan.add_argument(
        "--diff",
        action="store_true",
        help="Read a unified diff from stdin and scan only added lines.",
    )
    scan.set_defaults(func=cmd_scan)

    commit_check = sub.add_parser(
        "commit-check",
        help="Check a commit message + staged files.",
        parents=[fmt_parent],
    )
    commit_check.add_argument("-m", "--message", help="Commit message text.")
    commit_check.add_argument("-F", "--message-file", help="Read the message from a file.")
    commit_check.set_defaults(func=cmd_commit_check)

    lint_cmd = sub.add_parser("lint", help="Lint Python files.", parents=[fmt_parent])
    lint_cmd.add_argument("paths", nargs="*", help="Files or directories (default: .).")
    lint_cmd.add_argument("--ruff", action="store_true", help="Run ruff alongside core rules.")
    lint_cmd.set_defaults(func=cmd_lint)

    security_cmd = sub.add_parser(
        "security",
        help="Scan Python files for unsafe-code patterns.",
        parents=[fmt_parent],
    )
    security_cmd.add_argument("paths", nargs="*", help="Files or directories (default: .).")
    security_cmd.set_defaults(func=cmd_security)
60
+
61
+
62
def build_parser() -> argparse.ArgumentParser:
    """Assemble the top-level ``rein`` parser with every subcommand attached.

    A subcommand is mandatory. ``review`` and the subcommands registered by
    ``_add_basic_parsers`` share the ``--format`` option; ``baseline``,
    ``learn`` and ``drift`` do not.
    """
    root = argparse.ArgumentParser(
        prog="rein",
        description="Guardrails that keep AI-written code clean and secure.",
    )
    root.add_argument("--version", action="version", version=f"rein {__version__}")
    commands = root.add_subparsers(dest="command", required=True)

    shared_format = _format_parent()
    _add_basic_parsers(commands, shared_format)

    review = commands.add_parser(
        "review",
        help="Run all guardrails and return a verdict.",
        parents=[shared_format],
    )
    review.add_argument("paths", nargs="*", help="Files or directories (default: .).")
    review.add_argument(
        "--diff",
        action="store_true",
        help="Read a unified diff from stdin; review only added lines (file content read from the working tree).",
    )
    review.add_argument("--explain", action="store_true", help="Show how to fix each finding.")
    review.add_argument("--baseline", help="Suppress findings recorded in this baseline file.")
    review.add_argument("--config", help="Path to .rein.toml configuration file.")
    review.add_argument("--bandit", action="store_true", help="Run bandit alongside core rules.")
    review.add_argument("--gitleaks", action="store_true", help="Run gitleaks alongside core rules.")
    review.add_argument("--semgrep", action="store_true", help="Run semgrep alongside core rules.")
    review.set_defaults(func=cmd_review)

    record = commands.add_parser("baseline", help="Record current findings as an accepted baseline.")
    record.add_argument("paths", nargs="*", help="Files or directories (default: .).")
    record.add_argument("-o", "--output", default=None, help="Output file (default: .rein-baseline.json).")
    record.set_defaults(func=cmd_baseline)

    learn = commands.add_parser("learn", help="Measure conventions and draft a profile.")
    learn.add_argument("paths", nargs="*", help="Files or directories (default: .).")
    learn.add_argument("-o", "--output", help="Output file to write draft (default: stdout).")
    learn.set_defaults(func=cmd_learn)

    drift = commands.add_parser("drift", help="Check convention profile drift.")
    drift.add_argument("paths", nargs="*", help="Files or directories (default: .).")
    drift.set_defaults(func=cmd_drift)

    return root
99
+
100
+
101
def main(argv: list[str] | None = None) -> int:
    """Parse *argv* and dispatch to the selected subcommand.

    ``argv=None`` lets argparse fall back to ``sys.argv[1:]``. The return
    value is whatever the subcommand handler returns and is used as the
    process exit code by the ``__main__`` guard.
    """
    namespace = build_parser().parse_args(argv)
    handler = namespace.func  # installed by each subparser's set_defaults(func=...)
    return handler(namespace)


if __name__ == "__main__":
    sys.exit(main())
rein/cli/_helpers.py ADDED
@@ -0,0 +1,169 @@
1
+ """CLI helpers for `rein`.
2
+
3
+ Purely utility functions and detector wrappers for the thin CLI adapter.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import json
9
+ import os
10
+ import subprocess
11
+ import sys
12
+ import tempfile
13
+ import tomllib
14
+ from collections.abc import Iterable
15
+ from typing import Any
16
+
17
+ from ..core import bandit, custom, gitleaks, lint, ruff, secrets, semgrep
18
+ from ..core.code import code_domain
19
+ from ..core.conventions import scan_profile
20
+ from ..core.diffs import parse_added_lines
21
+ from ..core.findings import Finding
22
+ from ..core.profile import Profile, ProfileError, parse_profile, profile_invalid_finding
23
+ from ..core.review import review_diff_findings
24
+
25
# Directory names that are pruned while walking a tree.
_SKIP_DIRS = {
    ".git",
    "node_modules",
    "__pycache__",
    ".venv",
    "venv",
    "dist",
    "build",
    ".pytest_cache",
    ".mypy_cache",
    ".ruff_cache",
}


def _iter_files(paths: Iterable[str]) -> Iterable[str]:
    """Yield every file reachable from *paths*.

    A path naming a regular file is yielded unchanged. A directory is walked
    recursively, skipping any sub-directory whose name is in ``_SKIP_DIRS``.
    Paths that are neither a file nor a directory are ignored.
    """
    for entry in paths:
        if os.path.isdir(entry):
            for folder, subdirs, filenames in os.walk(entry):
                # In-place slice assignment is what makes os.walk skip the pruned dirs.
                subdirs[:] = [name for name in subdirs if name not in _SKIP_DIRS]
                yield from (os.path.join(folder, filename) for filename in filenames)
        elif os.path.isfile(entry):
            yield entry
40
+
41
+
42
def _git(*args: str) -> str:
    """Run ``git`` with *args* and return its stdout as text.

    Returns an empty string when git exits with a non-zero status or the
    executable cannot be found. git's stderr is discarded.
    """
    command = ["git", *args]
    try:
        output = subprocess.check_output(command, text=True, stderr=subprocess.DEVNULL)
    except (subprocess.CalledProcessError, FileNotFoundError):
        output = ""
    return output
47
+
48
+
49
# Seconds an external detector may run before it is abandoned, so a hung
# detector cannot hang the review.
_DETECTOR_TIMEOUT = 60


def _run_stdout_detector(cmd: list[str]) -> str:
    """Run *cmd* and return whatever it wrote to stdout.

    Fails open: when the executable is missing, the run exceeds
    ``_DETECTOR_TIMEOUT`` or another ``OSError`` occurs, a note is printed to
    stderr and an empty string is returned. The exit status is not inspected.
    """
    try:
        completed = subprocess.run(cmd, capture_output=True, text=True, timeout=_DETECTOR_TIMEOUT)
    except FileNotFoundError:
        print(f"rein: {cmd[0]} not found; skipping.", file=sys.stderr)
    except subprocess.TimeoutExpired:
        print(f"rein: {cmd[0]} timed out after {_DETECTOR_TIMEOUT}s; skipping.", file=sys.stderr)
    except OSError as exc:
        print(f"rein: {cmd[0]} failed ({exc}); skipping.", file=sys.stderr)
    else:
        return completed.stdout
    return ""
66
+
67
+
68
def _run_ruff(targets: list[str]) -> str:
    """Run ``ruff check`` with JSON output over *targets* and return its stdout."""
    command = ["ruff", "check", "--output-format=json"]
    command.extend(targets)
    return _run_stdout_detector(command)
70
+
71
+
72
def _run_bandit(targets: list[str]) -> str:
    """Run ``bandit`` recursively with quiet JSON output over *targets* and return its stdout."""
    command = ["bandit", "-r", "-f", "json", "-q"]
    command.extend(targets)
    return _run_stdout_detector(command)
74
+
75
+
76
def _run_gitleaks(targets: list[str]) -> str:
    """Run ``gitleaks dir`` over *targets* and return the JSON report text.

    gitleaks is told to write its report to a temporary file, which is read
    back and always removed afterwards. Fails open: a missing executable, a
    timeout or any other ``OSError`` prints a note to stderr and yields an
    empty string.
    """
    report_path = ""  # stays empty if mkstemp itself fails, so cleanup is skipped
    try:
        handle, report_path = tempfile.mkstemp(suffix=".json")
        os.close(handle)
        command = [
            "gitleaks", "dir", *targets,
            "--report-format", "json",
            "--report-path", report_path,
            "--no-banner",
        ]
        subprocess.run(command, capture_output=True, timeout=_DETECTOR_TIMEOUT)
        with open(report_path, encoding="utf-8") as report_file:
            return report_file.read()
    except FileNotFoundError:
        print("rein: gitleaks not found; skipping gitleaks checks.", file=sys.stderr)
        return ""
    except subprocess.TimeoutExpired:
        print(f"rein: gitleaks timed out after {_DETECTOR_TIMEOUT}s; skipping.", file=sys.stderr)
        return ""
    except OSError as exc:
        print(f"rein: gitleaks failed ({exc}); skipping.", file=sys.stderr)
        return ""
    finally:
        if report_path and os.path.exists(report_path):
            try:
                os.unlink(report_path)
            except OSError:
                # Best-effort cleanup; a leftover temp file must not fail the run.
                pass
103
+
104
+
105
def _run_semgrep(targets: list[str]) -> str:
    """Run ``semgrep`` with ``--config auto`` and quiet JSON output over *targets*; return its stdout."""
    command = ["semgrep", "--json", "--quiet", "--config", "auto"]
    command.extend(targets)
    return _run_stdout_detector(command)
107
+
108
+
109
# Registry of optional external detectors: name -> (runner, output parser).
# Each runner is wrapped in a lambda so the module-level ``_run_*`` name is
# resolved when the runner is called rather than when this dict is built
# (presumably so the runners can be swapped out, e.g. in tests).
_DETECTORS = {
    "ruff": (lambda targets: _run_ruff(targets), ruff.parse_ruff_output),
    "bandit": (lambda targets: _run_bandit(targets), bandit.parse_bandit_output),
    "gitleaks": (lambda targets: _run_gitleaks(targets), gitleaks.parse_gitleaks_output),
    "semgrep": (lambda targets: _run_semgrep(targets), semgrep.parse_semgrep_output),
}
115
+
116
+
117
def _collect_review_findings(targets: list[str], custom_rules: tuple[custom.CustomRule, ...] = (), profile: Profile | None = None) -> list[Finding]:
    """Run the code guardrails over every ``.py`` file found under *targets*.

    For each readable file the findings of ``code_domain`` and of the custom
    rules are collected, plus ``scan_profile`` findings when a *profile* is
    given. Files that cannot be opened or decoded as UTF-8 are skipped.
    """
    collected: list[Finding] = []
    for file_path in _iter_files(targets):
        if not file_path.endswith(".py"):
            continue
        try:
            with open(file_path, encoding="utf-8") as source:
                text = source.read()
        except (OSError, UnicodeDecodeError):
            continue
        collected.extend(code_domain(text, file_path))
        collected.extend(custom.scan_custom(text, file_path, custom_rules))
        if profile is not None:
            collected.extend(scan_profile(text, file_path, profile))
    return collected
131
+
132
+
133
def _collect_diff_findings(diff_text: str, custom_rules: tuple[custom.CustomRule, ...] = ()) -> list[Finding]:
    """Review the files touched by a unified diff.

    The paths with added lines are taken from *diff_text* and visited in
    sorted order. Each file's current content is read from disk and passed,
    together with the diff, to ``review_diff_findings``. Files that cannot be
    opened or decoded as UTF-8 are skipped.
    """

    def domain_with_custom_rules(text: str, path: str | None) -> list[Finding]:
        # Built-in code guardrails followed by the caller's custom rules.
        return code_domain(text, path) + custom.scan_custom(text, path, custom_rules)

    touched_paths = {added.path for added in parse_added_lines(diff_text) if added.path}
    collected: list[Finding] = []
    for file_path in sorted(touched_paths):
        try:
            with open(file_path, encoding="utf-8") as source:
                content = source.read()
        except (OSError, UnicodeDecodeError):
            continue
        collected.extend(
            review_diff_findings(content, diff_text, file_path, domain=domain_with_custom_rules)
        )
    return collected
147
+
148
+
149
def _load_baseline(path: str) -> set[str]:
    """Return the fingerprints recorded in the baseline file at *path*.

    The file is expected to hold a JSON object whose ``"findings"`` value is a
    list of objects that each carry a ``"fingerprint"`` key. A missing
    ``"findings"`` key yields an empty set.

    Fails open: on any read or parse error, including a JSON document whose
    root is not an object, a warning is printed to stderr and an empty set is
    returned, so a bad baseline never crashes or hides findings.
    """
    try:
        with open(path, encoding="utf-8") as fh:
            data = json.load(fh)
        if not isinstance(data, dict):
            # json.load also returns lists, strings, numbers and None; calling
            # .get() on those would raise AttributeError and escape the
            # fail-open handler below.
            raise TypeError("baseline root must be a JSON object")
        return {entry["fingerprint"] for entry in data.get("findings", [])}
    except (OSError, UnicodeDecodeError, ValueError, KeyError, TypeError):
        print(f"rein: could not read baseline '{path}'; ignoring it.", file=sys.stderr)
        return set()
159
+
160
+
161
+ def _load_profile(path: str = ".rein-profile.toml") -> tuple[Profile | None, list[Finding]]:
162
+ if not os.path.exists(path):
163
+ return None, [] # missing -> silent, opt-in
164
+ try:
165
+ with open(path, "rb") as fh:
166
+ data = tomllib.load(fh)
167
+ return parse_profile(data), []
168
+ except (OSError, tomllib.TOMLDecodeError, ProfileError) as exc:
169
+ return None, [profile_invalid_finding(str(exc), path=path)]
rein/cli/commands.py ADDED
@@ -0,0 +1,194 @@
1
+ """CLI commands for `rein`.
2
+
3
+ Provides individual command implementations called by the parser.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import argparse
9
+ import datetime
10
+ import json
11
+ import os
12
+ import sys
13
+ import tomllib
14
+
15
+ from ..core import baseline, lint, ruff, secrets, security
16
+ from ..core.commits import check_commit
17
+ from ..core.config import DEFAULT_CONFIG, apply_disabled, config_from_dict
18
+ from ..core.drift import measure_drift
19
+ from ..core.findings import Finding
20
+ from ..core.learn import filter_net_new, measure_naming, measure_test_layout, render_profile_draft
21
+ from ..core.profile import parse_profile, ProfileError
22
+ from ..core.review import ReviewResult
23
+ from ..report import emit, emit_report, report_exit_code, worst_exit_code
24
+ from . import _helpers
25
+
26
+
27
def cmd_scan(args: argparse.Namespace) -> int:
    """Handle ``rein scan``: look for leaked secrets.

    With ``--diff`` a unified diff is read from stdin and passed to
    ``secrets.scan_diff``; otherwise every file under the given paths
    (default ``.``) is scanned. Findings are emitted in the requested format
    and the exit code comes from ``worst_exit_code``.
    """
    findings: list[Finding]
    if not args.diff:
        findings = []
        for filepath in _helpers._iter_files(args.paths or ["."]):
            findings.extend(secrets.scan_file(filepath))
    else:
        findings = secrets.scan_diff(sys.stdin.read())
    emit(findings, args.format)
    return worst_exit_code(findings)
37
+
38
+
39
def cmd_commit_check(args: argparse.Namespace) -> int:
    """Handle ``rein commit-check``: check a commit message and the staged files.

    The message comes from ``-m`` if given, else from the ``-F`` file, else
    from the most recent commit (``git log -1 --pretty=%B``). The staged file
    list comes from ``git diff --cached --name-only``.

    Returns 1 with a note on stderr when the ``-F`` file cannot be read or is
    not valid UTF-8, matching how the other commands report bad input instead
    of letting a traceback escape. Otherwise returns the exit code chosen by
    ``worst_exit_code`` for the emitted findings.
    """
    if args.message is not None:
        message = args.message
    elif args.message_file is not None:
        try:
            with open(args.message_file, encoding="utf-8") as fh:
                message = fh.read()
        except (OSError, UnicodeDecodeError) as exc:
            print(f"rein: could not read message file '{args.message_file}': {exc}", file=sys.stderr)
            return 1
    else:
        # _git already returns "" on failure, so no further fallback is needed.
        message = _helpers._git("log", "-1", "--pretty=%B")

    staged_output = _helpers._git("diff", "--cached", "--name-only")
    staged = [name for name in staged_output.splitlines() if name]
    findings = check_commit(message, staged)
    emit(findings, args.format)
    return worst_exit_code(findings)
52
+
53
+
54
def cmd_lint(args: argparse.Namespace) -> int:
    """Handle ``rein lint``: lint every ``.py`` file under the given paths.

    With ``--ruff`` the parsed output of ruff, run over the same targets, is
    appended. Findings are emitted in the requested format and the exit code
    comes from ``worst_exit_code``.
    """
    targets = args.paths or ["."]
    findings = []
    for file_path in _helpers._iter_files(targets):
        if file_path.endswith(".py"):
            findings.extend(lint.lint_file(file_path))
    if args.ruff:
        ruff_output = _helpers._run_ruff(targets)
        findings.extend(ruff.parse_ruff_output(ruff_output))
    emit(findings, args.format)
    return worst_exit_code(findings)
61
+
62
+
63
def cmd_security(args: argparse.Namespace) -> int:
    """Handle ``rein security``: scan every ``.py`` file under the given paths.

    Each file is passed to ``security.scan_security_file``. Findings are
    emitted in the requested format and the exit code comes from
    ``worst_exit_code``.
    """
    findings = []
    for file_path in _helpers._iter_files(args.paths or ["."]):
        if file_path.endswith(".py"):
            findings.extend(security.scan_security_file(file_path))
    emit(findings, args.format)
    return worst_exit_code(findings)
68
+
69
+
70
def cmd_review(args: argparse.Namespace) -> int:
    """Handle ``rein review``: run all guardrails and report a verdict.

    Steps, in order:
      1. Load configuration from ``--config`` or ``.rein.toml``. An explicit
         ``--config`` that does not exist, or a config that fails to load,
         prints an error and returns 1.
      2. Load the convention profile via ``_helpers._load_profile``.
      3. Collect core findings, either from a diff on stdin (``--diff``) or
         from the given paths (default ``.``).
      4. Run every enabled external detector (from config plus the
         ``--bandit`` / ``--gitleaks`` / ``--semgrep`` flags) over the paths.
      5. Drop baselined findings, then findings disabled by config.
      6. Put any profile-loading findings first, build the ``ReviewResult``
         and emit it.
    """
    config_path = args.config or ".rein.toml"
    config = DEFAULT_CONFIG
    if not os.path.exists(config_path):
        if args.config:
            # Only an explicitly requested config is required to exist.
            print(f"rein: could not find config '{args.config}'", file=sys.stderr)
            return 1
    else:
        try:
            with open(config_path, "rb") as stream:
                raw_config = tomllib.load(stream)
            config = config_from_dict(raw_config)
        except (OSError, tomllib.TOMLDecodeError, ValueError) as exc:
            print(f"rein: config error in '{config_path}': {exc}", file=sys.stderr)
            return 1

    profile, profile_findings = _helpers._load_profile()
    targets = args.paths or ["."]

    if args.diff:
        findings = _helpers._collect_diff_findings(sys.stdin.read(), config.custom_rules)
    else:
        findings = _helpers._collect_review_findings(targets, config.custom_rules, profile)

    enabled = set(config.detectors)
    for flag_enabled, detector_name in (
        (args.bandit, "bandit"),
        (args.gitleaks, "gitleaks"),
        (args.semgrep, "semgrep"),
    ):
        if flag_enabled:
            enabled.add(detector_name)

    for detector_name in sorted(enabled):
        if detector_name not in _helpers._DETECTORS:
            continue
        run_detector, parse_output = _helpers._DETECTORS[detector_name]
        findings.extend(parse_output(run_detector(targets)))

    if args.baseline:
        accepted = _helpers._load_baseline(args.baseline)
        findings = baseline.apply_baseline(findings, accepted)

    findings = apply_disabled(findings, config.disabled)
    # Profile-loading findings are added after the baseline and disabled-rule
    # filters have run, so those filters never see them.
    findings = profile_findings + findings
    result = ReviewResult.from_findings(findings, config.policy)
    emit_report(result, args.format, args.explain)
    return report_exit_code(result)
112
+
113
+
114
def cmd_baseline(args: argparse.Namespace) -> int:
    """Handle ``rein baseline``: record the current findings as accepted.

    Findings are collected from the given paths (default ``.``) without
    custom rules or a profile, turned into baseline entries and written as
    indented JSON to ``--output`` (default ``.rein-baseline.json``).
    Always returns 0.
    """
    current = _helpers._collect_review_findings(args.paths or ["."])
    entries = baseline.make_baseline(current)
    payload = {"version": 1, "findings": entries}
    destination = args.output or ".rein-baseline.json"
    with open(destination, "w", encoding="utf-8") as sink:
        json.dump(payload, sink, indent=2)
        sink.write("\n")
    print(f"rein: wrote baseline with {len(payload['findings'])} finding(s) to {destination}")
    return 0
123
+
124
+
125
def cmd_learn(args: argparse.Namespace) -> int:
    """Handle ``rein learn``: measure conventions and draft a profile.

    Reads every ``.py`` file under the given paths (default ``.``), measures
    naming and test-layout conventions, filters them against an existing
    ``.rein-profile.toml`` if one can be parsed, and renders a draft stamped
    with today's date. The draft goes to ``--output`` (which must not already
    exist) or to stdout.

    Returns 1 when the output file already exists, otherwise 0.
    """
    sources = []
    for candidate in _helpers._iter_files(args.paths or ["."]):
        if not candidate.endswith(".py"):
            continue
        try:
            with open(candidate, encoding="utf-8") as fh:
                sources.append((candidate, fh.read()))
        except (OSError, UnicodeDecodeError):
            continue

    measured = measure_naming([text for _, text in sources])
    measured.extend(measure_test_layout(sources))

    existing = None
    profile_path = ".rein-profile.toml"
    if os.path.exists(profile_path):
        try:
            with open(profile_path, "rb") as fh:
                raw_profile = tomllib.load(fh)
            existing = parse_profile(raw_profile)
        except (OSError, tomllib.TOMLDecodeError, ProfileError) as exc:
            # An unreadable profile is reported; learning continues without it.
            print(f"rein: could not parse existing profile: {exc}", file=sys.stderr)
            existing = None

    net_new = filter_net_new(measured, existing)
    draft = render_profile_draft(net_new, datetime.date.today().isoformat())

    if not args.output:
        print(draft, end="")
        return 0

    if os.path.exists(args.output):
        print(f"rein: error: output file '{args.output}' already exists. Refusing to overwrite.", file=sys.stderr)
        return 1
    with open(args.output, "w", encoding="utf-8") as fh:
        fh.write(draft)
    print(f"rein: wrote draft profile to {args.output}", file=sys.stderr)
    return 0
162
+
163
+
164
def cmd_drift(args: argparse.Namespace) -> int:
    """Handle ``rein drift``: compare the code against the convention profile.

    Returns 0 with a note on stderr when no ``.rein-profile.toml`` exists, and
    1 after printing the load errors when the profile exists but could not be
    loaded. Otherwise one status line is printed per drift report and the
    result is 1 if any convention drifted, else 0.
    """
    profile, profile_findings = _helpers._load_profile()
    if profile is None:
        if os.path.exists(".rein-profile.toml"):
            for finding in profile_findings:
                print(f"rein: error: {finding.message}", file=sys.stderr)
            return 1
        print("rein: no profile found; skipping drift check.", file=sys.stderr)
        return 0

    sources = []
    for candidate in _helpers._iter_files(args.paths or ["."]):
        if not candidate.endswith(".py"):
            continue
        try:
            with open(candidate, encoding="utf-8") as fh:
                sources.append((candidate, fh.read()))
        except (OSError, UnicodeDecodeError):
            continue

    exit_code = 0
    for report in measure_drift(profile, sources):
        label = "DRIFT" if report.drifted else "OK"
        print(f"[{label}] {report.convention_id}: {report.summary} (conformance: {report.current_conformance:.2f}, ratified: {report.ratified_agreement:.2f}, sample: {report.sample_size})")
        if report.drifted:
            exit_code = 1

    return exit_code
rein/core/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ """Pure checking logic shared by every adapter (CLI, MCP, git hook)."""
2
+
3
+ from .findings import Finding, Severity, max_severity
4
+
5
+ __all__ = ["Finding", "Severity", "max_severity"]
rein/core/bandit.py ADDED
@@ -0,0 +1,66 @@
1
+ """Parse output from the bandit security scanner.
2
+
3
+ This is a pure parsing adapter. It takes bandit's JSON output and turns it
4
+ into rein Findings. It does no I/O.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+
11
+ from .findings import Finding, Severity
12
+
13
# Lower-cased bandit ``issue_severity`` labels mapped to rein severities.
_SEVERITY_MAP = dict(
    high=Severity.HIGH,
    medium=Severity.MEDIUM,
    low=Severity.LOW,
)
18
+
19
+
20
def parse_bandit_output(json_text: str) -> list[Finding]:
    """Convert ``bandit -f json`` output into rein findings.

    Empty or blank input, invalid JSON, a non-object root and a ``results``
    value that is not a list all yield an empty list. Result entries that are
    not objects, or that lack ``test_id``, ``filename`` or ``line_number``,
    are skipped. An unrecognised severity label falls back to ``Severity.LOW``.
    """
    if not (json_text and json_text.strip()):
        return []

    try:
        payload = json.loads(json_text)
    except json.JSONDecodeError:
        return []

    results = payload.get("results") if isinstance(payload, dict) else None
    if not isinstance(results, list):
        return []

    findings = []
    for entry in results:
        if not isinstance(entry, dict):
            continue

        test_id = entry.get("test_id")
        filename = entry.get("filename")
        line_number = entry.get("line_number")
        if line_number is None or not (test_id and filename):
            continue

        severity_label = str(entry.get("issue_severity", "")).lower()
        finding = Finding(
            rule_id=f"bandit.{test_id}",
            severity=_SEVERITY_MAP.get(severity_label, Severity.LOW),
            message=entry.get("issue_text", ""),
            path=filename,
            line=line_number,
            snippet=None,
            tags=("security", "bandit"),
        )
        findings.append(finding)
    return findings
rein/core/baseline.py ADDED
@@ -0,0 +1,45 @@
1
+ """Baseline fingerprinting: accept current findings, block only new ones.
2
+
3
+ The fingerprint excludes line numbers so a finding survives code moving around;
4
+ it keys on the rule, the file, and the finding's content (redacted snippet, or
5
+ message when there is no snippet). Stored as a hash so the file leaks nothing.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import hashlib
11
+
12
+ from .findings import Finding
13
+
14
+
15
def fingerprint(finding: Finding) -> str:
    """Return a SHA-256 hex id for *finding* that ignores its line number.

    The hash covers the rule id, the path (empty when unset) and the
    finding's content, separated by NUL characters. Content is the snippet
    when one is present, even if it is empty, and the message otherwise.
    Leaving the line out keeps the id stable when code shifts up or down.
    """
    snippet = finding.snippet
    content = finding.message if snippet is None else snippet
    location = finding.path or ""
    hasher = hashlib.sha256()
    hasher.update(f"{finding.rule_id}\0{location}\0{content}".encode("utf-8"))
    return hasher.hexdigest()
26
+
27
+
28
def make_baseline(findings: list[Finding]) -> list[dict]:
    """Turn *findings* into de-duplicated, serializable baseline entries.

    Each entry holds ``rule_id`` and ``path`` for human auditing plus the
    ``fingerprint`` used for matching. Findings that share a fingerprint
    collapse into one entry, and entries keep first-seen order.
    """
    unique: dict[str, dict] = {}
    for finding in findings:
        key = fingerprint(finding)
        unique[key] = dict(rule_id=finding.rule_id, path=finding.path, fingerprint=key)
    return [*unique.values()]
39
+
40
+
41
def apply_baseline(
    findings: list[Finding], fingerprints: set[str],
) -> list[Finding]:
    """Return the findings whose fingerprint is not in *fingerprints*.

    Order is preserved and a new list is returned; *findings* is not modified.
    """
    remaining = []
    for finding in findings:
        if fingerprint(finding) in fingerprints:
            continue
        remaining.append(finding)
    return remaining
rein/core/code.py ADDED
@@ -0,0 +1,41 @@
1
+ """The code domain: runs code guardrails (secrets, lint, security) over source.
2
+
3
+ This encapsulates the code-specific logic so the review engine can run it
4
+ as a generic Domain.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import ast
10
+
11
+ from .findings import Finding
12
+ from .lint import lint_text
13
+ from .secrets import scan_text
14
+ from .security import scan_security
15
+
16
+
17
def _safe_parse(text: str) -> ast.Module | None:
    """Parse *text* as Python source, returning ``None`` if it cannot be parsed.

    ``SyntaxError``, ``ValueError`` and ``RecursionError`` from ``ast.parse``
    are all treated as "cannot be parsed".
    """
    try:
        module = ast.parse(text)
    except (SyntaxError, ValueError, RecursionError):
        module = None
    return module
22
+
23
+
24
def _python_findings(text: str, path: str | None) -> list[Finding]:
    """Run lint and security checks over *text*, parsing the AST once.

    When the source parses, the tree is shared by ``lint_text`` and
    ``scan_security``. When it does not, only ``lint_text`` runs (per the
    original author's note, lint is expected to report the parse failure) and
    the security scan is skipped.
    """
    tree = _safe_parse(text)
    if tree is not None:
        return lint_text(text, path, tree=tree) + scan_security(text, path, tree=tree)
    return lint_text(text, path)
30
+
31
+
32
def code_domain(text: str, path: str | None = None) -> list[Finding]:
    """Run every code guardrail over *text*.

    The secret scanner always runs. Lint and security checks are added when
    *path* is ``None`` or ends in ``.py``.
    """
    findings = list(scan_text(text, path))
    treat_as_python = path is None or path.endswith(".py")
    if treat_as_python:
        findings.extend(_python_findings(text, path))
    return findings