rein-engine 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rein/__init__.py +3 -0
- rein/cli/__init__.py +1 -0
- rein/cli/__main__.py +108 -0
- rein/cli/_helpers.py +169 -0
- rein/cli/commands.py +194 -0
- rein/core/__init__.py +5 -0
- rein/core/bandit.py +66 -0
- rein/core/baseline.py +45 -0
- rein/core/code.py +41 -0
- rein/core/commits.py +197 -0
- rein/core/config.py +111 -0
- rein/core/conventions.py +207 -0
- rein/core/conventions_base.py +72 -0
- rein/core/conventions_complexity.py +125 -0
- rein/core/conventions_layering.py +221 -0
- rein/core/custom.py +82 -0
- rein/core/diffs.py +84 -0
- rein/core/drift.py +153 -0
- rein/core/findings.py +77 -0
- rein/core/gitleaks.py +61 -0
- rein/core/junk.py +90 -0
- rein/core/learn.py +194 -0
- rein/core/lint.py +233 -0
- rein/core/lint_comments.py +107 -0
- rein/core/pragmas.py +43 -0
- rein/core/profile.py +228 -0
- rein/core/remediation.py +75 -0
- rein/core/review.py +125 -0
- rein/core/ruff.py +41 -0
- rein/core/sarif.py +95 -0
- rein/core/secrets.py +237 -0
- rein/core/security.py +239 -0
- rein/core/semgrep.py +87 -0
- rein/hooks/__init__.py +1 -0
- rein/hooks/precommit.py +108 -0
- rein/loop.py +85 -0
- rein/mcp/__init__.py +1 -0
- rein/mcp/server.py +71 -0
- rein/mcp/tools.py +101 -0
- rein/report.py +120 -0
- rein_engine-0.1.1.dist-info/METADATA +240 -0
- rein_engine-0.1.1.dist-info/RECORD +46 -0
- rein_engine-0.1.1.dist-info/WHEEL +4 -0
- rein_engine-0.1.1.dist-info/entry_points.txt +3 -0
- rein_engine-0.1.1.dist-info/licenses/LICENSE +201 -0
- rein_engine-0.1.1.dist-info/licenses/NOTICE +4 -0
rein/__init__.py
ADDED
rein/cli/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Command-line adapter around rein.core."""
|
rein/cli/__main__.py
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
"""`rein` command line.
|
|
2
|
+
|
|
3
|
+
This is a thin adapter: it gathers input (files, git state), calls the pure
|
|
4
|
+
checks in :mod:`rein.core`, renders the resulting findings, and chooses an
|
|
5
|
+
exit code. All real logic lives in core so the MCP server and git hook can
|
|
6
|
+
reuse it unchanged.
|
|
7
|
+
|
|
8
|
+
Subcommands:
|
|
9
|
+
rein scan [PATH ...] Scan files/dirs for leaked secrets.
|
|
10
|
+
rein commit-check [-m MSG] Check a commit message + staged files.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import argparse
|
|
16
|
+
import sys
|
|
17
|
+
from typing import Any
|
|
18
|
+
|
|
19
|
+
from .. import __version__
|
|
20
|
+
from .commands import (
|
|
21
|
+
cmd_baseline,
|
|
22
|
+
cmd_commit_check,
|
|
23
|
+
cmd_drift,
|
|
24
|
+
cmd_learn,
|
|
25
|
+
cmd_lint,
|
|
26
|
+
cmd_scan,
|
|
27
|
+
cmd_review,
|
|
28
|
+
cmd_security,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _format_parent() -> argparse.ArgumentParser:
|
|
33
|
+
parent = argparse.ArgumentParser(add_help=False)
|
|
34
|
+
parent.add_argument(
|
|
35
|
+
"--format", choices=["text", "json", "sarif"], default="text",
|
|
36
|
+
help="Output format (default: text).",
|
|
37
|
+
)
|
|
38
|
+
return parent
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _add_basic_parsers(sub: Any, fmt_parent: argparse.ArgumentParser) -> None:
    """Register the ``scan``, ``commit-check``, ``lint`` and ``security`` subcommands.

    ``sub`` is the object ``add_parser`` is called on (the result of
    ``add_subparsers``). Every subcommand inherits ``--format`` from
    ``fmt_parent`` and records its handler under ``func`` via
    ``set_defaults``.
    """
    inherited = [fmt_parent]
    paths_help = "Files or directories (default: .)."

    # scan: positional paths, or a unified diff on stdin.
    scan = sub.add_parser("scan", help="Scan files/dirs for leaked secrets.", parents=inherited)
    scan.add_argument("paths", nargs="*", help=paths_help)
    scan.add_argument(
        "--diff",
        action="store_true",
        help="Read a unified diff from stdin and scan only added lines.",
    )
    scan.set_defaults(func=cmd_scan)

    # commit-check: message given inline or via a file.
    commit = sub.add_parser("commit-check", help="Check a commit message + staged files.", parents=inherited)
    commit.add_argument("-m", "--message", help="Commit message text.")
    commit.add_argument("-F", "--message-file", help="Read the message from a file.")
    commit.set_defaults(func=cmd_commit_check)

    # lint: core rules, optionally with ruff.
    linter = sub.add_parser("lint", help="Lint Python files.", parents=inherited)
    linter.add_argument("paths", nargs="*", help=paths_help)
    linter.add_argument("--ruff", action="store_true", help="Run ruff alongside core rules.")
    linter.set_defaults(func=cmd_lint)

    # security: unsafe-code patterns.
    sec = sub.add_parser("security", help="Scan Python files for unsafe-code patterns.", parents=inherited)
    sec.add_argument("paths", nargs="*", help=paths_help)
    sec.set_defaults(func=cmd_security)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def build_parser() -> argparse.ArgumentParser:
    """Assemble the top-level ``rein`` parser with all of its subcommands.

    A subcommand is required. ``scan``, ``commit-check``, ``lint`` and
    ``security`` are added by ``_add_basic_parsers``; ``review``,
    ``baseline``, ``learn`` and ``drift`` are added here. Only ``review``
    among the latter inherits the shared ``--format`` option.
    """
    root = argparse.ArgumentParser(
        prog="rein",
        description="Guardrails that keep AI-written code clean and secure.",
    )
    root.add_argument("--version", action="version", version=f"rein {__version__}")
    commands = root.add_subparsers(dest="command", required=True)

    shared_format = _format_parent()
    _add_basic_parsers(commands, shared_format)

    paths_help = "Files or directories (default: .)."

    review = commands.add_parser("review", help="Run all guardrails and return a verdict.", parents=[shared_format])
    review.add_argument("paths", nargs="*", help=paths_help)
    review.add_argument(
        "--diff",
        action="store_true",
        help="Read a unified diff from stdin; review only added lines (file content read from the working tree).",
    )
    review.add_argument("--explain", action="store_true", help="Show how to fix each finding.")
    review.add_argument("--baseline", help="Suppress findings recorded in this baseline file.")
    review.add_argument("--config", help="Path to .rein.toml configuration file.")
    # Opt-in external detectors, registered in the same order as before.
    for tool in ("bandit", "gitleaks", "semgrep"):
        review.add_argument(f"--{tool}", action="store_true", help=f"Run {tool} alongside core rules.")
    review.set_defaults(func=cmd_review)

    base = commands.add_parser("baseline", help="Record current findings as an accepted baseline.")
    base.add_argument("paths", nargs="*", help=paths_help)
    base.add_argument("-o", "--output", default=None, help="Output file (default: .rein-baseline.json).")
    base.set_defaults(func=cmd_baseline)

    learn = commands.add_parser("learn", help="Measure conventions and draft a profile.")
    learn.add_argument("paths", nargs="*", help=paths_help)
    learn.add_argument("-o", "--output", help="Output file to write draft (default: stdout).")
    learn.set_defaults(func=cmd_learn)

    drift = commands.add_parser("drift", help="Check convention profile drift.")
    drift.add_argument("paths", nargs="*", help=paths_help)
    drift.set_defaults(func=cmd_drift)

    return root
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def main(argv: list[str] | None = None) -> int:
    """Parse ``argv`` and dispatch to the selected subcommand.

    ``argv`` is passed straight to ``parse_args`` (``None`` lets argparse
    read ``sys.argv``). Returns whatever the subcommand handler stored under
    ``func`` returns.
    """
    namespace = build_parser().parse_args(argv)
    handler = namespace.func
    return handler(namespace)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
# Script entry point: exit the interpreter with main()'s return value.
if __name__ == "__main__":
    raise SystemExit(main())
|
rein/cli/_helpers.py
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
"""CLI helpers for `rein`.
|
|
2
|
+
|
|
3
|
+
Purely utility functions and detector wrappers for the thin CLI adapter.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
import os
|
|
10
|
+
import subprocess
|
|
11
|
+
import sys
|
|
12
|
+
import tempfile
|
|
13
|
+
import tomllib
|
|
14
|
+
from collections.abc import Iterable
|
|
15
|
+
from typing import Any
|
|
16
|
+
|
|
17
|
+
from ..core import bandit, custom, gitleaks, lint, ruff, secrets, semgrep
|
|
18
|
+
from ..core.code import code_domain
|
|
19
|
+
from ..core.conventions import scan_profile
|
|
20
|
+
from ..core.diffs import parse_added_lines
|
|
21
|
+
from ..core.findings import Finding
|
|
22
|
+
from ..core.profile import Profile, ProfileError, parse_profile, profile_invalid_finding
|
|
23
|
+
from ..core.review import review_diff_findings
|
|
24
|
+
|
|
25
|
+
_SKIP_DIRS = {
|
|
26
|
+
".git", "node_modules", "__pycache__", ".venv", "venv", "dist", "build",
|
|
27
|
+
".pytest_cache", ".mypy_cache", ".ruff_cache",
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _iter_files(paths: Iterable[str]) -> Iterable[str]:
|
|
32
|
+
for path in paths:
|
|
33
|
+
if os.path.isfile(path):
|
|
34
|
+
yield path
|
|
35
|
+
elif os.path.isdir(path):
|
|
36
|
+
for root, dirs, files in os.walk(path):
|
|
37
|
+
dirs[:] = [d for d in dirs if d not in _SKIP_DIRS]
|
|
38
|
+
for name in files:
|
|
39
|
+
yield os.path.join(root, name)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _git(*args: str) -> str:
|
|
43
|
+
try:
|
|
44
|
+
return subprocess.check_output(["git", *args], text=True, stderr=subprocess.DEVNULL)
|
|
45
|
+
except (subprocess.CalledProcessError, FileNotFoundError):
|
|
46
|
+
return ""
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
_DETECTOR_TIMEOUT = 60 # seconds; a detector must not hang the review
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _run_stdout_detector(cmd: list[str]) -> str:
|
|
53
|
+
"""Run a detector that writes JSON to stdout; fail open on any failure."""
|
|
54
|
+
try:
|
|
55
|
+
proc = subprocess.run(cmd, capture_output=True, text=True, timeout=_DETECTOR_TIMEOUT)
|
|
56
|
+
return proc.stdout
|
|
57
|
+
except FileNotFoundError:
|
|
58
|
+
print(f"rein: {cmd[0]} not found; skipping.", file=sys.stderr)
|
|
59
|
+
return ""
|
|
60
|
+
except subprocess.TimeoutExpired:
|
|
61
|
+
print(f"rein: {cmd[0]} timed out after {_DETECTOR_TIMEOUT}s; skipping.", file=sys.stderr)
|
|
62
|
+
return ""
|
|
63
|
+
except OSError as exc:
|
|
64
|
+
print(f"rein: {cmd[0]} failed ({exc}); skipping.", file=sys.stderr)
|
|
65
|
+
return ""
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _run_ruff(targets: list[str]) -> str:
    """Run ``ruff check`` with JSON output over ``targets`` and return its stdout."""
    command = ["ruff", "check", "--output-format=json"]
    command.extend(targets)
    return _run_stdout_detector(command)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _run_bandit(targets: list[str]) -> str:
    """Run ``bandit -r -f json -q`` over ``targets`` and return its stdout."""
    command = ["bandit", "-r", "-f", "json", "-q"]
    command.extend(targets)
    return _run_stdout_detector(command)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _run_gitleaks(targets: list[str]) -> str:
    """Run ``gitleaks dir`` over ``targets`` and return the JSON report text.

    The report goes to a temporary ``.json`` file passed via
    ``--report-path``; that file is read back and always removed
    afterwards. Returns ``""`` (after a warning on stderr) on
    ``FileNotFoundError``, on a timeout after ``_DETECTOR_TIMEOUT``
    seconds, or on any other ``OSError``.
    """
    report_path = ""
    try:
        handle, report_path = tempfile.mkstemp(suffix=".json")
        os.close(handle)  # only the path is needed; gitleaks writes the file itself
        command = ["gitleaks", "dir"]
        command += targets
        command += ["--report-format", "json", "--report-path", report_path, "--no-banner"]
        subprocess.run(command, capture_output=True, timeout=_DETECTOR_TIMEOUT)
        with open(report_path, encoding="utf-8") as report_file:
            contents = report_file.read()
    except FileNotFoundError:
        print("rein: gitleaks not found; skipping gitleaks checks.", file=sys.stderr)
        contents = ""
    except subprocess.TimeoutExpired:
        print(f"rein: gitleaks timed out after {_DETECTOR_TIMEOUT}s; skipping.", file=sys.stderr)
        contents = ""
    except OSError as exc:
        print(f"rein: gitleaks failed ({exc}); skipping.", file=sys.stderr)
        contents = ""
    finally:
        # Best-effort cleanup of the temporary report.
        if report_path and os.path.exists(report_path):
            try:
                os.unlink(report_path)
            except OSError:
                pass
    return contents
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def _run_semgrep(targets: list[str]) -> str:
    """Run ``semgrep --json --quiet --config auto`` over ``targets`` and return its stdout."""
    command = ["semgrep", "--json", "--quiet", "--config", "auto"]
    command.extend(targets)
    return _run_stdout_detector(command)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
# Detector name -> (runner, parser). The runner takes the target list and
# returns the tool's raw output text; the parser turns that text into findings.
# Each runner is wrapped in a lambda, so the module-level ``_run_*`` function
# is looked up when the detector is invoked rather than when this dict is built.
_DETECTORS = dict(
    ruff=(lambda targets: _run_ruff(targets), ruff.parse_ruff_output),
    bandit=(lambda targets: _run_bandit(targets), bandit.parse_bandit_output),
    gitleaks=(lambda targets: _run_gitleaks(targets), gitleaks.parse_gitleaks_output),
    semgrep=(lambda targets: _run_semgrep(targets), semgrep.parse_semgrep_output),
)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _collect_review_findings(targets: list[str], custom_rules: tuple[custom.CustomRule, ...] = (), profile: Profile | None = None) -> list[Finding]:
    """Run the code guardrails over every ``.py`` file under ``targets``.

    For each readable UTF-8 file this gathers, in order, the results of
    ``code_domain``, ``custom.scan_custom`` with ``custom_rules``, and,
    when ``profile`` is given, ``scan_profile``. Files that cannot be
    opened or decoded are skipped.
    """
    collected: list[Finding] = []
    python_files = (path for path in _iter_files(targets) if path.endswith(".py"))
    for source_path in python_files:
        try:
            with open(source_path, encoding="utf-8") as handle:
                source = handle.read()
        except (OSError, UnicodeDecodeError):
            continue  # unreadable or not UTF-8: nothing to review
        collected += code_domain(source, source_path)
        collected += custom.scan_custom(source, source_path, custom_rules)
        if profile is not None:
            collected += scan_profile(source, source_path, profile)
    return collected
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _collect_diff_findings(diff_text: str, custom_rules: tuple[custom.CustomRule, ...] = ()) -> list[Finding]:
    """Review the files touched by a unified diff.

    The distinct, non-empty paths reported by ``parse_added_lines`` are
    processed in sorted order. Each file's current content is read from
    disk and handed to ``review_diff_findings`` together with the diff and
    a domain that combines ``code_domain`` with the custom rules. Files
    that cannot be opened or decoded are skipped.
    """

    def custom_domain(t: str, p_: str | None) -> list[Finding]:
        # Built-in code guardrails plus the caller's custom rules.
        return code_domain(t, p_) + custom.scan_custom(t, p_, custom_rules)

    touched = {added.path for added in parse_added_lines(diff_text) if added.path}
    results: list[Finding] = []
    for file_path in sorted(touched):
        try:
            with open(file_path, encoding="utf-8") as handle:
                current = handle.read()
        except (OSError, UnicodeDecodeError):
            continue
        results.extend(review_diff_findings(current, diff_text, file_path, domain=custom_domain))
    return results
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def _load_baseline(path: str) -> set[str]:
|
|
150
|
+
"""Fingerprints from a baseline file. Fail open: warn and return empty on
|
|
151
|
+
any read/parse error, so a bad baseline never crashes or hides findings."""
|
|
152
|
+
try:
|
|
153
|
+
with open(path, encoding="utf-8") as fh:
|
|
154
|
+
data = json.load(fh)
|
|
155
|
+
return {entry["fingerprint"] for entry in data.get("findings", [])}
|
|
156
|
+
except (OSError, UnicodeDecodeError, ValueError, KeyError, TypeError):
|
|
157
|
+
print(f"rein: could not read baseline '{path}'; ignoring it.", file=sys.stderr)
|
|
158
|
+
return set()
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def _load_profile(path: str = ".rein-profile.toml") -> tuple[Profile | None, list[Finding]]:
|
|
162
|
+
if not os.path.exists(path):
|
|
163
|
+
return None, [] # missing -> silent, opt-in
|
|
164
|
+
try:
|
|
165
|
+
with open(path, "rb") as fh:
|
|
166
|
+
data = tomllib.load(fh)
|
|
167
|
+
return parse_profile(data), []
|
|
168
|
+
except (OSError, tomllib.TOMLDecodeError, ProfileError) as exc:
|
|
169
|
+
return None, [profile_invalid_finding(str(exc), path=path)]
|
rein/cli/commands.py
ADDED
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
"""CLI commands for `rein`.
|
|
2
|
+
|
|
3
|
+
Provides individual command implementations called by the parser.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import argparse
|
|
9
|
+
import datetime
|
|
10
|
+
import json
|
|
11
|
+
import os
|
|
12
|
+
import sys
|
|
13
|
+
import tomllib
|
|
14
|
+
|
|
15
|
+
from ..core import baseline, lint, ruff, secrets, security
|
|
16
|
+
from ..core.commits import check_commit
|
|
17
|
+
from ..core.config import DEFAULT_CONFIG, apply_disabled, config_from_dict
|
|
18
|
+
from ..core.drift import measure_drift
|
|
19
|
+
from ..core.findings import Finding
|
|
20
|
+
from ..core.learn import filter_net_new, measure_naming, measure_test_layout, render_profile_draft
|
|
21
|
+
from ..core.profile import parse_profile, ProfileError
|
|
22
|
+
from ..core.review import ReviewResult
|
|
23
|
+
from ..report import emit, emit_report, report_exit_code, worst_exit_code
|
|
24
|
+
from . import _helpers
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def cmd_scan(args: argparse.Namespace) -> int:
    """Handle ``rein scan``: look for leaked secrets.

    With ``--diff`` the unified diff on stdin is scanned; otherwise every
    file under ``args.paths`` (default ``.``) is scanned. Findings are
    emitted in ``args.format`` and the return value is
    ``worst_exit_code(findings)``.
    """
    if args.diff:
        findings = secrets.scan_diff(sys.stdin.read())
    else:
        roots = args.paths or ["."]
        findings = [
            finding
            for filepath in _helpers._iter_files(roots)
            for finding in secrets.scan_file(filepath)
        ]
    emit(findings, args.format)
    return worst_exit_code(findings)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def cmd_commit_check(args: argparse.Namespace) -> int:
    """Handle ``rein commit-check``: check a commit message and the staged files.

    The message comes from ``-m`` when given, else from the ``-F`` file,
    else from the most recent commit (``git log -1 --pretty=%B``). Staged
    paths come from ``git diff --cached --name-only``.

    Returns:
        ``worst_exit_code(findings)``, or ``1`` when the ``-F`` message
        file cannot be read (reported on stderr instead of a traceback).
    """
    if args.message is not None:
        message = args.message
    elif args.message_file is not None:
        try:
            with open(args.message_file, encoding="utf-8") as fh:
                message = fh.read()
        except (OSError, UnicodeDecodeError) as exc:
            # A missing or undecodable file is a usage error, not a crash.
            print(f"rein: could not read message file '{args.message_file}': {exc}", file=sys.stderr)
            return 1
    else:
        message = _helpers._git("log", "-1", "--pretty=%B") or ""

    staged = [p for p in _helpers._git("diff", "--cached", "--name-only").splitlines() if p]
    findings = check_commit(message, staged)
    emit(findings, args.format)
    return worst_exit_code(findings)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def cmd_lint(args: argparse.Namespace) -> int:
    """Handle ``rein lint``: lint every ``.py`` file under ``args.paths`` (default ``.``).

    With ``--ruff`` the parsed ruff output for the same targets is
    appended. Findings are emitted in ``args.format`` and the return value
    is ``worst_exit_code(findings)``.
    """
    targets = args.paths or ["."]
    findings = []
    for path in _helpers._iter_files(targets):
        if path.endswith(".py"):
            findings.extend(lint.lint_file(path))
    if args.ruff:
        ruff_output = _helpers._run_ruff(targets)
        findings.extend(ruff.parse_ruff_output(ruff_output))
    emit(findings, args.format)
    return worst_exit_code(findings)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def cmd_security(args: argparse.Namespace) -> int:
    """Handle ``rein security``: scan every ``.py`` file under ``args.paths`` (default ``.``).

    Findings from ``security.scan_security_file`` are emitted in
    ``args.format``; the return value is ``worst_exit_code(findings)``.
    """
    targets = args.paths or ["."]
    findings = []
    for path in _helpers._iter_files(targets):
        if path.endswith(".py"):
            findings.extend(security.scan_security_file(path))
    emit(findings, args.format)
    return worst_exit_code(findings)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def cmd_review(args: argparse.Namespace) -> int:
    """Handle ``rein review``: run all guardrails and return a verdict.

    Steps, in order:
      1. Load configuration from ``--config`` or ``.rein.toml``. A
         missing default file means ``DEFAULT_CONFIG``; an explicitly
         named file that is missing, or any file that fails to load,
         prints an error and returns 1.
      2. Load the convention profile via ``_helpers._load_profile``.
      3. Collect core findings from the diff on stdin (``--diff``) or
         from ``args.paths`` (default ``.``).
      4. Run every enabled detector (config plus ``--bandit`` /
         ``--gitleaks`` / ``--semgrep``) over ``args.paths`` (default ``.``).
      5. Apply the baseline, then the disabled-rule filter, then prepend
         the profile findings.
      6. Build a ``ReviewResult``, emit it, and return ``report_exit_code``.
    """
    config = DEFAULT_CONFIG
    config_path = args.config or ".rein.toml"
    if os.path.exists(config_path):
        try:
            with open(config_path, "rb") as fh:
                raw_config = tomllib.load(fh)
            config = config_from_dict(raw_config)
        except (OSError, tomllib.TOMLDecodeError, ValueError) as exc:
            print(f"rein: config error in '{config_path}': {exc}", file=sys.stderr)
            return 1
    elif args.config:
        # Only an explicitly requested config is an error when absent.
        print(f"rein: could not find config '{args.config}'", file=sys.stderr)
        return 1

    profile, profile_findings = _helpers._load_profile()

    if args.diff:
        findings = _helpers._collect_diff_findings(sys.stdin.read(), config.custom_rules)
    else:
        findings = _helpers._collect_review_findings(args.paths or ["."], config.custom_rules, profile)

    enabled = set(config.detectors)
    enabled.update(flag for flag in ("bandit", "gitleaks", "semgrep") if getattr(args, flag))

    # Sorted so detectors run, and their findings appear, in a stable order.
    for detector in sorted(enabled):
        if detector not in _helpers._DETECTORS:
            continue
        runner, parser = _helpers._DETECTORS[detector]
        findings.extend(parser(runner(args.paths or ["."])))

    if args.baseline:
        accepted = _helpers._load_baseline(args.baseline)
        findings = baseline.apply_baseline(findings, accepted)

    findings = apply_disabled(findings, config.disabled)
    # Profile findings are added after filtering, so neither the baseline
    # nor the disabled list can remove them.
    findings = profile_findings + findings
    result = ReviewResult.from_findings(findings, config.policy)
    emit_report(result, args.format, args.explain)
    return report_exit_code(result)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def cmd_baseline(args: argparse.Namespace) -> int:
    """Handle ``rein baseline``: record the current findings as an accepted baseline.

    Findings for ``args.paths`` (default ``.``) are turned into baseline
    entries and written as indented JSON, followed by a newline, to
    ``args.output`` (default ``.rein-baseline.json``). Always returns 0.
    """
    current = _helpers._collect_review_findings(args.paths or ["."])
    payload = {"version": 1, "findings": baseline.make_baseline(current)}
    destination = args.output or ".rein-baseline.json"
    with open(destination, "w", encoding="utf-8") as sink:
        json.dump(payload, sink, indent=2)
        sink.write("\n")
    entry_count = len(payload["findings"])
    print(f"rein: wrote baseline with {entry_count} finding(s) to {destination}")
    return 0
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def cmd_learn(args: argparse.Namespace) -> int:
    """Handle ``rein learn``: measure conventions and draft a profile.

    Reads every decodable ``.py`` file under ``args.paths`` (default
    ``.``), measures naming and test-layout conventions, drops those
    already covered by an existing ``.rein-profile.toml`` (an unparsable
    profile is reported on stderr and ignored), and renders a draft
    stamped with today's date.

    The draft goes to stdout, or to ``args.output`` when given.

    Returns:
        0 on success; 1 when ``args.output`` already exists.
    """
    targets = args.paths or ["."]
    files = []
    for p in _helpers._iter_files(targets):
        if not p.endswith(".py"):
            continue
        try:
            with open(p, encoding="utf-8") as fh:
                files.append((p, fh.read()))
        except (OSError, UnicodeDecodeError):
            continue

    measured = measure_naming([text for _, text in files])
    measured.extend(measure_test_layout(files))

    existing = None
    if os.path.exists(".rein-profile.toml"):
        try:
            with open(".rein-profile.toml", "rb") as fh:
                data = tomllib.load(fh)
            existing = parse_profile(data)
        except (OSError, tomllib.TOMLDecodeError, ProfileError) as exc:
            print(f"rein: could not parse existing profile: {exc}", file=sys.stderr)
            existing = None

    filtered = filter_net_new(measured, existing)
    draft = render_profile_draft(filtered, datetime.date.today().isoformat())

    if not args.output:
        print(draft, end="")
        return 0

    try:
        # Mode "x" creates the file exclusively, so the refusal to overwrite
        # is atomic; a separate exists() check followed by "w" could still
        # clobber a file created in between.
        with open(args.output, "x", encoding="utf-8") as fh:
            fh.write(draft)
    except FileExistsError:
        print(f"rein: error: output file '{args.output}' already exists. Refusing to overwrite.", file=sys.stderr)
        return 1
    print(f"rein: wrote draft profile to {args.output}", file=sys.stderr)
    return 0
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def cmd_drift(args: argparse.Namespace) -> int:
    """Handle ``rein drift``: check convention profile drift.

    Without a ``.rein-profile.toml`` the check is skipped (returns 0). A
    profile that exists but failed to load has its errors printed to
    stderr and returns 1. Otherwise one status line is printed per drift
    report, and the result is 1 if any report drifted, else 0.
    """
    profile, profile_findings = _helpers._load_profile()
    if profile is None:
        if os.path.exists(".rein-profile.toml"):
            # The file is there but could not be loaded: surface why.
            for problem in profile_findings:
                print(f"rein: error: {problem.message}", file=sys.stderr)
            return 1
        print("rein: no profile found; skipping drift check.", file=sys.stderr)
        return 0

    sources = []
    for path in _helpers._iter_files(args.paths or ["."]):
        if not path.endswith(".py"):
            continue
        try:
            with open(path, encoding="utf-8") as handle:
                sources.append((path, handle.read()))
        except (OSError, UnicodeDecodeError):
            continue

    drift_seen = False
    for r in measure_drift(profile, sources):
        status = "DRIFT" if r.drifted else "OK"
        print(f"[{status}] {r.convention_id}: {r.summary} (conformance: {r.current_conformance:.2f}, ratified: {r.ratified_agreement:.2f}, sample: {r.sample_size})")
        if r.drifted:
            drift_seen = True

    if drift_seen:
        return 1
    return 0
|
rein/core/__init__.py
ADDED
rein/core/bandit.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"""Parse output from the bandit security scanner.
|
|
2
|
+
|
|
3
|
+
This is a pure parsing adapter. It takes bandit's JSON output and turns it
|
|
4
|
+
into rein Findings. It does no I/O.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
|
|
11
|
+
from .findings import Finding, Severity
|
|
12
|
+
|
|
13
|
+
# Lower-cased bandit ``issue_severity`` values -> rein severities.
_SEVERITY_MAP = dict(
    high=Severity.HIGH,
    medium=Severity.MEDIUM,
    low=Severity.LOW,
)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def parse_bandit_output(json_text: str) -> list[Finding]:
    """Convert ``bandit -f json`` output into rein findings.

    Returns an empty list for empty or blank input, undecodable JSON, a
    top level that is not an object, or a ``results`` value that is not a
    list. A result that is not an object, or that lacks ``test_id``,
    ``filename`` or ``line_number``, is skipped. Unknown severities map
    to ``Severity.LOW``.
    """
    if not (json_text and json_text.strip()):
        return []

    try:
        payload = json.loads(json_text)
    except json.JSONDecodeError:
        return []

    results = payload.get("results") if isinstance(payload, dict) else None
    if not isinstance(results, list):
        return []

    parsed = []
    for entry in results:
        if not isinstance(entry, dict):
            continue
        test_id, filename, line_number = (
            entry.get(key) for key in ("test_id", "filename", "line_number")
        )
        # line_number is compared against None so that 0 is not treated as missing.
        if not (test_id and filename and line_number is not None):
            continue

        severity_key = str(entry.get("issue_severity", "")).lower()
        parsed.append(
            Finding(
                rule_id=f"bandit.{test_id}",
                severity=_SEVERITY_MAP.get(severity_key, Severity.LOW),
                message=entry.get("issue_text", ""),
                path=filename,
                line=line_number,
                snippet=None,
                tags=("security", "bandit"),
            )
        )
    return parsed
|
rein/core/baseline.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"""Baseline fingerprinting: accept current findings, block only new ones.
|
|
2
|
+
|
|
3
|
+
The fingerprint excludes line numbers so a finding survives code moving around;
|
|
4
|
+
it keys on the rule, the file, and the finding's content (redacted snippet, or
|
|
5
|
+
message when there is no snippet). Stored as a hash so the file leaks nothing.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import hashlib
|
|
11
|
+
|
|
12
|
+
from .findings import Finding
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def fingerprint(finding: Finding) -> str:
    """Return a line-independent SHA-256 hex id for ``finding``.

    The hash covers the rule id, the path (empty string when falsy) and
    the finding's content, joined by NUL characters. Content is the
    snippet when one is present, otherwise the message. The line number
    is left out on purpose so the id survives code moving up or down.
    """
    content = finding.message if finding.snippet is None else finding.snippet
    raw = "{}\0{}\0{}".format(finding.rule_id, finding.path or "", content)
    digest = hashlib.sha256()
    digest.update(raw.encode("utf-8"))
    return digest.hexdigest()
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def make_baseline(findings: list[Finding]) -> list[dict]:
    """Turn current findings into de-duplicated, serializable baseline entries.

    Each entry holds ``rule_id`` and ``path`` (for human auditing) plus the
    ``fingerprint`` used for matching. Findings that share a fingerprint
    collapse into a single entry.
    """
    by_fingerprint: dict[str, dict] = {}
    for finding in findings:
        digest = fingerprint(finding)
        # A repeated fingerprint overwrites the earlier entry in place.
        by_fingerprint[digest] = dict(
            rule_id=finding.rule_id,
            path=finding.path,
            fingerprint=digest,
        )
    return [*by_fingerprint.values()]
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def apply_baseline(
    findings: list[Finding], fingerprints: set[str],
) -> list[Finding]:
    """Return the findings whose fingerprint is not in ``fingerprints``, in their original order."""
    remaining = []
    for finding in findings:
        if fingerprint(finding) in fingerprints:
            continue  # already accepted by the baseline
        remaining.append(finding)
    return remaining
|
rein/core/code.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""The code domain: runs code guardrails (secrets, lint, security) over source.
|
|
2
|
+
|
|
3
|
+
This encapsulates the code-specific logic so the review engine can run it
|
|
4
|
+
as a generic Domain.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import ast
|
|
10
|
+
|
|
11
|
+
from .findings import Finding
|
|
12
|
+
from .lint import lint_text
|
|
13
|
+
from .secrets import scan_text
|
|
14
|
+
from .security import scan_security
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _safe_parse(text: str) -> ast.Module | None:
|
|
18
|
+
try:
|
|
19
|
+
return ast.parse(text)
|
|
20
|
+
except (SyntaxError, ValueError, RecursionError):
|
|
21
|
+
return None
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _python_findings(text: str, path: str | None) -> list[Finding]:
    """Return lint findings followed by security findings for ``text``.

    The source is parsed once and the tree is shared by both checks. When
    parsing fails, only ``lint_text`` runs (without a tree) so that it can
    report the problem.
    """
    tree = _safe_parse(text)
    if tree is not None:
        lint_results = lint_text(text, path, tree=tree)
        security_results = scan_security(text, path, tree=tree)
        return lint_results + security_results
    # Rare error path: lint reports it; security is skipped.
    return lint_text(text, path)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def code_domain(text: str, path: str | None = None) -> list[Finding]:
    """Run all code guardrails over ``text``.

    The secret scanner always runs. Lint and security checks are added
    when ``path`` is ``None`` or ends with ``.py``.
    """
    findings = [*scan_text(text, path)]
    treat_as_python = path is None or path.endswith(".py")
    if treat_as_python:
        findings += _python_findings(text, path)
    return findings
|