auditr 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- auditor/__init__.py +45 -0
- auditor/aggregate.py +93 -0
- auditor/ast_util.py +85 -0
- auditor/baseline.py +69 -0
- auditor/builtins.py +20 -0
- auditor/cli/__init__.py +33 -0
- auditor/cli/__main__.py +6 -0
- auditor/cli/aggregate.py +27 -0
- auditor/cli/apps.py +18 -0
- auditor/cli/config.py +28 -0
- auditor/cli/crossfile.py +29 -0
- auditor/cli/discover.py +43 -0
- auditor/cli/helpers.py +106 -0
- auditor/cli/ignore.py +130 -0
- auditor/cli/index.py +66 -0
- auditor/cli/manifest.py +22 -0
- auditor/cli/options.py +181 -0
- auditor/cli/plugins.py +24 -0
- auditor/cli/report.py +54 -0
- auditor/cli/rules.py +66 -0
- auditor/cli/scan.py +265 -0
- auditor/cli/summary.py +91 -0
- auditor/config.py +530 -0
- auditor/crossfile.py +177 -0
- auditor/dead_code.py +80 -0
- auditor/discovery.py +249 -0
- auditor/engine.py +553 -0
- auditor/fingerprints.py +27 -0
- auditor/fixture_usage.py +42 -0
- auditor/ignores.py +85 -0
- auditor/index.py +697 -0
- auditor/languages/__init__.py +0 -0
- auditor/languages/base.py +298 -0
- auditor/languages/bash/__init__.py +2 -0
- auditor/languages/bash/auditor.py +42 -0
- auditor/languages/bash/base.py +62 -0
- auditor/languages/bash/detectors/__init__.py +6 -0
- auditor/languages/bash/detectors/malware.py +209 -0
- auditor/languages/bash/detectors/secrets.py +10 -0
- auditor/languages/malware_signatures.py +43 -0
- auditor/languages/malware_sweeps.py +73 -0
- auditor/languages/manifest/__init__.py +3 -0
- auditor/languages/manifest/auditor.py +43 -0
- auditor/languages/manifest/base.py +85 -0
- auditor/languages/manifest/detectors/__init__.py +3 -0
- auditor/languages/manifest/detectors/supply_chain.py +43 -0
- auditor/languages/python/__init__.py +0 -0
- auditor/languages/python/auditor.py +89 -0
- auditor/languages/python/detectors/__init__.py +18 -0
- auditor/languages/python/detectors/_util.py +135 -0
- auditor/languages/python/detectors/async_rules.py +335 -0
- auditor/languages/python/detectors/config_rules.py +91 -0
- auditor/languages/python/detectors/correctness.py +188 -0
- auditor/languages/python/detectors/malware.py +347 -0
- auditor/languages/python/detectors/oop.py +697 -0
- auditor/languages/python/detectors/secrets.py +10 -0
- auditor/languages/python/detectors/security/__init__.py +9 -0
- auditor/languages/python/detectors/security/_base.py +32 -0
- auditor/languages/python/detectors/security/crypto.py +177 -0
- auditor/languages/python/detectors/security/deserialize.py +119 -0
- auditor/languages/python/detectors/security/framework.py +174 -0
- auditor/languages/python/detectors/security/injection.py +226 -0
- auditor/languages/python/detectors/security/network.py +230 -0
- auditor/languages/python/detectors/sqlalchemy_rules.py +428 -0
- auditor/languages/python/detectors/style.py +141 -0
- auditor/languages/python/detectors/suggestions.py +203 -0
- auditor/languages/python/detectors/supply_chain.py +105 -0
- auditor/languages/python/detectors/testing.py +442 -0
- auditor/languages/python/detectors/typing_rules.py +115 -0
- auditor/languages/python/detectors/xfile.py +62 -0
- auditor/languages/python/shapes.py +330 -0
- auditor/languages/secret_sweeps.py +37 -0
- auditor/languages/sweep.py +65 -0
- auditor/languages/typescript/__init__.py +1 -0
- auditor/languages/typescript/auditor.py +68 -0
- auditor/languages/typescript/base.py +47 -0
- auditor/languages/typescript/detectors/__init__.py +14 -0
- auditor/languages/typescript/detectors/a11y.py +323 -0
- auditor/languages/typescript/detectors/complexity.py +120 -0
- auditor/languages/typescript/detectors/design_system.py +165 -0
- auditor/languages/typescript/detectors/dry.py +191 -0
- auditor/languages/typescript/detectors/malware.py +212 -0
- auditor/languages/typescript/detectors/react.py +306 -0
- auditor/languages/typescript/detectors/secrets.py +10 -0
- auditor/languages/typescript/detectors/security.py +155 -0
- auditor/languages/typescript/detectors/style.py +40 -0
- auditor/languages/typescript/detectors/xfile.py +37 -0
- auditor/languages/typescript/manifest.py +47 -0
- auditor/languages/typescript/nodes.py +203 -0
- auditor/languages/typescript/parser.py +42 -0
- auditor/languages/typescript/shapes.py +162 -0
- auditor/logconfig.py +25 -0
- auditor/mcp_server.py +239 -0
- auditor/models.py +235 -0
- auditor/paths.py +29 -0
- auditor/plugins.py +78 -0
- auditor/profiles/__init__.py +0 -0
- auditor/profiles/all-strict.toml +11 -0
- auditor/profiles/base.toml +55 -0
- auditor/profiles/pydantic.toml +2 -0
- auditor/profiles/strict.toml +5 -0
- auditor/registry.py +132 -0
- auditor/reporters/__init__.py +11 -0
- auditor/reporters/base.py +47 -0
- auditor/reporters/html_reporter.py +324 -0
- auditor/reporters/json_reporter.py +62 -0
- auditor/reporters/markdown_reporter.py +53 -0
- auditor/reporters/sarif_reporter.py +74 -0
- auditor/roles.py +123 -0
- auditor/secrets_signatures.py +253 -0
- auditor/serve.py +91 -0
- auditor/settings_cohesion.py +109 -0
- auditor/skips.py +126 -0
- auditr-0.1.0.dist-info/METADATA +721 -0
- auditr-0.1.0.dist-info/RECORD +118 -0
- auditr-0.1.0.dist-info/WHEEL +4 -0
- auditr-0.1.0.dist-info/entry_points.txt +3 -0
- auditr-0.1.0.dist-info/licenses/LICENSE +21 -0
auditor/__init__.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"""auditor — a token-efficient repo auditor for coding agents.
|
|
2
|
+
|
|
3
|
+
Public API:
|
|
4
|
+
from auditor import scan_file, scan_path, load_config, render, ScanEngine, IndexStore
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from loguru import logger
|
|
8
|
+
|
|
9
|
+
from auditor.config import AuditorSettings, ResolvedConfig, load_config
|
|
10
|
+
from auditor.engine import ScanEngine, audit_target
|
|
11
|
+
from auditor.index import IndexStore
|
|
12
|
+
from auditor.models import (
|
|
13
|
+
Category,
|
|
14
|
+
FileRole,
|
|
15
|
+
Finding,
|
|
16
|
+
IndexEntry,
|
|
17
|
+
ManifestEntry,
|
|
18
|
+
ScanResult,
|
|
19
|
+
Severity,
|
|
20
|
+
VerdictKind,
|
|
21
|
+
)
|
|
22
|
+
from auditor.reporters import render
|
|
23
|
+
|
|
24
|
+
__all__ = [
|
|
25
|
+
"AuditorSettings",
|
|
26
|
+
"Category",
|
|
27
|
+
"FileRole",
|
|
28
|
+
"Finding",
|
|
29
|
+
"IndexEntry",
|
|
30
|
+
"IndexStore",
|
|
31
|
+
"ManifestEntry",
|
|
32
|
+
"ResolvedConfig",
|
|
33
|
+
"ScanEngine",
|
|
34
|
+
"ScanResult",
|
|
35
|
+
"Severity",
|
|
36
|
+
"VerdictKind",
|
|
37
|
+
"audit_target",
|
|
38
|
+
"load_config",
|
|
39
|
+
"render",
|
|
40
|
+
]
|
|
41
|
+
|
|
42
|
+
__version__ = "0.1.0"
|
|
43
|
+
|
|
44
|
+
# Stay silent when embedded / under MCP; the CLI's logconfig.configure() re-enables it.
|
|
45
|
+
logger.disable("auditor")
|
auditor/aggregate.py
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
"""Roll up the index into a single AUDIT.md (replaces the prior ad-hoc /tmp/aggregate.py).
|
|
2
|
+
|
|
3
|
+
Reads cached findings from the index — no re-scan — so `auditor aggregate` is cheap and
|
|
4
|
+
reflects the last scan of the registered scope. Persistent ignores are applied here too, so the
|
|
5
|
+
consolidated report matches what `scan` shows.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
from auditor.ignores import IgnoreList
|
|
11
|
+
from auditor.index import IndexStore
|
|
12
|
+
from auditor.models import FileRole, ScanResult, Severity, severity_rank
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class AuditAggregator:
    """Builds the consolidated AUDIT.md from the index for a scope.

    Everything is read back from the index (files, grouped findings, persistent ignores);
    no file is re-scanned here.
    """

    def __init__(self, index: IndexStore) -> None:
        """Keep a reference to the index the report is built from."""
        self.index = index

    async def _results(self) -> list[ScanResult]:
        """Reconstruct per-file results from the index and drop ignored findings."""
        entries = await self.index.files()
        grouped = await self.index.findings_grouped()
        results = [
            ScanResult(
                file=e.path,
                language=e.language,
                role=FileRole(e.role),
                # Files without any recorded finding still get a (clean) result.
                findings=grouped.get(e.path, []),
            )
            for e in entries
        ]
        # The return value is discarded, so this presumably prunes ``results`` in
        # place — TODO confirm against IgnoreList.filter.
        IgnoreList.from_rows(await self.index.ignores()).filter(results)
        return results

    async def markdown(self) -> str:
        """Return the consolidated report as a Markdown string."""
        return _render(await self._results())

    async def write(self, out_path: Path) -> Path:
        """Write the Markdown report to ``out_path`` and return that same path.

        The encoding is pinned to UTF-8: the report contains non-ASCII punctuation
        ("—", "·") and arbitrary file names, so relying on the platform default
        encoding could fail or produce a mis-encoded file.
        """
        out_path.write_text(await self.markdown(), encoding="utf-8")
        return out_path
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _render(results: list[ScanResult]) -> str:
    """Render per-file scan results as the consolidated Markdown report.

    The report consists of a scope line, severity totals across all results, a table of
    the files that have findings, and — when any exist — a list of the findings whose
    verdict kind is ``"candidate"``. The returned text ends with exactly one newline.
    """
    totals = {s: 0 for s in Severity}
    for r in results:
        for sev, n in r.counts.items():
            totals[sev] += n

    def _most_severe_first(r: ScanResult) -> tuple[int, ...]:
        # Order by the per-severity counts, highest severity first, so the table really
        # is "most severe first" as its heading promises; the total number of findings
        # only breaks ties. Negated because ``sorted`` is ascending.
        c = r.counts
        return (
            -c[Severity.BLOCKING],
            -c[Severity.HIGH],
            -c[Severity.MEDIUM],
            -c[Severity.LOW],
            -len(r.findings),
        )

    flagged = sorted((r for r in results if r.findings), key=_most_severe_first)

    lines = [
        "# Audit — consolidated report",
        "",
        f"Scope: {len(results)} files audited.",
        "",
        (
            f"**Totals — blocking: {totals[Severity.BLOCKING]} · high: {totals[Severity.HIGH]} · "
            f"medium: {totals[Severity.MEDIUM]} · low: {totals[Severity.LOW]} · "
            f"suggestion: {totals[Severity.SUGGESTION]}**"
        ),
        "",
        "## Files with findings (most severe first)",
        "",
        "| File | Role | Blocking | High | Medium | Low |",
        "| --- | --- | --- | --- | --- | --- |",
    ]
    if flagged:
        for r in flagged:
            c = r.counts
            lines.append(
                f"| `{r.file}` | {r.role.value} | {c[Severity.BLOCKING]} | "
                f"{c[Severity.HIGH]} | {c[Severity.MEDIUM]} | {c[Severity.LOW]} |"
            )
    else:
        # Keep the table well-formed even when nothing was flagged.
        lines.append("| _(none)_ | | 0 | 0 | 0 | 0 |")
    lines.append("")

    candidates = [
        f for r in results for f in r.findings if f.verdict_kind.value == "candidate"
    ]
    if candidates:
        lines += ["## Candidates to judge", ""]
        # Highest severity rank first, then alphabetically by rule id.
        for f in sorted(
            candidates, key=lambda f: (-severity_rank(f.severity), f.rule_id)
        ):
            lines.append(f"- **{f.severity.value}** `{f.rule_id}` — {f.message}")
        lines.append("")

    # Trim trailing blank lines and finish with a single newline.
    return "\n".join(lines).rstrip() + "\n"
|
auditor/ast_util.py
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
"""Low-level Python-AST helpers shared across the auditor (manifest construction,
|
|
2
|
+
detectors). Pure functions over ``ast`` nodes — no project imports, no state."""
|
|
3
|
+
|
|
4
|
+
import ast
|
|
5
|
+
|
|
6
|
+
_FuncDef = ast.FunctionDef | ast.AsyncFunctionDef
|
|
7
|
+
_PYDANTIC_BASES = frozenset({"BaseModel", "BaseSettings"})
|
|
8
|
+
_UNTYPED_DICT_RETURNS = ("dict[str, Any]", "dict[str, typing.Any]")
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def dotted(node: ast.AST) -> str:
    """Return a best-effort dotted name for ``node``.

    Calls are replaced by their callee and attribute chains are flattened (``a.b.c``);
    any other innermost node falls back to ``ast.unparse``, or ``""`` if unparsing fails.
    """
    # Walk inwards through calls and attribute accesses, remembering attribute names
    # from the outermost to the innermost.
    trailing: list[str] = []
    current = node
    while isinstance(current, (ast.Call, ast.Attribute)):
        if isinstance(current, ast.Call):
            current = current.func
        else:
            trailing.append(current.attr)
            current = current.value

    if isinstance(current, ast.Name):
        name = current.id
    else:
        try:
            name = ast.unparse(current)
        except (ValueError, TypeError):
            name = ""

    # Re-attach the attribute names, innermost first; an empty prefix is simply dropped.
    for attr in reversed(trailing):
        name = f"{name}.{attr}" if name else attr
    return name
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def base_name(node: ast.AST) -> str:
    """Return the last segment of ``node``'s dotted name (``a.b.c`` -> ``c``, ``f`` -> ``f``).

    The receiver is ignored, which makes this suitable for matching a class base, callee,
    or decorator against a set of bare names.
    """
    full_name = dotted(node)
    _, _, last_segment = full_name.rpartition(".")
    return last_segment
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def decorator_names(node: ast.ClassDef | _FuncDef) -> tuple[str, ...]:
    """Return the dotted name of every decorator on ``node``, in source order.

    A decorator written as a call (``@deco(...)``) is reported by the name of its callee.
    """
    names: list[str] = []
    for decorator in node.decorator_list:
        target = decorator.func if isinstance(decorator, ast.Call) else decorator
        names.append(dotted(target))
    return tuple(names)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def class_field_count(cls: ast.ClassDef) -> int:
    """Count the annotated assignments directly in the class body.

    Used as a proxy for the number of Pydantic/dataclass fields.
    """
    annotated = [stmt for stmt in cls.body if isinstance(stmt, ast.AnnAssign)]
    return len(annotated)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def method_line_set(tree: ast.AST) -> set[int]:
    """Collect the line number of every function defined directly in a class body.

    Classes are found anywhere in ``tree``; only their immediate members are considered.
    """
    lines: set[int] = set()
    for node in ast.walk(tree):
        if not isinstance(node, ast.ClassDef):
            continue
        for member in node.body:
            if isinstance(member, _FuncDef):
                lines.add(member.lineno)
    return lines
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def function_flags(fn: _FuncDef, *, is_method: bool) -> tuple[str, ...]:
    """Summarise a function definition as a tuple of flag strings.

    Flags, in the order they are emitted:

    - ``ASYNC``: ``fn`` is an ``async def``.
    - ``UNTYPED_RETURN``: no return annotation (``__init__`` / ``__post_init__`` exempt).
    - ``UNTYPED_ARGS``: a positional or keyword-only parameter has no annotation. For a
      method the first positional parameter (the receiver) is exempt.
    - ``UNTYPED_DICT_RETURN``: the return annotation's text is one of
      ``_UNTYPED_DICT_RETURNS``.

    ``*args`` / ``**kwargs`` are not inspected.

    Args:
        fn: The function definition node.
        is_method: Whether ``fn`` is defined directly in a class body.
    """
    flags: list[str] = []
    if isinstance(fn, ast.AsyncFunctionDef):
        flags.append("ASYNC")
    if fn.returns is None and fn.name not in ("__init__", "__post_init__"):
        flags.append("UNTYPED_RETURN")
    # A static method has no implicit receiver, so its first parameter is an ordinary
    # argument and must not be exempted from the annotation check.
    has_receiver = is_method and not any(
        base_name(d) == "staticmethod" for d in fn.decorator_list
    )
    positional = fn.args.posonlyargs + fn.args.args
    untyped = [
        p
        for i, p in enumerate(positional)
        if p.annotation is None and not (has_receiver and i == 0)
    ]
    if untyped or any(p.annotation is None for p in fn.args.kwonlyargs):
        flags.append("UNTYPED_ARGS")
    # ``dotted`` falls back to the unparsed source text for subscripted annotations.
    if fn.returns is not None and dotted(fn.returns) in _UNTYPED_DICT_RETURNS:
        flags.append("UNTYPED_DICT_RETURN")
    return tuple(flags)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def class_flags(cls: ast.ClassDef) -> tuple[str, ...]:
    """Summarise a class definition as a tuple of flag strings.

    ``BASEMODEL`` when a base's final name segment is in ``_PYDANTIC_BASES``, ``DATACLASS``
    when a decorator's final segment is ``dataclass``, and ``ALL_STATICMETHODS`` when the
    class has at least one method and every method carries a ``staticmethod`` decorator.
    """

    def _is_static(method: _FuncDef) -> bool:
        return any(base_name(d) == "staticmethod" for d in method.decorator_list)

    found: list[str] = []

    if not _PYDANTIC_BASES.isdisjoint(base_name(b) for b in cls.bases):
        found.append("BASEMODEL")

    if any(name.split(".")[-1] == "dataclass" for name in decorator_names(cls)):
        found.append("DATACLASS")

    methods = [member for member in cls.body if isinstance(member, _FuncDef)]
    if methods and all(map(_is_static, methods)):
        found.append("ALL_STATICMETHODS")

    return tuple(found)
|
auditor/baseline.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"""Baseline support: snapshot today's findings, then on later scans report only the *new* ones.
|
|
2
|
+
|
|
3
|
+
A baseline stores a line-independent fingerprint per finding — ``(file, rule_id, hash(evidence))``
|
|
4
|
+
— so a finding survives line shifts (edits elsewhere in the file) but a genuinely new issue (new
|
|
5
|
+
offending text) is still surfaced. This is what makes the auditor adoptable on a large existing
|
|
6
|
+
repo: accept the current findings as the baseline, then gate only on what you add.
|
|
7
|
+
|
|
8
|
+
Fingerprints are stored as a **multiset** (one entry per occurrence), so when several distinct
|
|
9
|
+
findings in a file legitimately share a snippet — e.g. three untyped ``def __init__(`` — all three
|
|
10
|
+
are recorded and a fourth, newly-added one still surfaces. A set would collapse them and silently
|
|
11
|
+
hide the new occurrence. ``filter`` therefore hides up to the recorded count per fingerprint."""
|
|
12
|
+
|
|
13
|
+
import hashlib
|
|
14
|
+
from collections import Counter
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
|
|
17
|
+
from pydantic import BaseModel, Field
|
|
18
|
+
|
|
19
|
+
from auditor.models import Finding, ScanResult
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def finding_fingerprint(file: str, finding: Finding) -> str:
    """Return a SHA-256 hex digest identifying a finding independently of its line number.

    The digest covers the file, the rule id, and the whitespace-stripped evidence text
    (a missing evidence counts as empty), separated by NUL characters.
    """
    snippet = (finding.evidence or "").strip()
    material = f"{file}\x00{finding.rule_id}\x00{snippet}"
    digest = hashlib.sha256(material.encode())
    return digest.hexdigest()
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class Baseline(BaseModel):
    """Accepted findings recorded as a multiset of fingerprints, persisted as JSON.

    ``fingerprints`` holds one entry per baselined occurrence, so repeated values are
    meaningful; ``from_results`` stores them sorted.
    """

    version: int = 1
    fingerprints: list[str] = Field(default_factory=list)  # sorted; one entry per occurrence

    @classmethod
    def from_results(cls, results: list[ScanResult]) -> "Baseline":
        """Build a baseline from every finding in ``results``."""
        collected: list[str] = []
        for result in results:
            collected.extend(
                finding_fingerprint(result.file, finding) for finding in result.findings
            )
        collected.sort()
        return cls(fingerprints=collected)

    @classmethod
    def load(cls, path: Path) -> "Baseline":
        """Read and validate a baseline from the UTF-8 JSON file at ``path``."""
        raw = path.read_text(encoding="utf-8")
        return cls.model_validate_json(raw)

    def write(self, path: Path) -> int:
        """Save the baseline to ``path`` and return how many occurrences it records.

        Missing parent directories are created first.
        """
        path.parent.mkdir(parents=True, exist_ok=True)
        serialized = self.model_dump_json(indent=2) + "\n"
        path.write_text(serialized, encoding="utf-8")
        return len(self.fingerprints)

    def filter(self, results: list[ScanResult]) -> int:
        """Remove baselined findings from each result in place; return the number hidden.

        Each fingerprint hides at most as many findings as were recorded for it, so an
        additional occurrence beyond the baselined count is kept and still surfaces.
        """
        remaining = Counter(self.fingerprints)
        hidden = 0
        for result in results:
            surviving: list[Finding] = []
            for finding in result.findings:
                key = finding_fingerprint(result.file, finding)
                if remaining[key] <= 0:
                    # Budget for this fingerprint is used up (or it was never baselined).
                    surviving.append(finding)
                    continue
                remaining[key] -= 1
                hidden += 1
            result.findings = surviving
        return hidden
|
auditor/builtins.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""Importing this module registers all built-in languages and detectors.
|
|
2
|
+
|
|
3
|
+
Centralizes the one bootstrap import so other modules can depend on registration via a
|
|
4
|
+
plain top-level import (no inline imports, no import cycle: ``PythonAuditor`` references
|
|
5
|
+
``ResolvedConfig`` only under TYPE_CHECKING).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import auditor.languages.bash.auditor # noqa: F401 (registers BashAuditor + shell detectors)
|
|
9
|
+
import auditor.languages.manifest.auditor # noqa: F401 (registers ManifestAuditor + supply-chain detectors)
|
|
10
|
+
import auditor.languages.python.auditor # noqa: F401 (registers PythonAuditor + every detector)
|
|
11
|
+
import auditor.reporters # noqa: F401 (registers json/sarif/markdown reporters)
|
|
12
|
+
|
|
13
|
+
# TypeScript support needs the optional `ts` extra (tree-sitter). Register it when present;
|
|
14
|
+
# without the extra the core Python auditor still works.
|
|
15
|
+
try:
|
|
16
|
+
import auditor.languages.typescript.auditor # noqa: F401 (registers TypeScriptAuditor + TS detectors)
|
|
17
|
+
|
|
18
|
+
TYPESCRIPT_AVAILABLE = True
|
|
19
|
+
except ImportError:
|
|
20
|
+
TYPESCRIPT_AVAILABLE = False
|
auditor/cli/__init__.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""Command-line interface (typer), split one file per command. ``apps`` holds the root ``app``
|
|
2
|
+
and the status console, ``options`` / ``helpers`` / ``summary`` the shared pieces, and each
|
|
3
|
+
command module owns its handler (and its sub-app, when it has one). This package ``__init__`` is
|
|
4
|
+
the composition root: importing the root commands registers them, and the sub-apps are mounted
|
|
5
|
+
here. The ``auditor.cli:app`` entry point resolves to the ``app`` exported below.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from auditor.cli import ( # noqa: F401 — imported for their @app.command() side effects
|
|
9
|
+
aggregate,
|
|
10
|
+
crossfile,
|
|
11
|
+
discover,
|
|
12
|
+
manifest,
|
|
13
|
+
report,
|
|
14
|
+
scan,
|
|
15
|
+
)
|
|
16
|
+
from auditor.cli.apps import app
|
|
17
|
+
from auditor.cli.config import config_app
|
|
18
|
+
from auditor.cli.ignore import ignore_app
|
|
19
|
+
from auditor.cli.index import index_app
|
|
20
|
+
from auditor.cli.plugins import plugins_app
|
|
21
|
+
from auditor.cli.rules import rules_app
|
|
22
|
+
from auditor.languages.python.auditor import PythonAuditor
|
|
23
|
+
|
|
24
|
+
app.add_typer(index_app, name="index")
|
|
25
|
+
app.add_typer(ignore_app, name="ignore")
|
|
26
|
+
app.add_typer(config_app, name="config")
|
|
27
|
+
app.add_typer(rules_app, name="rules")
|
|
28
|
+
app.add_typer(plugins_app, name="plugins")
|
|
29
|
+
|
|
30
|
+
# ensure all built-in languages register for discovery's suffix list
|
|
31
|
+
_ = PythonAuditor
|
|
32
|
+
|
|
33
|
+
__all__ = ["app"]
|
auditor/cli/__main__.py
ADDED
auditor/cli/aggregate.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""``auditor aggregate`` — roll up the index into AUDIT.md."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
import typer
|
|
6
|
+
|
|
7
|
+
from auditor.aggregate import AuditAggregator
|
|
8
|
+
from auditor.cli.apps import app
|
|
9
|
+
from auditor.cli.helpers import _open_index, _run
|
|
10
|
+
from auditor.cli.options import AggregateOut, DirTarget
|
|
11
|
+
from auditor.discovery import find_root
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@app.command()
def aggregate(
    target: DirTarget = Path("."),
    out: AggregateOut = Path("AUDIT.md"),
) -> None:
    """Roll up the index into AUDIT.md (run `scan --incremental` first)."""
    # Resolve the repo root, build the report from its index, then report the path written.
    written = _run(_aggregate(find_root(target), out), "aggregating…")
    typer.echo(f"wrote {written}")
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
async def _aggregate(root: Path, out: Path) -> Path:
    """Open the index for ``root``, write the consolidated report to ``out``, return its path."""
    async with await _open_index(root) as index:
        aggregator = AuditAggregator(index)
        return await aggregator.write(out)
|
auditor/cli/apps.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"""The root typer ``app`` and the stderr status console — the two things every command module
|
|
2
|
+
shares. Each command module registers its handler on ``app`` (or defines its own sub-app);
|
|
3
|
+
``cli/__init__`` is the composition root that imports every command module and mounts the
|
|
4
|
+
sub-apps. Kept dependency-free so it stays a safe leaf import for the command modules.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import typer
|
|
8
|
+
from rich.console import Console
|
|
9
|
+
|
|
10
|
+
app = typer.Typer(
|
|
11
|
+
no_args_is_help=True, add_completion=False, help="A token-efficient repo auditor."
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# Goes to STDERR so it never corrupts the JSON/SARIF stdout that agents parse; rich auto-disables
|
|
16
|
+
# the spinner when stderr isn't a TTY (piped/captured output). The human summary uses its own
|
|
17
|
+
# stdout console in `cli.summary`.
|
|
18
|
+
_status = Console(stderr=True)
|
auditor/cli/config.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""``auditor config show`` — print the resolved configuration."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
import typer
|
|
6
|
+
from pydantic import ValidationError
|
|
7
|
+
|
|
8
|
+
from auditor.cli.helpers import (
|
|
9
|
+
_echo_json,
|
|
10
|
+
_fail,
|
|
11
|
+
_format_config_error,
|
|
12
|
+
_parse_config_json,
|
|
13
|
+
)
|
|
14
|
+
from auditor.cli.options import ConfigJson, RootArg
|
|
15
|
+
from auditor.config import load_config
|
|
16
|
+
from auditor.discovery import find_root
|
|
17
|
+
|
|
18
|
+
config_app = typer.Typer(no_args_is_help=True, help="Inspect resolved configuration.")
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@config_app.command("show")
def config_show(target: RootArg = Path("."), config_json: ConfigJson = None) -> None:
    """Print the resolved configuration."""
    try:
        root = find_root(target)
        overrides = _parse_config_json(config_json)
        settings = load_config(root, overrides=overrides)
    except ValidationError as exc:
        # Turn a validation failure into a clean one-line error exit.
        _fail(f"invalid config — {_format_config_error(exc)}")
    resolved = settings.model_dump(mode="json")
    _echo_json(resolved)
|
auditor/cli/crossfile.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
"""``auditor crossfile`` — recompute cross-file duplicate findings from the index."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from auditor import crossfile as crossfile_pass
|
|
6
|
+
from auditor.cli.apps import app
|
|
7
|
+
from auditor.cli.helpers import _echo_json, _open_index, _run
|
|
8
|
+
from auditor.cli.options import DirTarget
|
|
9
|
+
from auditor.config import load_config
|
|
10
|
+
from auditor.discovery import find_root
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@app.command()
def crossfile(target: DirTarget = Path(".")) -> None:
    """Recompute cross-file duplicate findings from the index."""
    # Run the pass for the repo root and print the total as a small JSON object.
    total = _run(_crossfile(find_root(target)), "cross-file pass…")
    _echo_json({"cross_file_findings": total})
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
async def _crossfile(root: Path) -> int:
    """Run the cross-file pass over ``root``'s index and return the total number of findings.

    The pass returns a per-file mapping; the lengths of its values are summed.
    """
    settings = load_config(root)
    async with await _open_index(root) as index:
        per_file = await crossfile_pass.run(
            index,
            settings_modules=settings.settings_modules,
            settings_cohesion_on=settings.settings_cohesion,
        )
        total = 0
        for findings in per_file.values():
            total += len(findings)
        return total
|
auditor/cli/discover.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
"""``auditor discover`` — list auditable files with their classified role."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from pydantic import ValidationError
|
|
6
|
+
|
|
7
|
+
from auditor.cli.apps import app
|
|
8
|
+
from auditor.cli.helpers import (
|
|
9
|
+
_echo_json,
|
|
10
|
+
_fail,
|
|
11
|
+
_format_config_error,
|
|
12
|
+
_parse_config_json,
|
|
13
|
+
_require_exists,
|
|
14
|
+
)
|
|
15
|
+
from auditor.cli.options import ConfigJson, DirTarget
|
|
16
|
+
from auditor.config import load_config
|
|
17
|
+
from auditor.discovery import FileDiscovery, find_root
|
|
18
|
+
from auditor.roles import RoleClassifier
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@app.command()
def discover(target: DirTarget = Path("."), config_json: ConfigJson = None) -> None:
    """List auditable files with their classified role."""
    _require_exists(target)
    root = find_root(target)
    try:
        overrides = _parse_config_json(config_json)
        settings = load_config(root, overrides=overrides)
    except ValidationError as exc:
        _fail(f"invalid config — {_format_config_error(exc)}")

    classifier = RoleClassifier(settings.role_globs)
    listing = []
    discovery = FileDiscovery(
        root,
        exclude_globs=tuple(settings.exclude),
        respect_gitignore=settings.respect_gitignore,
    )
    for path in discovery.files(target):
        # Report paths relative to the repo root when possible, otherwise as given.
        if path.is_relative_to(root):
            rel = str(path.relative_to(root))
        else:
            rel = str(path)
        # The classifier receives the file text as well; undecodable bytes are replaced.
        text = path.read_text(encoding="utf-8", errors="replace")
        role = classifier.classify(rel, text)
        listing.append({"file": rel, "role": role.value})
    _echo_json(listing)
|
auditor/cli/helpers.py
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
"""Shared CLI helpers: clean one-line error exits, the async-run spinner bridge, JSON echo,
|
|
2
|
+
format validation, report emission, and index-path resolution. Command modules import what
|
|
3
|
+
they need; anything used by a single command lives in that command's module instead.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import asyncio
|
|
7
|
+
import difflib
|
|
8
|
+
import json
|
|
9
|
+
from collections.abc import Coroutine, Iterable
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Any, NoReturn, TypeVar
|
|
12
|
+
|
|
13
|
+
import typer
|
|
14
|
+
from pydantic import ValidationError
|
|
15
|
+
|
|
16
|
+
from auditor.cli.apps import _status
|
|
17
|
+
from auditor.index import IndexStore
|
|
18
|
+
from auditor.paths import index_db_path, repo_key
|
|
19
|
+
from auditor.registry import REGISTRY
|
|
20
|
+
|
|
21
|
+
_T = TypeVar("_T")
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _echo_json(payload: object) -> None:
    """Print ``payload`` as JSON indented by two spaces."""
    text = json.dumps(payload, indent=2)
    typer.echo(text)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _fail(message: str) -> NoReturn:
    """Emit a clean one-line error to stderr and exit non-zero (no traceback).

    ``message`` is printed literally. Callers interpolate paths and user input into it,
    and bracketed text such as ``app/[slug]/page.tsx`` would otherwise be parsed as
    console markup and dropped from the output (or rejected as a stray closing tag).

    Raises:
        typer.Exit: Always, with exit code 1.
    """
    # Convert the message to text with markup parsing off; only the fixed prefix is
    # styled. Both parts go through a single print call so they stay on one line.
    literal = _status.render_str(message, markup=False)
    _status.print("[red]error:[/red]", literal)
    raise typer.Exit(1)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _suggest(value: str, candidates: Iterable[str]) -> str:
    """Return a " Did you mean 'X'?" suffix for the closest candidate, or "" if none is close.

    Meant to be appended to 'unknown rule/category/…' error messages.
    """
    close = difflib.get_close_matches(value, list(candidates), n=1, cutoff=0.6)
    if not close:
        return ""
    return f" Did you mean '{close[0]}'?"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _parse_config_json(raw: str | None) -> dict | None:
|
|
42
|
+
"""Parse a ``--config-json`` blob to a dict, or exit cleanly on bad JSON / non-object."""
|
|
43
|
+
if raw is None:
|
|
44
|
+
return None
|
|
45
|
+
try:
|
|
46
|
+
value = json.loads(raw)
|
|
47
|
+
except json.JSONDecodeError as exc:
|
|
48
|
+
_fail(f"invalid --config-json: {exc}")
|
|
49
|
+
if not isinstance(value, dict):
|
|
50
|
+
_fail("--config-json must be a JSON object")
|
|
51
|
+
return value
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _format_config_error(exc: ValidationError) -> str:
    """Format the first validation error as ``'<dotted loc>: <msg>'``.

    When the error has an empty location, only the message is returned.
    """
    first = exc.errors()[0]
    location = ".".join(map(str, first["loc"]))
    if not location:
        return first["msg"]
    return f"{location}: {first['msg']}"
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _require_exists(path: Path) -> None:
    """Exit with a clean error unless ``path`` exists (as a file or a directory)."""
    if path.exists():
        return
    _fail(f"no such file or directory: {path}")
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _require_file(path: Path) -> None:
    """Exit with a clean error unless ``path`` is an existing regular file."""
    if path.is_file():
        return
    _fail(f"no such file: {path}")
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _check_format(fmt: str) -> str:
    """Return ``fmt`` unchanged if a reporter is registered for it, else exit with an error.

    The error message lists the registered formats in sorted order.
    """
    if REGISTRY.reporter(fmt) is not None:
        return fmt
    _fail(f"unknown format {fmt!r}; choose from {sorted(REGISTRY.formats())}")
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _run(
    coro: Coroutine[Any, Any, _T], message: str = "auditing…", *, spinner: bool = True
) -> _T:
    """Drive ``coro`` to completion with ``asyncio.run`` and return its result.

    While it runs, a "dots" status spinner labelled ``message`` is shown on the status
    console, unless ``spinner`` is false (e.g. when ``-v`` logging provides the progress
    output instead).
    """
    if spinner:
        with _status.status(message, spinner="dots"):
            return asyncio.run(coro)
    return asyncio.run(coro)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _open_index(root: Path) -> Coroutine[Any, Any, IndexStore]:
    """Start a connection to the shared global index, scoped to ``root``'s partition.

    Returns the awaitable produced by ``IndexStore.connect``; use it as
    ``async with await _open_index(root) as index``.
    """
    db_path = index_db_path()
    partition = repo_key(root)
    return IndexStore.connect(db_path, partition)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _open_shared_index() -> Coroutine[Any, Any, IndexStore]:
    """Start a connection to the shared global index without binding to a repo partition.

    For cross-repo operations such as listing or forgetting repos. Returns the awaitable
    produced by ``IndexStore.connect``.
    """
    db_path = index_db_path()
    return IndexStore.connect(db_path)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _emit(rendered: str, output: Path | None) -> None:
    """Deliver a rendered report.

    With an ``output`` path the report is written there as UTF-8 and a short note goes to
    stderr; without one the report itself is echoed to stdout.
    """
    if output is not None:
        output.write_text(rendered, encoding="utf-8")
        typer.echo(f"wrote {output}", err=True)
        return
    typer.echo(rendered)
|