secscan-mcp 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- secscan_mcp/__init__.py +3 -0
- secscan_mcp/engines/.gitkeep +0 -0
- secscan_mcp/engines/__init__.py +41 -0
- secscan_mcp/engines/_util.py +39 -0
- secscan_mcp/engines/bandit.py +81 -0
- secscan_mcp/engines/base.py +21 -0
- secscan_mcp/engines/checkov.py +74 -0
- secscan_mcp/engines/custom.py +90 -0
- secscan_mcp/engines/git_history.py +174 -0
- secscan_mcp/engines/gitleaks.py +108 -0
- secscan_mcp/engines/osv.py +77 -0
- secscan_mcp/engines/semgrep.py +95 -0
- secscan_mcp/normalize.py +219 -0
- secscan_mcp/paths.py +25 -0
- secscan_mcp/py.typed +0 -0
- secscan_mcp/rules/.gitkeep +0 -0
- secscan_mcp/rules/__init__.py +1 -0
- secscan_mcp/rules/custom_secrets.yaml +19 -0
- secscan_mcp/rules/loader.py +27 -0
- secscan_mcp/runner.py +189 -0
- secscan_mcp/server.py +205 -0
- secscan_mcp-0.1.2.dist-info/METADATA +161 -0
- secscan_mcp-0.1.2.dist-info/RECORD +26 -0
- secscan_mcp-0.1.2.dist-info/WHEEL +4 -0
- secscan_mcp-0.1.2.dist-info/entry_points.txt +2 -0
- secscan_mcp-0.1.2.dist-info/licenses/LICENSE +21 -0
secscan_mcp/__init__.py
ADDED
|
File without changes
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""Scanner engine registry."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from secscan_mcp.engines.bandit import BanditEngine
|
|
6
|
+
from secscan_mcp.engines.base import Engine
|
|
7
|
+
from secscan_mcp.engines.checkov import CheckovEngine
|
|
8
|
+
from secscan_mcp.engines.custom import CustomSecretsEngine
|
|
9
|
+
from secscan_mcp.engines.git_history import GitHistoryEngine
|
|
10
|
+
from secscan_mcp.engines.gitleaks import GitleaksEngine
|
|
11
|
+
from secscan_mcp.engines.osv import OsvEngine
|
|
12
|
+
from secscan_mcp.engines.semgrep import SemgrepEngine
|
|
13
|
+
from secscan_mcp.normalize import Category
|
|
14
|
+
|
|
15
|
+
# Engines registered under Category.SECRET.
SECRET_ENGINES: list[Engine] = [CustomSecretsEngine(), GitHistoryEngine(), GitleaksEngine()]

# Engines registered under Category.SAST.
SAST_ENGINES: list[Engine] = [SemgrepEngine(), BanditEngine()]

# Engines registered under Category.DEPENDENCY.
DEPENDENCY_ENGINES: list[Engine] = [OsvEngine()]

# Engines registered under Category.IAC.
IAC_ENGINES: list[Engine] = [CheckovEngine()]

# Flat list of every engine: secret, then SAST, then dependency, then IaC.
ALL_ENGINES: list[Engine] = [
    *SECRET_ENGINES,
    *SAST_ENGINES,
    *DEPENDENCY_ENGINES,
    *IAC_ENGINES,
]

# Category -> engine list; the values are the very list objects defined above.
ENGINES_BY_CATEGORY: dict[Category, list[Engine]] = {
    Category.SECRET: SECRET_ENGINES,
    Category.SAST: SAST_ENGINES,
    Category.DEPENDENCY: DEPENDENCY_ENGINES,
    Category.IAC: IAC_ENGINES,
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""Shared helpers for engine adapters."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import logging
|
|
7
|
+
import shutil
|
|
8
|
+
import subprocess
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def command_exists(name: str) -> bool:
    """Return True when ``shutil.which`` can resolve ``name`` to an executable."""
    resolved = shutil.which(name)
    return resolved is not None
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def run_command(
    cmd: list[str],
    *,
    cwd: Path,
    timeout: int,
) -> subprocess.CompletedProcess[str]:
    """Execute ``cmd`` with ``cwd`` as working directory and return the result.

    stdout and stderr are captured and decoded as text. A non-zero exit
    status is not turned into an exception (``check=False``); callers
    inspect ``returncode`` themselves. ``timeout`` is forwarded to
    ``subprocess.run`` unchanged.
    """
    completed = subprocess.run(
        cmd,
        cwd=cwd,
        timeout=timeout,
        check=False,
        capture_output=True,
        text=True,
    )
    return completed
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def parse_json(stdout: str) -> Any:
    """Decode JSON text, treating blank or whitespace-only input as ``None``.

    Any decoding error from ``json.loads`` propagates to the caller.
    """
    stripped = stdout.strip()
    return json.loads(stripped) if stripped else None
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"""Bandit Python SAST adapter."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from secscan_mcp.engines._util import command_exists, parse_json, run_command
|
|
9
|
+
from secscan_mcp.normalize import Category, Finding, Severity, make_finding_id, map_severity
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class BanditEngine:
    """Adapter that runs the ``bandit`` CLI and normalizes its JSON report."""

    name = "bandit"
    category = Category.SAST

    def is_installed(self) -> bool:
        """Report whether a ``bandit`` executable can be located."""
        return command_exists("bandit")

    def run(self, root: Path, *, timeout: int) -> list[Finding]:
        """Scan ``root`` recursively and return the parsed findings.

        Raises:
            RuntimeError: if bandit exits with a status other than 0 or 1;
                the message is its stderr, or a generic exit-code note when
                stderr is empty.
        """
        argv = ["bandit", "-r", str(root), "-f", "json", "-q"]
        result = run_command(argv, cwd=root, timeout=timeout)
        if result.returncode in (0, 1):
            return _parse_bandit(result.stdout, root)
        raise RuntimeError(result.stderr.strip() or f"bandit exited {result.returncode}")
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _parse_bandit(stdout: str, root: Path) -> list[Finding]:
    """Convert Bandit's JSON report into normalized findings.

    Args:
        stdout: Raw JSON text produced by the bandit run; may be blank.
        root: Scan root, used to express reported file names relative to it.

    Returns:
        One ``Finding`` per dict entry under the report's ``results`` key.
        Blank output, a non-object document, or a missing / null / non-list
        ``results`` value all produce an empty list.
    """
    data: Any = parse_json(stdout) if stdout.strip() else {}
    if not isinstance(data, dict):
        return []

    results = data.get("results")
    if not isinstance(results, list):
        # A present-but-null (or otherwise malformed) "results" value means
        # "nothing to report"; iterating it directly would raise TypeError.
        return []

    findings: list[Finding] = []
    for item in results:
        if not isinstance(item, dict):
            continue
        rule_id = str(item.get("test_id") or item.get("test_name") or "bandit")
        rel = _relative(str(item.get("filename") or ""), root)
        line = int(item.get("line_number") or 0)
        severity = map_severity(str(item.get("issue_severity")), default=Severity.MEDIUM)
        finding_id = make_finding_id(
            engine="bandit",
            rule_id=rule_id,
            file=rel,
            line=line,
            category=Category.SAST,
        )
        findings.append(
            Finding(
                id=finding_id,
                category=Category.SAST,
                severity=severity,
                title=str(item.get("issue_text") or rule_id),
                file=rel,
                line=line,
                rule_id=rule_id,
                engine="bandit",
                code_snippet=str(item.get("code") or ""),
                cwe=_bandit_cwe(item),
                remediation="See Bandit documentation for this test.",
                confidence=str(item.get("issue_confidence") or "medium").lower(),
            )
        )
    return findings
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _bandit_cwe(item: dict[str, Any]) -> str | None:
    """Return the CWE id of a Bandit result as a string, if one is present.

    ``None`` is returned when ``issue_cwe`` is not a dict or carries no
    ``id`` value.
    """
    cwe_info = item.get("issue_cwe")
    if not isinstance(cwe_info, dict):
        return None
    cwe_id = cwe_info.get("id")
    return None if cwe_id is None else str(cwe_id)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _relative(file_path: str, root: Path) -> str:
    """Express ``file_path`` relative to ``root`` when it lies underneath it.

    An empty path stays empty. Both paths are resolved before comparison;
    a path that is not located under ``root`` is returned unchanged.
    """
    if not file_path:
        return ""
    target = Path(file_path).resolve()
    base = root.resolve()
    try:
        inside = target.relative_to(base)
    except ValueError:
        return file_path
    return str(inside)
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Engine adapter protocol."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Protocol
|
|
7
|
+
|
|
8
|
+
from secscan_mcp.normalize import Category, Finding
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class Engine(Protocol):
    """Structural interface shared by the scanner engine adapters."""

    # Short identifier of the engine.
    name: str
    # Finding category the engine is registered under.
    category: Category

    def is_installed(self) -> bool:
        """Tell whether the scanner behind this engine is available to run."""

    def run(self, root: Path, *, timeout: int) -> list[Finding]:
        """Scan ``root`` and return the findings in normalized form."""
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
"""Checkov IaC scanner adapter."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from secscan_mcp.engines._util import command_exists, parse_json, run_command
|
|
9
|
+
from secscan_mcp.normalize import Category, Finding, Severity, make_finding_id, map_severity
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class CheckovEngine:
    """Adapter that runs the ``checkov`` CLI and normalizes its JSON report."""

    name = "checkov"
    category = Category.IAC

    def is_installed(self) -> bool:
        """Report whether a ``checkov`` executable can be located."""
        return command_exists("checkov")

    def run(self, root: Path, *, timeout: int) -> list[Finding]:
        """Scan the directory ``root`` and return the parsed findings.

        Raises:
            RuntimeError: if checkov exits with a status other than 0 or 1;
                the message is its stderr, or a generic exit-code note when
                stderr is empty.
        """
        argv = ["checkov", "-d", str(root), "--output", "json", "--quiet"]
        result = run_command(argv, cwd=root, timeout=timeout)
        if result.returncode in (0, 1):
            return _parse_checkov(result.stdout, root)
        raise RuntimeError(result.stderr.strip() or f"checkov exited {result.returncode}")
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _parse_checkov(stdout: str, root: Path) -> list[Finding]:
    """Convert Checkov's JSON output into normalized findings.

    The output is accepted either as a single report object or as a list of
    report objects. Every report in a list is processed; reading only the
    first element would silently drop the failed checks of the others.

    Args:
        stdout: Raw JSON text produced by the checkov run; may be blank.
        root: Scan root, used to express reported file names relative to it.

    Returns:
        One ``Finding`` per dict entry under ``results.failed_checks`` of
        each report. Blank output, an unexpected top-level type, or a report
        whose ``results`` is missing / null / not an object contributes
        nothing.
    """
    data: Any = parse_json(stdout) if stdout.strip() else []
    if isinstance(data, dict):
        reports = [data]
    elif isinstance(data, list):
        reports = [entry for entry in data if isinstance(entry, dict)]
    else:
        return []

    findings: list[Finding] = []
    for report in reports:
        results = report.get("results")
        if not isinstance(results, dict):
            # Guards against "results": null, which would otherwise raise
            # AttributeError on the nested lookup.
            continue
        for failed in results.get("failed_checks") or []:
            if not isinstance(failed, dict):
                continue
            rule_id = str(failed.get("check_id") or "checkov")
            rel = _relative(str(failed.get("file_path") or ""), root)
            # First element of file_line_range is used as the finding's line.
            line_range = failed.get("file_line_range")
            line = int(line_range[0]) if line_range else 0
            findings.append(
                Finding(
                    id=make_finding_id(
                        engine="checkov",
                        rule_id=rule_id,
                        file=rel,
                        line=line,
                        category=Category.IAC,
                    ),
                    category=Category.IAC,
                    severity=map_severity(str(failed.get("severity")), default=Severity.MEDIUM),
                    title=str(failed.get("check_name") or rule_id),
                    file=rel,
                    line=line,
                    rule_id=rule_id,
                    engine="checkov",
                    remediation=str(failed.get("guideline") or ""),
                    confidence="medium",
                )
            )
    return findings
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _relative(file_path: str, root: Path) -> str:
    """Return ``file_path`` relative to ``root`` if it is located below it.

    Empty input yields an empty string. Paths are resolved first; when the
    resolved path is not under the resolved root, the input is handed back
    untouched.
    """
    if not file_path:
        return ""
    resolved_path = Path(file_path).resolve()
    resolved_root = root.resolve()
    try:
        relative = resolved_path.relative_to(resolved_root)
    except ValueError:
        return file_path
    return str(relative)
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
"""Custom regex-based secret scanner (no external CLI)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from secscan_mcp.normalize import (
|
|
8
|
+
Category,
|
|
9
|
+
Finding,
|
|
10
|
+
Severity,
|
|
11
|
+
make_finding_id,
|
|
12
|
+
map_severity,
|
|
13
|
+
redact_snippet,
|
|
14
|
+
)
|
|
15
|
+
from secscan_mcp.rules.loader import load_secret_rules
|
|
16
|
+
|
|
17
|
+
# Path component names that exclude a file from the custom secret scan.
_SKIP_DIRS = {
    # version control and virtual environments
    ".git", ".venv", "venv",
    # third-party packages
    "node_modules",
    # tool caches
    "__pycache__", ".ruff_cache", ".mypy_cache",
    # build output
    "dist", "build",
}
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class CustomSecretsEngine:
    """In-process secret scanner driven by the rules from ``load_secret_rules``."""

    name = "custom"
    category = Category.SECRET

    def is_installed(self) -> bool:
        """Always True: this engine does not depend on an external executable."""
        return True

    def run(self, root: Path, *, timeout: int) -> list[Finding]:
        """Apply every rule pattern to every candidate file below ``root``.

        Files that cannot be read (``OSError``) are skipped. Each regex match
        becomes one finding whose snippet is passed through ``redact_snippet``.
        """
        del timeout  # unused; scan is in-process
        secret_rules = load_secret_rules()
        results: list[Finding] = []
        for candidate in _iter_text_files(root):
            try:
                text = candidate.read_text(encoding="utf-8", errors="ignore")
            except OSError:
                continue
            relative_name = str(candidate.relative_to(root))
            for rule in secret_rules:
                regex = rule["pattern"]
                for hit in regex.finditer(text):
                    # 1-based line number: newlines before the match, plus one.
                    line_no = text.count("\n", 0, hit.start()) + 1
                    redacted = redact_snippet(hit.group(0))
                    rule_id = str(rule["id"])
                    finding_id = make_finding_id(
                        engine="custom",
                        rule_id=rule_id,
                        file=relative_name,
                        line=line_no,
                        category=Category.SECRET,
                    )
                    level = map_severity(
                        str(rule.get("severity", "high")),
                        default=Severity.HIGH,
                    )
                    results.append(
                        Finding(
                            id=finding_id,
                            category=Category.SECRET,
                            severity=level,
                            title=str(rule.get("title", rule_id)),
                            file=relative_name,
                            line=line_no,
                            rule_id=rule_id,
                            engine="custom",
                            code_snippet=redacted,
                            remediation=str(rule.get("remediation", "")),
                            confidence="medium",
                        )
                    )
        return results
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _iter_text_files(root: Path, *, skip_dirs: set[str] | None = None) -> list[Path]:
    """Collect the files below ``root`` that should be scanned.

    Args:
        root: Directory to walk recursively.
        skip_dirs: Path component names to exclude; defaults to the
            module-level ``_SKIP_DIRS`` when omitted.

    Returns:
        Regular files under ``root``, excluding any whose path *below root*
        contains an excluded component, and excluding a fixed set of image,
        font and icon suffixes (compared case-insensitively).
    """
    excluded = _SKIP_DIRS if skip_dirs is None else skip_dirs
    binary_suffixes = {".png", ".jpg", ".jpeg", ".gif", ".woff", ".woff2", ".ico"}

    files: list[Path] = []
    for path in root.rglob("*"):
        if not path.is_file():
            continue
        # Test only the components below root. Checking the absolute path
        # would discard every file whenever the scan root itself sits under a
        # directory named e.g. "build", "dist" or "venv".
        if any(part in excluded for part in path.relative_to(root).parts):
            continue
        if path.suffix.lower() in binary_suffixes:
            continue
        files.append(path)
    return files
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
"""Git commit history secret scanner (requires git, no external CLI)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import re
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
from secscan_mcp.engines._util import command_exists, run_command
|
|
11
|
+
from secscan_mcp.normalize import (
|
|
12
|
+
Category,
|
|
13
|
+
Finding,
|
|
14
|
+
Severity,
|
|
15
|
+
make_finding_id,
|
|
16
|
+
map_severity,
|
|
17
|
+
redact_snippet,
|
|
18
|
+
)
|
|
19
|
+
from secscan_mcp.rules.loader import load_secret_rules
|
|
20
|
+
|
|
21
|
+
def _max_commits_from_env(default: int = 500) -> int:
    """Read the commit cap from the ``SECSCAN_GIT_MAX_COMMITS`` variable.

    Returns ``default`` when the variable is unset. A value that is not a
    valid integer also falls back to ``default`` (with a logged warning), so
    a typo in the environment does not make importing this module fail.
    """
    raw = os.environ.get("SECSCAN_GIT_MAX_COMMITS")
    if raw is None:
        return default
    try:
        return int(raw)
    except ValueError:
        import logging  # local import: only needed on this error path

        logging.getLogger(__name__).warning(
            "Ignoring invalid SECSCAN_GIT_MAX_COMMITS=%r; using %d", raw, default
        )
        return default


# Commit cap, evaluated once at import time.
_DEFAULT_MAX_COMMITS = _max_commits_from_env()
# Marker placed at the start of each commit header line in the log format.
_COMMIT_PREFIX = "COMMIT:"
# Unified-diff hunk header; group 1 is the starting line number on the new side.
_HUNK_RE = re.compile(r"^@@ -\d+(?:,\d+)? \+(\d+)(?:,\d+)? @@")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass(frozen=True)
class _CommitContext:
    """Immutable record of the commit whose diff is currently being parsed."""

    sha: str  # commit hash; empty string until a commit header has been seen
    date: str  # author date as printed by the log format; may be empty
    subject: str  # commit subject line; may be empty
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class GitHistoryEngine:
    """Secret scanner that applies the secret rules to ``git log -p`` output."""

    name = "git_history"
    category = Category.SECRET

    def is_installed(self) -> bool:
        """Report whether a ``git`` executable can be located."""
        return command_exists("git")

    def run(self, root: Path, *, timeout: int, include_git_history: bool = False) -> list[Finding]:
        """Scan the commit history of the repository at ``root``.

        Returns an empty list unless ``include_git_history`` is true and
        ``root`` is inside a git repository.

        Raises:
            RuntimeError: if ``git log`` exits non-zero; the message is its
                stderr, else its stdout, else a generic exit-code note.
        """
        if not (include_git_history and _is_git_repo(root)):
            return []

        secret_rules = load_secret_rules()
        log_cmd = [
            "git",
            "log",
            "-p",
            "--all",
            f"-n{_DEFAULT_MAX_COMMITS}",
            "--no-color",
            "--no-ext-diff",
            f"--format={_COMMIT_PREFIX}%H|%aI|%s",
        ]
        result = run_command(log_cmd, cwd=root, timeout=timeout)
        if result.returncode == 0:
            return _parse_git_log(result.stdout, secret_rules)

        detail = (
            result.stderr.strip()
            or result.stdout.strip()
            or f"git log exited {result.returncode}"
        )
        raise RuntimeError(detail)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _is_git_repo(root: Path) -> bool:
    """Return True when ``git rev-parse --git-dir`` exits with status 0 in ``root``."""
    probe = run_command(["git", "rev-parse", "--git-dir"], cwd=root, timeout=30)
    succeeded = probe.returncode == 0
    return succeeded
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _parse_git_log(raw: str, rules: list[dict[str, object]]) -> list[Finding]:
    """Scan the added lines of ``git log -p`` output against the secret rules.

    The parser walks the log line by line and tracks the current commit
    (lines starting with ``_COMMIT_PREFIX``), the current file
    (``diff --git`` headers) and the current line number on the new side of
    the diff (hunk headers, then one increment per context or added line).

    Args:
        raw: Full stdout of the ``git log -p`` invocation.
        rules: Rule dicts; each needs a compiled ``pattern`` and an ``id``.
            ``severity``, ``title`` and ``remediation`` are optional. Rules
            whose ``pattern`` is not a compiled regex are ignored.

    Returns:
        One ``Finding`` per (added line, matching rule) pair. Only the first
        match of a rule on a given line is reported.
    """
    findings: list[Finding] = []
    commit = _CommitContext(sha="", date="", subject="")
    current_file = ""
    new_line = 0
    # True from a hunk header until the next commit or diff header. Inside a
    # hunk every line starting with "+" is an addition, even when its content
    # itself begins with "++" and the line therefore looks like a "+++" file
    # header. Treating such lines as headers would leave them unscanned and
    # shift every later line number in the hunk.
    in_hunk = False

    for line in raw.splitlines():
        if line.startswith(_COMMIT_PREFIX):
            commit = _parse_commit_line(line)
            current_file = ""
            new_line = 0
            in_hunk = False
            continue

        if line.startswith("diff --git "):
            current_file = _parse_diff_path(line)
            new_line = 0
            in_hunk = False
            continue

        if line.startswith("diff "):
            # Any other diff header also ends the hunk; hunk lines themselves
            # always start with "+", "-", " " or "\".
            in_hunk = False
            continue

        hunk_match = _HUNK_RE.match(line)
        if hunk_match:
            new_line = int(hunk_match.group(1))
            in_hunk = True
            continue

        is_file_header = line.startswith("+++") and not in_hunk
        if not line.startswith("+") or is_file_header:
            # Context lines advance the new-side counter; removed lines and
            # everything else (headers, blank separators) do not.
            if line.startswith(" ") and current_file:
                new_line += 1
            continue

        added = line[1:]
        if not current_file or not commit.sha:
            continue

        for rule in rules:
            pattern = rule["pattern"]
            if not isinstance(pattern, re.Pattern):
                continue
            match = pattern.search(added)
            if match is None:
                continue
            rule_id = str(rule["id"])
            short_sha = commit.sha[:7]
            findings.append(
                Finding(
                    id=make_finding_id(
                        engine="git_history",
                        rule_id=rule_id,
                        file=current_file,
                        line=new_line,
                        category=Category.SECRET,
                    ),
                    category=Category.SECRET,
                    severity=map_severity(
                        str(rule.get("severity", "high")),
                        default=Severity.HIGH,
                    ),
                    title=f"{rule.get('title', rule_id)} in git history ({short_sha})",
                    file=current_file,
                    line=new_line,
                    rule_id=rule_id,
                    engine="git_history",
                    code_snippet=redact_snippet(match.group(0)),
                    remediation=_history_remediation(rule, commit),
                    confidence="medium",
                )
            )
        new_line += 1

    return findings
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def _parse_commit_line(line: str) -> _CommitContext:
    """Build a commit context from a ``COMMIT:<sha>|<date>|<subject>`` line.

    The payload is split on at most the first two ``|`` characters, so any
    further ``|`` stays in the subject. If fewer than three fields result,
    the whole payload is used as the sha and date/subject are left empty.
    """
    payload = line[len(_COMMIT_PREFIX) :]
    fields = payload.split("|", 2)
    if len(fields) == 3:
        sha, date, subject = fields
        return _CommitContext(sha=sha, date=date, subject=subject)
    return _CommitContext(sha=payload, date="", subject="")
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def _parse_diff_path(line: str) -> str:
    """Extract the file path from a ``diff --git a/<path> b/<path>`` header.

    Paths containing spaces are handled; splitting the header on whitespace
    would cut ``a/my file.txt`` down to ``my``.

    Resolution order:
      1. Both sides name the same path: the header is exactly
         ``a/<p> b/<p>``, so it is split at its midpoint.
      2. The sides differ (e.g. a rename): the ``a/`` side up to the first
         `` b/`` separator is returned.
      3. Anything else falls back to whitespace tokenization: the third
         token if it starts with ``a/``, else the last token if it starts
         with ``b/``.

    Returns:
        The path without its ``a/`` or ``b/`` prefix, or ``""`` when the
        header is not recognized.
    """
    prefix = "diff --git "
    if line.startswith(prefix):
        rest = line[len(prefix) :]
        if rest.startswith("a/"):
            # "a/<p> b/<p>" has odd length with the separating space exactly
            # in the middle.
            mid, leftover = divmod(len(rest) - 1, 2)
            if (
                leftover == 0
                and rest[mid] == " "
                and rest[mid + 1 : mid + 3] == "b/"
                and rest[2:mid] == rest[mid + 3 :]
            ):
                return rest[2:mid]
            old_side, separator, _new_side = rest.partition(" b/")
            if separator:
                return old_side[2:]

    # diff --git a/path b/path
    parts = line.split()
    if len(parts) >= 4 and parts[2].startswith("a/"):
        return parts[2][2:]
    if len(parts) >= 3 and parts[-1].startswith("b/"):
        return parts[-1][2:]
    return ""
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def _history_remediation(rule: dict[str, object], commit: _CommitContext) -> str:
    """Compose remediation text for a secret found in commit history.

    The rule's own ``remediation`` text (if any, stripped) comes first,
    followed by a note naming the commit (abbreviated sha, plus date and
    subject when present) and advising rotation and a history rewrite.
    """
    rule_advice = str(rule.get("remediation", "")).strip()
    sha_label = commit.sha[:7] if commit.sha else "unknown"

    pieces = [f"Found in commit {sha_label}"]
    if commit.date:
        pieces.append(f" ({commit.date})")
    if commit.subject:
        pieces.append(f': "{commit.subject}"')
    pieces.append(
        ". Secrets in git history remain exposed even after deletion from the working tree. "
    )
    pieces.append(
        "Rotate the credential and rewrite history (e.g. git filter-repo) or treat the secret as compromised."
    )
    history_note = "".join(pieces)

    if not rule_advice:
        return history_note
    return f"{rule_advice} {history_note}".strip()
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
"""Gitleaks secret scanner adapter."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import tempfile
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from secscan_mcp.engines._util import command_exists, parse_json, run_command
|
|
10
|
+
from secscan_mcp.normalize import (
|
|
11
|
+
Category,
|
|
12
|
+
Finding,
|
|
13
|
+
Severity,
|
|
14
|
+
make_finding_id,
|
|
15
|
+
map_severity,
|
|
16
|
+
redact_secret,
|
|
17
|
+
redact_snippet,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class GitleaksEngine:
    """Adapter that runs the ``gitleaks`` CLI and normalizes its JSON report."""

    name = "gitleaks"
    category = Category.SECRET

    def is_installed(self) -> bool:
        """Report whether a ``gitleaks`` executable can be located."""
        return command_exists("gitleaks")

    def run(self, root: Path, *, timeout: int, include_git_history: bool = False) -> list[Finding]:
        """Run ``gitleaks detect`` on ``root`` and return the parsed findings.

        The report is written to a temporary JSON file which is removed
        again on every exit path, including a timeout or other exception
        from the subprocess call and the non-zero-exit error below.

        Args:
            root: Directory passed to gitleaks as ``--source``.
            timeout: Forwarded to ``run_command``.
            include_git_history: When false, ``--no-git`` is appended to the
                command line.

        Raises:
            RuntimeError: if gitleaks exits with a status other than 0 or 1;
                the message is its stderr, else its stdout, else a generic
                exit-code note.
        """
        # delete=False: the file must outlive this handle so gitleaks can
        # write to it; removal is handled in the finally block below.
        with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as tmp:
            report_path = Path(tmp.name)

        try:
            cmd = [
                "gitleaks",
                "detect",
                "--source",
                str(root),
                "--report-format",
                "json",
                "--report-path",
                str(report_path),
            ]
            if not include_git_history:
                cmd.append("--no-git")

            proc = run_command(cmd, cwd=root, timeout=timeout)
            if proc.returncode not in (0, 1):  # 1 = leaks found
                msg = proc.stderr.strip() or proc.stdout.strip() or f"gitleaks exited {proc.returncode}"
                raise RuntimeError(msg)

            raw = report_path.read_text(encoding="utf-8") if report_path.exists() else "[]"
        finally:
            report_path.unlink(missing_ok=True)
        return _parse_gitleaks(raw, root)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _parse_gitleaks(raw: str, root: Path) -> list[Finding]:
    """Convert a gitleaks JSON report into normalized findings.

    The report may be a list of entries, or an object holding the entries
    under ``findings`` or ``results``. Each field is looked up under several
    key spellings (e.g. ``File`` / ``file``). Snippets are passed through
    ``redact_snippet``, with ``redact_secret`` as the fallback when the
    redacted snippet is empty.
    """
    data: Any = parse_json(raw) if raw.strip() else []
    if isinstance(data, list):
        entries = data
    elif isinstance(data, dict):
        entries = data.get("findings") or data.get("results") or []
    else:
        entries = []

    collected: list[Finding] = []
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        location = _relative_path(entry.get("File") or entry.get("file") or "", root)
        line_no = int(entry.get("StartLine") or entry.get("startLine") or entry.get("line") or 0)
        rule_id = str(entry.get("RuleID") or entry.get("ruleID") or entry.get("rule") or "gitleaks")
        secret = str(entry.get("Secret") or entry.get("secret") or "")
        redacted = redact_snippet(str(entry.get("Match") or entry.get("match") or secret))
        heading = str(entry.get("Description") or entry.get("description") or rule_id)
        level = map_severity(
            str(entry.get("Severity") or entry.get("severity")), default=Severity.HIGH
        )
        finding_id = make_finding_id(
            engine="gitleaks",
            rule_id=rule_id,
            file=location,
            line=line_no,
            category=Category.SECRET,
        )
        collected.append(
            Finding(
                id=finding_id,
                category=Category.SECRET,
                severity=level,
                title=heading,
                file=location,
                line=line_no,
                rule_id=rule_id,
                engine="gitleaks",
                code_snippet=redacted or redact_secret(secret),
                remediation="Remove the secret and rotate credentials; use environment variables or a secret manager.",
                confidence="high",
            )
        )
    return collected
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _relative_path(file_path: str, root: Path) -> str:
    """Make ``file_path`` relative to ``root`` when it is located beneath it.

    An empty path is returned as an empty string. Both paths are resolved
    before comparing; if the file is not under ``root`` the original string
    is returned as given.
    """
    if not file_path:
        return ""
    candidate = Path(file_path).resolve()
    anchor = root.resolve()
    try:
        below = candidate.relative_to(anchor)
    except ValueError:
        return file_path
    return str(below)
|