devguard 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- devguard/INTEGRATION_SUMMARY.md +121 -0
- devguard/__init__.py +3 -0
- devguard/__main__.py +6 -0
- devguard/checkers/__init__.py +41 -0
- devguard/checkers/api_usage.py +523 -0
- devguard/checkers/aws_cost.py +331 -0
- devguard/checkers/aws_iam.py +284 -0
- devguard/checkers/base.py +25 -0
- devguard/checkers/container.py +137 -0
- devguard/checkers/domain.py +189 -0
- devguard/checkers/firecrawl.py +117 -0
- devguard/checkers/fly.py +225 -0
- devguard/checkers/github.py +210 -0
- devguard/checkers/npm.py +327 -0
- devguard/checkers/npm_security.py +244 -0
- devguard/checkers/redteam.py +290 -0
- devguard/checkers/secret.py +279 -0
- devguard/checkers/swarm.py +376 -0
- devguard/checkers/tailscale.py +143 -0
- devguard/checkers/tailsnitch.py +303 -0
- devguard/checkers/tavily.py +179 -0
- devguard/checkers/vercel.py +192 -0
- devguard/cli.py +1510 -0
- devguard/cli_helpers.py +189 -0
- devguard/config.py +249 -0
- devguard/core.py +293 -0
- devguard/dashboard.py +715 -0
- devguard/discovery.py +363 -0
- devguard/http_client.py +142 -0
- devguard/llm_service.py +481 -0
- devguard/mcp_server.py +259 -0
- devguard/metrics.py +144 -0
- devguard/models.py +208 -0
- devguard/reporting.py +1571 -0
- devguard/sarif.py +295 -0
- devguard/scripts/ANALYSIS_SUMMARY.md +141 -0
- devguard/scripts/README.md +221 -0
- devguard/scripts/auto_fix_recommendations.py +145 -0
- devguard/scripts/generate_npmignore.py +175 -0
- devguard/scripts/generate_security_report.py +324 -0
- devguard/scripts/prepublish_check.sh +29 -0
- devguard/scripts/redteam_npm_packages.py +1262 -0
- devguard/scripts/review_all_repos.py +300 -0
- devguard/spec.py +617 -0
- devguard/sweeps/__init__.py +23 -0
- devguard/sweeps/ai_editor_config_audit.py +697 -0
- devguard/sweeps/cargo_publish_audit.py +655 -0
- devguard/sweeps/dependency_audit.py +419 -0
- devguard/sweeps/gitignore_audit.py +336 -0
- devguard/sweeps/local_dev.py +260 -0
- devguard/sweeps/local_dirty_worktree_secrets.py +521 -0
- devguard/sweeps/project_flaudit.py +636 -0
- devguard/sweeps/public_github_secrets.py +680 -0
- devguard/sweeps/publish_audit.py +478 -0
- devguard/sweeps/ssh_key_audit.py +327 -0
- devguard/utils.py +174 -0
- devguard-0.2.0.dist-info/METADATA +225 -0
- devguard-0.2.0.dist-info/RECORD +60 -0
- devguard-0.2.0.dist-info/WHEEL +4 -0
- devguard-0.2.0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
"""Secret scanning checker using trufflehog with fallback."""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import json
|
|
5
|
+
import logging
|
|
6
|
+
import re
|
|
7
|
+
import shutil
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
from devguard.checkers.base import BaseChecker
|
|
11
|
+
from devguard.config import Settings
|
|
12
|
+
from devguard.models import CheckResult, Severity, Vulnerability
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class SecretChecker(BaseChecker):
    """Check git repositories for leaked secrets using trufflehog or fallback regex.

    Scans:
    1. Configured git repos for secrets in history
    2. Filesystem for current secrets (excluding .env files)

    Uses trufflehog (Go-based, fast) if available.
    Falls back to regex scanning if trufflehog is not installed.
    """

    check_type = "secret"

    # Directory components that are never worth scanning in the regex fallback.
    # Matched per path component (relative to the repo root), not by substring,
    # so a file such as "app.venv.log" — or a repo that happens to live under a
    # directory whose name contains "venv" — is not accidentally skipped.
    SKIP_DIR_NAMES = frozenset({".git", "node_modules", "venv", ".venv", "__pycache__"})

    # Files larger than this are skipped by the regex fallback.
    MAX_SCAN_BYTES = 1024 * 1024  # 1MB

    # Fallback patterns if trufflehog is missing
    FALLBACK_PATTERNS = [
        (r"AWS_ACCESS_KEY_ID\s*=\s*['\"]?(AKIA[0-9A-Z]{16})['\"]?", "AWS Access Key"),
        (r"AWS_SECRET_ACCESS_KEY\s*=\s*['\"]?([0-9a-zA-Z/+]{40})['\"]?", "AWS Secret Key"),
        (r"PRIVATE_KEY\s*=\s*['\"]?(-+BEGIN PRIVATE KEY-+)['\"]?", "Private Key"),
        (r"ghp_[a-zA-Z0-9]{36}", "GitHub Personal Access Token"),
        (r"xox[baprs]-([0-9a-zA-Z]{10,48})", "Slack Token"),
        (r"sk_live_[0-9a-zA-Z]{24}", "Stripe Secret Key"),
        (r"api_key\s*=\s*['\"]?([a-zA-Z0-9]{32,})['\"]?", "Generic API Key"),
    ]

    def __init__(self, settings: Settings):
        """Initialize secret checker.

        Resolves the trufflehog binary once; ``None`` means the regex
        fallback engine will be used for all scans.
        """
        super().__init__(settings)
        self.trufflehog_path = shutil.which("trufflehog")

    async def check(self) -> CheckResult:
        """Check for leaked secrets.

        Returns a CheckResult whose ``success`` is True only when no
        vulnerabilities were found; per-repo scan failures are collected
        into ``errors`` rather than aborting the sweep.
        """
        vulnerabilities: list[Vulnerability] = []
        errors: list[str] = []
        metadata: dict = {
            "repos_scanned": [],
            "total_findings": 0,
            "engine": "trufflehog" if self.trufflehog_path else "regex-fallback",
        }

        # Scan configured repos.
        #
        # Important: when TruffleHog is missing, the regex fallback is both limited
        # in quality and potentially very slow on large repos. In that mode we only
        # scan explicitly configured paths to avoid accidental multi-repo sweeps.
        repos_to_scan = self._get_repos_to_scan()
        if not self.trufflehog_path and not self.settings.secret_scan_paths:
            repos_to_scan = []

        for repo_path in repos_to_scan:
            try:
                if self.trufflehog_path:
                    findings = await self._scan_git_history(repo_path)
                else:
                    findings = await self._scan_with_regex(repo_path)
                vulnerabilities.extend(findings)
                metadata["repos_scanned"].append(str(repo_path))
            except Exception as e:
                errors.append(f"Error scanning {repo_path}: {str(e)}")

        if not self.trufflehog_path:
            errors.append(
                "Warning: trufflehog not found. Using limited regex fallback. Install with `brew install trufflehog` for better security."
            )
            if not self.settings.secret_scan_paths:
                errors.append(
                    "No secret_scan_paths configured; skipping regex fallback scan to avoid slow large-repo sweeps."
                )

        metadata["total_findings"] = len(vulnerabilities)

        return CheckResult(
            check_type=self.check_type,
            success=len(vulnerabilities) == 0,
            vulnerabilities=vulnerabilities,
            errors=errors,
            metadata=metadata,
        )

    def _get_repos_to_scan(self) -> list[Path]:
        """Get list of git repos to scan for secrets.

        Resolution order: explicitly configured ``secret_scan_paths`` (a path
        may be a repo itself or a directory of repos), then well-known sibling
        repos next to this one, then this repo itself as a last resort.
        """
        repos: list[Path] = []

        def is_git_repo(p: Path) -> bool:
            # Support both .git directories and gitfiles (worktrees/submodules)
            git_marker = p / ".git"
            return git_marker.is_dir() or git_marker.is_file()

        def find_git_root(start: Path) -> Path | None:
            cur = start.resolve()
            for parent in [cur, *cur.parents]:
                if is_git_repo(parent):
                    return parent
            return None

        # Check configured secret scan paths
        if self.settings.secret_scan_paths:
            for path_str in self.settings.secret_scan_paths:
                path = Path(path_str).expanduser()
                if path.exists() and is_git_repo(path):
                    repos.append(path)
                elif path.is_dir():
                    # Look for .git in subdirectories
                    for git_dir in path.glob("*/.git"):
                        repos.append(git_dir.parent)

        # Default: try to find "nearby" repos (works both when Guardian lives inside
        # a larger super-workspace and when it's a standalone repo).
        if not repos:
            # 1) If we are in a super-workspace, scan sibling repos if present.
            # We use the current repo's parent as the "workspace root" candidate.
            # When Guardian is installed (e.g., in CI), `__file__` will live under
            # site-packages and won't have a `.git` ancestor. Prefer CWD first.
            this_repo = find_git_root(Path.cwd()) or find_git_root(Path(__file__))
            workspace_root = this_repo.parent if this_repo else Path.cwd()

            for rel in [
                "_infra",  # umbrella dir (may itself be a git repo in some setups)
                "_infra/infra",  # common layout: infra repo inside _infra
                "accounting",
                "dossier",
                "www",
                "ops",
            ]:
                candidate = (workspace_root / rel).resolve()
                if candidate.exists() and is_git_repo(candidate):
                    repos.append(candidate)

            # 2) Always fall back to scanning Guardian itself (first application on itself).
            if not repos and this_repo and is_git_repo(this_repo):
                repos.append(this_repo)

        return repos

    @staticmethod
    async def _kill_quietly(proc) -> None:
        """Kill a subprocess and reap it, ignoring already-terminated processes."""
        try:
            proc.kill()
            await proc.wait()
        except ProcessLookupError:
            pass  # Process already terminated

    async def _scan_git_history(self, repo_path: Path) -> list[Vulnerability]:
        """Scan a git repo's history for secrets using trufflehog.

        Returns an empty list on timeout or subprocess failure. Only
        verified findings are reported (see ``_parse_finding``).
        """
        if not self.trufflehog_path:
            return []

        cmd = [
            self.trufflehog_path,
            "git",
            f"file://{repo_path}",
            "--no-update",
            "--only-verified",
            "--json",
        ]

        proc = None
        try:
            proc = await asyncio.create_subprocess_exec(
                *cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
            )
            stdout, _stderr = await asyncio.wait_for(proc.communicate(), timeout=60.0)
        except asyncio.TimeoutError:
            # Bare `except TimeoutError` would miss asyncio's timeout on
            # Python < 3.11, where asyncio.TimeoutError is a distinct class.
            logger.warning(f"Timeout scanning {repo_path}")
            if proc:
                await self._kill_quietly(proc)
            return []
        except Exception as e:
            logger.warning(f"Error running trufflehog on {repo_path}: {e}")
            if proc:
                await self._kill_quietly(proc)
            return []

        vulnerabilities: list[Vulnerability] = []

        # Parse JSON lines output
        for line in stdout.decode("utf-8", errors="replace").splitlines():
            line = line.strip()
            if not line:
                continue
            try:
                finding = json.loads(line)
            except json.JSONDecodeError:
                continue
            vuln = self._parse_finding(finding, repo_path)
            if vuln:
                vulnerabilities.append(vuln)

        return vulnerabilities

    async def _scan_with_regex(self, repo_path: Path) -> list[Vulnerability]:
        """Scan files in repo using fallback regex.

        Skips VCS/dependency/cache directories and files over
        ``MAX_SCAN_BYTES``. Unreadable files (permissions, races, broken
        symlinks) are silently skipped — this is a best-effort fallback.
        """
        vulnerabilities: list[Vulnerability] = []
        # Compile once; the same patterns are applied to every file.
        compiled = [(re.compile(pattern), name) for pattern, name in self.FALLBACK_PATTERNS]

        # Walk through files, ignoring .git and node_modules
        for path in repo_path.rglob("*"):
            try:
                if not path.is_file():
                    continue
                # Exclusions are evaluated relative to the repo root so the
                # repo's own location on disk cannot trigger a skip.
                if any(part in self.SKIP_DIR_NAMES for part in path.relative_to(repo_path).parts):
                    continue
                # Skip large files
                if path.stat().st_size > self.MAX_SCAN_BYTES:
                    continue
                content = path.read_text(errors="ignore")
            except Exception:
                continue

            for regex, name in compiled:
                if regex.search(content):
                    vulnerabilities.append(
                        Vulnerability(
                            package_name=f"{repo_path.name}/{path.relative_to(repo_path)}",
                            package_version="HEAD",
                            severity=Severity.HIGH,
                            summary=f"Possible {name} found (Regex)",
                            description=f"Found pattern matching {name} in file. Please verify.",
                            source="devguard-regex-fallback",
                        )
                    )

        return vulnerabilities

    def _parse_finding(self, finding: dict, repo_path: Path) -> Vulnerability | None:
        """Parse a trufflehog finding into a Vulnerability.

        Returns ``None`` for unverified findings or malformed records.
        The raw secret value is never emitted in full — only a redacted
        prefix/suffix pair.
        """
        try:
            detector_type = finding.get("DetectorName", "unknown")
            verified = finding.get("Verified", False)

            # Only report verified findings
            if not verified:
                return None

            # Get source metadata
            source_metadata = finding.get("SourceMetadata", {}).get("Data", {})
            git_data = source_metadata.get("Git", {})

            file_path = git_data.get("file", "unknown")
            commit = git_data.get("commit", "")[:8] if git_data.get("commit") else ""

            # Redact the actual secret
            raw = finding.get("Raw", "")
            redacted = raw[:4] + "..." + raw[-4:] if len(raw) > 8 else "[redacted]"

            summary = f"Verified {detector_type} secret in {repo_path.name}/{file_path}"
            if commit:
                summary += f" (commit {commit})"

            return Vulnerability(
                package_name=f"{repo_path.name}/{file_path}",
                package_version=commit or "HEAD",
                severity=Severity.CRITICAL,  # Verified secrets are always critical
                summary=summary,
                description=f"Verified {detector_type} credential found. Value: {redacted}",
                source="trufflehog",
            )
        except Exception as e:
            logger.warning(f"Error parsing trufflehog finding: {e}")
            return None
|
|
@@ -0,0 +1,376 @@
|
|
|
1
|
+
"""Docker Swarm health checker."""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import json
|
|
5
|
+
import logging
|
|
6
|
+
|
|
7
|
+
from devguard.checkers.base import BaseChecker
|
|
8
|
+
from devguard.models import CheckResult, CheckStatus, DeploymentStatus, Finding, Severity
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class SwarmChecker(BaseChecker):
    """Check Docker Swarm cluster health.

    Queries the local docker CLI for swarm membership, node health,
    service replica counts, and placement-constraint compliance.
    """

    check_type = "swarm"

    async def check(self) -> CheckResult:
        """Check Docker Swarm cluster status.

        Success means docker was reachable and no CRITICAL/HIGH findings
        were raised. Not being in a swarm, or running on a worker node
        (which cannot query cluster state), is reported but not a failure.
        """
        deployments: list[DeploymentStatus] = []
        findings: list[Finding] = []
        errors: list[str] = []
        metadata: dict = {}

        expected_nodes = set(self.settings.swarm_expected_nodes)
        critical_services = set(self.settings.swarm_critical_services)

        # Check if we're in a swarm
        swarm_info = await self._get_swarm_info()
        if swarm_info.get("error"):
            errors.append(swarm_info["error"])
            return CheckResult(
                check_type=self.check_type,
                success=False,
                errors=errors,
            )

        swarm_state = swarm_info.get("state", "inactive")
        is_manager = swarm_info.get("is_manager", False)
        metadata["local_state"] = swarm_state
        metadata["is_manager"] = is_manager

        if swarm_state == "inactive":
            findings.append(
                Finding(
                    severity=Severity.WARNING,
                    title="Local node not in swarm",
                    description="This node is not part of a Docker Swarm cluster",
                    resource="swarm",
                    remediation="Run 'docker swarm init' or 'docker swarm join'",
                )
            )
            return CheckResult(
                check_type=self.check_type,
                success=True,  # Not being in swarm isn't a failure
                findings=findings,
                errors=errors,
                metadata=metadata,
            )

        # If we're a worker (not manager), we can't query cluster state
        if not is_manager:
            metadata["note"] = (
                "Running on worker node - cannot query cluster state. Run on a manager node for full cluster visibility."
            )
            return CheckResult(
                check_type=self.check_type,
                success=True,  # Worker node is fine, just can't query
                findings=findings,
                errors=errors,
                metadata=metadata,
            )

        if swarm_state == "pending":
            findings.append(
                Finding(
                    severity=Severity.HIGH,
                    title="Swarm node stuck in pending state",
                    description="This node is trying to join a swarm but cannot connect to the manager",
                    resource="swarm",
                    remediation="Check network connectivity to swarm manager, or run 'docker swarm leave --force' and rejoin",
                )
            )

        # From here on we are a manager (workers returned above), so we can
        # query cluster-wide node and service state.
        self._collect_node_state(
            await self._get_swarm_nodes(),
            expected_nodes,
            deployments,
            findings,
            errors,
            metadata,
        )
        await self._collect_service_state(critical_services, findings, metadata)

        return CheckResult(
            check_type=self.check_type,
            success=len(errors) == 0
            and not any(f.severity in [Severity.CRITICAL, Severity.HIGH] for f in findings),
            deployments=deployments,
            findings=findings,
            errors=errors,
            metadata=metadata,
        )

    def _collect_node_state(
        self,
        nodes_result: dict,
        expected_nodes: set,
        deployments: list,
        findings: list,
        errors: list,
        metadata: dict,
    ) -> None:
        """Translate `docker node ls` output into deployments/findings (mutates args)."""
        if nodes_result.get("error"):
            errors.append(nodes_result["error"])
            return

        nodes = nodes_result.get("nodes", [])
        metadata["total_nodes"] = len(nodes)

        for node in nodes:
            hostname = node.get("hostname", "unknown")
            status = node.get("status", "unknown")
            availability = node.get("availability", "unknown")
            manager_status = node.get("manager_status", "")

            # docker capitalizes these ("Ready"/"Active"), so compare
            # case-insensitively rather than against lowercase literals.
            is_healthy = status.lower() == "ready" and availability.lower() == "active"
            is_expected = hostname in expected_nodes

            if is_healthy:
                check_status = CheckStatus.HEALTHY
            elif is_expected:
                check_status = CheckStatus.UNHEALTHY
                findings.append(
                    Finding(
                        severity=Severity.HIGH,
                        title=f"Expected swarm node unhealthy: {hostname}",
                        description=f"{hostname} status={status}, availability={availability}",
                        resource=hostname,
                        remediation=f"Check {hostname} Docker daemon and network",
                    )
                )
            else:
                check_status = CheckStatus.UNKNOWN

            deployments.append(
                DeploymentStatus(
                    platform="swarm",
                    project_name=hostname,
                    deployment_id=node.get("id", "")[:12],
                    status=check_status,
                    url=f"docker://{hostname}",
                    metadata={
                        "status": status,
                        "availability": availability,
                        "manager_status": manager_status,
                        "role": "manager" if manager_status else "worker",
                    },
                )
            )

        # Check for missing expected nodes
        seen_hostnames = {n.get("hostname") for n in nodes}
        for expected_host in expected_nodes:
            if expected_host not in seen_hostnames:
                findings.append(
                    Finding(
                        severity=Severity.HIGH,
                        title=f"Expected swarm node missing: {expected_host}",
                        description=f"{expected_host} is not in the swarm cluster",
                        resource=expected_host,
                        remediation=f"Join {expected_host} to the swarm cluster",
                    )
                )

    async def _collect_service_state(
        self, critical_services: set, findings: list, metadata: dict
    ) -> None:
        """Check service replica health and placement compliance (mutates args)."""
        services_result = await self._get_swarm_services()
        if services_result.get("error"):
            # Not critical - maybe no stack deployed
            logger.debug(f"Could not get swarm services: {services_result['error']}")
            return

        services = services_result.get("services", [])
        metadata["total_services"] = len(services)

        for svc in services:
            name = svc.get("name", "unknown")
            replicas = svc.get("replicas", "0/0")

            # Parse replicas like "1/1" or "0/1"
            try:
                running, desired = replicas.split("/")
                is_healthy = int(running) >= int(desired) and int(desired) > 0
            except ValueError:
                # Covers both a malformed split and non-numeric counts.
                is_healthy = False

            if not is_healthy and name in critical_services:
                findings.append(
                    Finding(
                        severity=Severity.HIGH,
                        title=f"Critical swarm service unhealthy: {name}",
                        description=f"Service {name} has {replicas} replicas",
                        resource=name,
                        remediation=f"Check service logs: docker service logs {name}",
                    )
                )

            # Check placement constraints compliance
            placement_result = await self._check_service_placement(name)
            for violation in placement_result.get("violations", []):
                findings.append(
                    Finding(
                        severity=Severity.HIGH,
                        title=f"Service placement violation: {name}",
                        description=violation["description"],
                        resource=name,
                        remediation=violation["remediation"],
                    )
                )

    @staticmethod
    def _parse_json_lines(raw: bytes) -> list[dict]:
        """Parse newline-delimited JSON output, skipping blank or malformed lines.

        One garbled line no longer aborts the whole listing.
        """
        items: list[dict] = []
        for line in raw.decode().strip().split("\n"):
            if not line:
                continue
            try:
                items.append(json.loads(line))
            except json.JSONDecodeError:
                continue
        return items

    async def _get_swarm_info(self) -> dict:
        """Get local swarm state.

        Returns ``{"state": ..., "is_manager": ..., "node_id": ...}`` or
        ``{"error": ...}`` when docker is unavailable or unparseable.
        """
        try:
            proc = await asyncio.create_subprocess_exec(
                "docker",
                "info",
                "--format",
                '{"state":"{{.Swarm.LocalNodeState}}","is_manager":{{.Swarm.ControlAvailable}},"node_id":"{{.Swarm.NodeID}}"}',
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
            )
            stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=10.0)

            if proc.returncode != 0:
                return {"error": f"docker info failed: {stderr.decode()}"}

            return json.loads(stdout.decode())

        except asyncio.TimeoutError:
            # asyncio.TimeoutError != builtin TimeoutError on Python < 3.11.
            return {"error": "docker info timed out"}
        except FileNotFoundError:
            return {"error": "docker CLI not found"}
        except json.JSONDecodeError as e:
            return {"error": f"Failed to parse docker info: {e}"}
        except Exception as e:
            return {"error": str(e)}

    async def _get_swarm_nodes(self) -> dict:
        """Get swarm nodes (requires manager).

        Returns ``{"nodes": [...]}`` or ``{"error": ...}``.
        """
        try:
            proc = await asyncio.create_subprocess_exec(
                "docker",
                "node",
                "ls",
                "--format",
                '{"id":"{{.ID}}","hostname":"{{.Hostname}}","status":"{{.Status}}","availability":"{{.Availability}}","manager_status":"{{.ManagerStatus}}"}',
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
            )
            stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=10.0)

            if proc.returncode != 0:
                return {"error": stderr.decode()}

            return {"nodes": self._parse_json_lines(stdout)}

        except asyncio.TimeoutError:
            return {"error": "docker node ls timed out"}
        except Exception as e:
            return {"error": str(e)}

    async def _get_swarm_services(self) -> dict:
        """Get swarm services (requires manager).

        Returns ``{"services": [...]}`` or ``{"error": ...}``.
        """
        try:
            proc = await asyncio.create_subprocess_exec(
                "docker",
                "service",
                "ls",
                "--format",
                '{"id":"{{.ID}}","name":"{{.Name}}","mode":"{{.Mode}}","replicas":"{{.Replicas}}"}',
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
            )
            stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=10.0)

            if proc.returncode != 0:
                return {"error": stderr.decode()}

            return {"services": self._parse_json_lines(stdout)}

        except asyncio.TimeoutError:
            return {"error": "docker service ls timed out"}
        except Exception as e:
            return {"error": str(e)}

    async def _check_service_placement(self, service_name: str) -> dict:
        """Check if service tasks are placed according to constraints.

        Verifies that services with node.hostname constraints are actually
        running on the correct nodes (Swarm's equivalent of K8s taints/affinities).
        Always returns ``{"violations": [...]}``; inability to check is treated
        as no violations, never as an error.
        """
        violations: list[dict] = []

        try:
            # Get service inspect to see constraints
            proc_inspect = await asyncio.create_subprocess_exec(
                "docker",
                "service",
                "inspect",
                service_name,
                "--format",
                "{{json .Spec.TaskTemplate.Placement.Constraints}}",
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
            )
            stdout_inspect, _stderr_inspect = await asyncio.wait_for(
                proc_inspect.communicate(), timeout=10.0
            )

            if proc_inspect.returncode != 0:
                return {"violations": []}  # Can't check, not an error

            constraints_json = stdout_inspect.decode().strip()
            if not constraints_json or constraints_json == "null":
                return {"violations": []}  # No constraints defined

            constraints = json.loads(constraints_json)

            # Extract expected hostname from constraints (e.g., "node.hostname == mynode").
            # Whitespace around "==" is optional in swarm constraint syntax, so
            # normalize before matching.
            expected_hostname = None
            for constraint in constraints:
                normalized = constraint.replace(" ", "")
                if normalized.startswith("node.hostname=="):
                    expected_hostname = normalized.split("==", 1)[1]
                    break

            if not expected_hostname:
                return {"violations": []}  # No hostname constraint

            # Get actual task placements
            proc_ps = await asyncio.create_subprocess_exec(
                "docker",
                "service",
                "ps",
                service_name,
                "--format",
                '{"id":"{{.ID}}","node":"{{.Node}}","desired_state":"{{.DesiredState}}","current_state":"{{.CurrentState}}"}',
                "--no-trunc",
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
            )
            stdout_ps, _stderr_ps = await asyncio.wait_for(proc_ps.communicate(), timeout=10.0)

            if proc_ps.returncode != 0:
                return {"violations": []}  # Can't check

            # Parse task placements
            for task in self._parse_json_lines(stdout_ps):
                # docker renders CurrentState as e.g. "Running 5 minutes ago",
                # so an exact == "Running" comparison would never match.
                if not task.get("current_state", "").startswith("Running"):
                    continue
                actual_node = task.get("node", "").split(".")[0]  # Extract hostname from FQDN
                if actual_node != expected_hostname:
                    violations.append(
                        {
                            "description": f"Service {service_name} task {task.get('id', '')[:12]} is running on {actual_node}, but constraint requires {expected_hostname}",
                            "remediation": f"Check why task was placed on wrong node. Verify node.hostname constraint: docker service inspect {service_name}",
                        }
                    )

            return {"violations": violations}

        except asyncio.TimeoutError:
            return {"violations": []}
        except Exception as e:
            logger.debug(f"Placement check failed for {service_name}: {e}")
            return {"violations": []}
|