devguard 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- devguard/INTEGRATION_SUMMARY.md +121 -0
- devguard/__init__.py +3 -0
- devguard/__main__.py +6 -0
- devguard/checkers/__init__.py +41 -0
- devguard/checkers/api_usage.py +523 -0
- devguard/checkers/aws_cost.py +331 -0
- devguard/checkers/aws_iam.py +284 -0
- devguard/checkers/base.py +25 -0
- devguard/checkers/container.py +137 -0
- devguard/checkers/domain.py +189 -0
- devguard/checkers/firecrawl.py +117 -0
- devguard/checkers/fly.py +225 -0
- devguard/checkers/github.py +210 -0
- devguard/checkers/npm.py +327 -0
- devguard/checkers/npm_security.py +244 -0
- devguard/checkers/redteam.py +290 -0
- devguard/checkers/secret.py +279 -0
- devguard/checkers/swarm.py +376 -0
- devguard/checkers/tailscale.py +143 -0
- devguard/checkers/tailsnitch.py +303 -0
- devguard/checkers/tavily.py +179 -0
- devguard/checkers/vercel.py +192 -0
- devguard/cli.py +1510 -0
- devguard/cli_helpers.py +189 -0
- devguard/config.py +249 -0
- devguard/core.py +293 -0
- devguard/dashboard.py +715 -0
- devguard/discovery.py +363 -0
- devguard/http_client.py +142 -0
- devguard/llm_service.py +481 -0
- devguard/mcp_server.py +259 -0
- devguard/metrics.py +144 -0
- devguard/models.py +208 -0
- devguard/reporting.py +1571 -0
- devguard/sarif.py +295 -0
- devguard/scripts/ANALYSIS_SUMMARY.md +141 -0
- devguard/scripts/README.md +221 -0
- devguard/scripts/auto_fix_recommendations.py +145 -0
- devguard/scripts/generate_npmignore.py +175 -0
- devguard/scripts/generate_security_report.py +324 -0
- devguard/scripts/prepublish_check.sh +29 -0
- devguard/scripts/redteam_npm_packages.py +1262 -0
- devguard/scripts/review_all_repos.py +300 -0
- devguard/spec.py +617 -0
- devguard/sweeps/__init__.py +23 -0
- devguard/sweeps/ai_editor_config_audit.py +697 -0
- devguard/sweeps/cargo_publish_audit.py +655 -0
- devguard/sweeps/dependency_audit.py +419 -0
- devguard/sweeps/gitignore_audit.py +336 -0
- devguard/sweeps/local_dev.py +260 -0
- devguard/sweeps/local_dirty_worktree_secrets.py +521 -0
- devguard/sweeps/project_flaudit.py +636 -0
- devguard/sweeps/public_github_secrets.py +680 -0
- devguard/sweeps/publish_audit.py +478 -0
- devguard/sweeps/ssh_key_audit.py +327 -0
- devguard/utils.py +174 -0
- devguard-0.2.0.dist-info/METADATA +225 -0
- devguard-0.2.0.dist-info/RECORD +60 -0
- devguard-0.2.0.dist-info/WHEEL +4 -0
- devguard-0.2.0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,336 @@
|
|
|
1
|
+
"""Gitignore audit sweep: detect missing .gitignore patterns across local repos.
|
|
2
|
+
|
|
3
|
+
Scans git repos under a dev root and checks whether common hygiene patterns
|
|
4
|
+
(.env, .state/, *.log, etc.) are present in .gitignore. Repos with a LICENSE
|
|
5
|
+
file are flagged as likely public and get higher severity.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import fnmatch
|
|
11
|
+
import json
|
|
12
|
+
import os
|
|
13
|
+
from collections import Counter
|
|
14
|
+
from dataclasses import dataclass, field
|
|
15
|
+
from datetime import UTC, datetime
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import Any
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _utc_now() -> str:
|
|
21
|
+
return datetime.now(UTC).isoformat().replace("+00:00", "Z")
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _default_dev_root() -> Path:
|
|
25
|
+
return Path(os.getenv("DEV_DIR") or "~/Documents/dev").expanduser()
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# Patterns to check, grouped by relevance.
# Each tuple: (pattern_name, gitignore_lines_that_satisfy_it, languages_where_relevant)
# languages_where_relevant: None = always, otherwise set of {"rust", "python", "js", "go", ...}
# NOTE: consumed by audit_gitignores via _pattern_satisfied; language sets are
# matched against the output of _detect_languages.
REQUIRED_PATTERNS: list[tuple[str, list[str], set[str] | None]] = [
    (".env files", [".env", ".env.*", ".env.local", ".env.*.local"], None),
    (".state/ dir", [".state", ".state/"], None),
    (".claude/ dir", [".claude", ".claude/"], None),
    ("*.log files", ["*.log"], None),
    (".DS_Store", [".DS_Store"], None),
    ("*.sqlite/db", ["*.sqlite", "*.sqlite3", "*.db"], None),
    ("node_modules/", ["node_modules", "node_modules/"], {"js", "ts"}),
    ("target/", ["target", "target/"], {"rust"}),
    (".venv/", [".venv", ".venv/", "venv", "venv/"], {"python"}),
    ("dist/", ["dist", "dist/"], {"js", "ts", "python"}),
    ("build/", ["build", "build/"], {"js", "ts", "python", "go"}),
    ("__pycache__/", ["__pycache__", "__pycache__/"], {"python"}),
]
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _detect_languages(repo: Path) -> set[str]:
|
|
48
|
+
"""Detect project languages from manifest files."""
|
|
49
|
+
langs: set[str] = set()
|
|
50
|
+
if (repo / "Cargo.toml").exists():
|
|
51
|
+
langs.add("rust")
|
|
52
|
+
if (repo / "pyproject.toml").exists() or (repo / "setup.py").exists():
|
|
53
|
+
langs.add("python")
|
|
54
|
+
if (repo / "package.json").exists():
|
|
55
|
+
langs.add("js")
|
|
56
|
+
langs.add("ts")
|
|
57
|
+
if (repo / "go.mod").exists():
|
|
58
|
+
langs.add("go")
|
|
59
|
+
return langs
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _is_likely_public(repo: Path) -> bool:
|
|
63
|
+
"""Heuristic: repo has a LICENSE file -> likely public."""
|
|
64
|
+
for name in ("LICENSE", "LICENSE.md", "LICENSE.txt", "LICENCE"):
|
|
65
|
+
if (repo / name).exists():
|
|
66
|
+
return True
|
|
67
|
+
return False
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _read_gitignore_lines(repo: Path) -> list[str]:
    """Return the repo's .gitignore lines (non-empty, non-comment); [] if absent."""
    candidate = repo / ".gitignore"
    if candidate.is_file():
        return _read_gitignore_lines_from(candidate)
    return []
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _pattern_satisfied(gitignore_lines: list[str], required_variants: list[str]) -> bool:
|
|
79
|
+
"""Check if any variant of a required pattern appears in .gitignore.
|
|
80
|
+
|
|
81
|
+
Handles leading `/` and trailing `/` normalization.
|
|
82
|
+
"""
|
|
83
|
+
normalized = set()
|
|
84
|
+
for line in gitignore_lines:
|
|
85
|
+
# Strip negation prefix
|
|
86
|
+
if line.startswith("!"):
|
|
87
|
+
continue
|
|
88
|
+
clean = line.lstrip("/").rstrip("/").strip()
|
|
89
|
+
if clean:
|
|
90
|
+
normalized.add(clean)
|
|
91
|
+
# "**/" prefix in gitignore means "at any depth", which covers root.
|
|
92
|
+
# e.g. **/*.log covers *.log, **/dist covers dist
|
|
93
|
+
if clean.startswith("**/"):
|
|
94
|
+
normalized.add(clean[3:])
|
|
95
|
+
for variant in required_variants:
|
|
96
|
+
clean = variant.lstrip("/").rstrip("/").strip()
|
|
97
|
+
if clean in normalized:
|
|
98
|
+
return True
|
|
99
|
+
# Check if any existing pattern would match this variant via fnmatch
|
|
100
|
+
for existing in normalized:
|
|
101
|
+
if fnmatch.fnmatch(clean, existing):
|
|
102
|
+
return True
|
|
103
|
+
return False
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
@dataclass(frozen=True)
class GitignoreGap:
    """One missing-.gitignore-pattern finding for a single repo."""

    # Absolute path of the audited repository.
    repo_path: str
    # Human-readable name of the missing pattern (a REQUIRED_PATTERNS entry).
    pattern_name: str
    # True when the repo looks public (LICENSE present) — higher severity.
    is_public: bool
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
@dataclass
class RepoAuditResult:
    """Per-repo outcome of the gitignore audit (serialized into the report)."""

    # Absolute path of the audited repository.
    repo_path: str
    # Whether a .gitignore file exists at the repo root.
    has_gitignore: bool
    # Heuristic publicness flag (LICENSE file present).
    is_public: bool
    # Sorted language tags detected from manifest files.
    languages: list[str]
    # Names of REQUIRED_PATTERNS entries not satisfied by the repo's
    # (or the global) gitignore.
    missing_patterns: list[str] = field(default_factory=list)
    # Warnings about wrong-cased tracked files (e.g. claude.md vs CLAUDE.md).
    case_warnings: list[str] = field(default_factory=list)
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
# Files that must have exact casing for Claude Code to find them.
# (expected_name, parent_relative_to_repo)
# "." means the repo root; consumed by _check_case_sensitive_files.
_CASE_SENSITIVE_FILES: list[tuple[str, str]] = [
    ("CLAUDE.md", "."),
    ("CLAUDE.md", ".claude"),
]
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _check_case_sensitive_files(repo: Path) -> list[str]:
    """Check for case-sensitive file naming issues (e.g. claude.md vs CLAUDE.md).

    On case-insensitive filesystems (macOS), wrong-cased files still "exist"
    but git tracks the original case, which breaks on Linux/CI.

    Returns a list of human-readable warning strings; best-effort — any git
    failure for one entry is swallowed and that entry is skipped.
    """
    # Local import keeps subprocess out of module scope for this helper.
    import subprocess as _sp

    warnings: list[str] = []
    for expected, parent_rel in _CASE_SENSITIVE_FILES:
        parent = repo / parent_rel
        if not parent.is_dir():
            continue
        # Check if any case variant exists
        # (on a case-insensitive FS this matches regardless of actual case).
        target = parent / expected
        if not target.exists():
            continue
        # Ask git what case it actually tracks
        try:
            res = _sp.run(
                ["git", "ls-files", str(Path(parent_rel) / expected)],
                cwd=str(repo), capture_output=True, text=True, timeout=5,
            )
            tracked = res.stdout.strip()
            if not tracked:
                # Try lowercase
                # (git ls-files with an exact path returns nothing if the
                # tracked entry has different casing on case-sensitive git).
                res2 = _sp.run(
                    ["git", "ls-files", str(Path(parent_rel) / expected.lower())],
                    cwd=str(repo), capture_output=True, text=True, timeout=5,
                )
                tracked = res2.stdout.strip()
            # Warn only when git tracks a path that differs from the expected one.
            if tracked and tracked != str(Path(parent_rel) / expected):
                warnings.append(
                    f"git tracks '{tracked}' but Claude Code expects '{Path(parent_rel) / expected}'"
                )
        except Exception:
            # Best-effort: missing git, timeout, etc. — skip this entry.
            continue
    return warnings
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def _iter_git_repos(root: Path, max_depth: int) -> list[Path]:
|
|
172
|
+
"""Discover git repos under root, bounded by max_depth."""
|
|
173
|
+
root = root.resolve()
|
|
174
|
+
max_depth = max(0, min(int(max_depth), 6))
|
|
175
|
+
junk = {
|
|
176
|
+
"node_modules", ".venv", "venv", "dist", "build", ".git",
|
|
177
|
+
".cache", ".state", "__pycache__", "_trash", "_scratch",
|
|
178
|
+
"_external", "_archive", "_forks",
|
|
179
|
+
}
|
|
180
|
+
repos: list[Path] = []
|
|
181
|
+
stack: list[tuple[Path, int]] = [(root, 0)]
|
|
182
|
+
seen: set[Path] = set()
|
|
183
|
+
while stack:
|
|
184
|
+
cur, depth = stack.pop()
|
|
185
|
+
if cur in seen:
|
|
186
|
+
continue
|
|
187
|
+
seen.add(cur)
|
|
188
|
+
if (cur / ".git").exists():
|
|
189
|
+
repos.append(cur)
|
|
190
|
+
continue
|
|
191
|
+
if depth >= max_depth:
|
|
192
|
+
continue
|
|
193
|
+
try:
|
|
194
|
+
for child in cur.iterdir():
|
|
195
|
+
if not child.is_dir():
|
|
196
|
+
continue
|
|
197
|
+
name = child.name
|
|
198
|
+
if depth == 0 and name in junk:
|
|
199
|
+
continue
|
|
200
|
+
if name.startswith("."):
|
|
201
|
+
continue
|
|
202
|
+
stack.append((child, depth + 1))
|
|
203
|
+
except Exception:
|
|
204
|
+
continue
|
|
205
|
+
return sorted(repos)
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def _read_global_gitignore_lines() -> list[str]:
    """Read the global gitignore (core.excludesFile) and return non-empty, non-comment lines.

    Best-effort: returns [] when git is unavailable, no global excludes file is
    configured, the configured path does not exist, or any error occurs.
    """
    # Local import keeps subprocess out of module scope for this helper.
    import subprocess as _sp

    try:
        res = _sp.run(
            ["git", "config", "--global", "core.excludesFile"],
            capture_output=True, text=True, timeout=5,
        )
        path_str = res.stdout.strip()
        if not path_str:
            # No global excludes file configured.
            return []
        # The configured value may use "~"; expand it before checking.
        p = Path(path_str).expanduser()
        if not p.is_file():
            return []
        return _read_gitignore_lines_from(p)
    except Exception:
        # Missing git binary, timeout, etc. — treat as "no global patterns".
        return []
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
def _read_gitignore_lines_from(path: Path) -> list[str]:
|
|
229
|
+
"""Read a gitignore file and return non-empty, non-comment lines."""
|
|
230
|
+
try:
|
|
231
|
+
text = path.read_text(encoding="utf-8", errors="replace")
|
|
232
|
+
except Exception:
|
|
233
|
+
return []
|
|
234
|
+
return [s for line in text.splitlines() if (s := line.strip()) and not s.startswith("#")]
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def audit_gitignores(
    *,
    dev_root: Path | None = None,
    max_depth: int = 2,
    exclude_repo_globs: list[str] | None = None,
) -> tuple[dict[str, Any], list[str]]:
    """Audit .gitignore files across repos and return a report.

    Args:
        dev_root: Workspace root to scan; defaults to _default_dev_root().
        max_depth: Directory depth limit passed to _iter_git_repos.
        exclude_repo_globs: fnmatch globs; repos whose absolute path matches
            any of them are skipped. Non-string/blank entries are ignored.

    Returns:
        (report, errors) — the JSON-serializable report dict and the list of
        per-repo read errors (also embedded in the report under "errors").
    """
    errors: list[str] = []
    root = dev_root if dev_root is not None else _default_dev_root()

    repos = _iter_git_repos(root, max_depth=max_depth)
    # Sanitize caller-provided globs: keep only non-blank strings.
    globs = [g for g in (exclude_repo_globs or []) if isinstance(g, str) and g.strip()]
    if globs:
        repos = [r for r in repos if not any(fnmatch.fnmatch(str(r), g) for g in globs)]

    # Read global gitignore once -- patterns there apply to all repos.
    global_gi_lines = _read_global_gitignore_lines()

    results: list[RepoAuditResult] = []
    gap_counter: Counter[str] = Counter()
    repos_without_gitignore: list[str] = []
    public_repos_with_gaps: list[str] = []

    for repo in repos:
        try:
            langs = _detect_languages(repo)
            is_public = _is_likely_public(repo)
            repo_gi_lines = _read_gitignore_lines(repo)
            # Global patterns count toward coverage just like repo-local ones.
            gi_lines = global_gi_lines + repo_gi_lines
            has_gitignore = (repo / ".gitignore").is_file()
        except Exception as exc:
            # Record and move on; one unreadable repo must not abort the sweep.
            errors.append(f"failed to read {repo}: {exc}")
            continue

        missing: list[str] = []
        for pattern_name, variants, relevant_langs in REQUIRED_PATTERNS:
            # Skip language-specific patterns if not relevant
            if relevant_langs and not (langs & relevant_langs):
                continue
            if not _pattern_satisfied(gi_lines, variants):
                missing.append(pattern_name)
                gap_counter[pattern_name] += 1

        case_warns = _check_case_sensitive_files(repo)

        result = RepoAuditResult(
            repo_path=str(repo),
            has_gitignore=has_gitignore,
            is_public=is_public,
            languages=sorted(langs),
            missing_patterns=missing,
            case_warnings=case_warns,
        )
        results.append(result)

        if not has_gitignore:
            repos_without_gitignore.append(str(repo))
        if is_public and missing:
            public_repos_with_gaps.append(str(repo))

    # Sort: public repos with gaps first, then by gap count
    results.sort(key=lambda r: (-r.is_public, -len(r.missing_patterns), r.repo_path))

    report: dict[str, Any] = {
        "generated_at": _utc_now(),
        "scope": {
            "dev_root": str(root),
            "repos_scanned": len(repos),
            "max_depth": max_depth,
            "exclude_repo_globs": globs,
        },
        "summary": {
            # Lists are capped at 50 entries to keep the report small.
            "repos_without_gitignore": len(repos_without_gitignore),
            "repos_without_gitignore_list": repos_without_gitignore[:50],
            "public_repos_with_gaps": len(public_repos_with_gaps),
            "public_repos_with_gaps_list": public_repos_with_gaps[:50],
            "total_gaps": sum(len(r.missing_patterns) for r in results),
            "gap_frequency": gap_counter.most_common(20),
            "total_case_warnings": sum(len(r.case_warnings) for r in results),
        },
        "repos": [
            {
                "repo_path": r.repo_path,
                "has_gitignore": r.has_gitignore,
                "is_public": r.is_public,
                "languages": r.languages,
                "missing_patterns": r.missing_patterns,
                # case_warnings key is omitted entirely when empty.
                **({"case_warnings": r.case_warnings} if r.case_warnings else {}),
            }
            for r in results
            # Clean repos are excluded; only problem repos appear (max 200).
            if r.missing_patterns or not r.has_gitignore or r.case_warnings
        ][:200],
        "errors": errors,
    }
    return report, errors
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
def write_report(path: Path, report: dict[str, Any]) -> None:
    """Serialize *report* as pretty-printed JSON at *path*, creating parent dirs."""
    path.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(report, indent=2)
    path.write_text(serialized + "\n")
|
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
"""Local dev workspace sweep for "blunders" (policy-based).
|
|
2
|
+
|
|
3
|
+
This sweep is meant to catch accidental committed artifacts such as:
|
|
4
|
+
- .env files
|
|
5
|
+
- private keys / cert bundles
|
|
6
|
+
- sqlite/db dumps
|
|
7
|
+
- large blobs
|
|
8
|
+
- known generated reports (e.g., devguard email history/report outputs)
|
|
9
|
+
|
|
10
|
+
It is intentionally conservative and *non-destructive*:
|
|
11
|
+
- it does not rewrite git history
|
|
12
|
+
- it does not upload anything
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import fnmatch
|
|
18
|
+
import json
|
|
19
|
+
import os
|
|
20
|
+
import subprocess
|
|
21
|
+
import time
|
|
22
|
+
from collections.abc import Iterable
|
|
23
|
+
from dataclasses import asdict, dataclass
|
|
24
|
+
from pathlib import Path
|
|
25
|
+
|
|
26
|
+
# Deny-list of glob patterns for files that should never be committed.
# Matched against git-tracked relative paths by _matches_any (fnmatch
# semantics, where "*" also matches "/", so "**/x" covers nested paths).
# NOTE: .env.example-style files are exempted from the broad ".env.*"
# pattern at match time in _matches_any, not here.
DEFAULT_DENY_GLOBS: list[str] = [
    "**/.env",
    "**/.env.*",
    "**/*.pem",
    "**/*.key",
    "**/*.p12",
    "**/*.pfx",
    "**/*.kdbx",
    "**/*.ovpn",
    "**/*.mobileprovision",
    "**/*.keystore",
    "**/*.jks",
    "**/*.pkcs12",
    "**/id_rsa",
    "**/id_rsa.*",
    "**/id_ecdsa",
    "**/id_ecdsa.*",
    "**/id_ed25519",
    "**/id_ed25519.*",
    "**/.npmrc",
    "**/.pypirc",
    "**/.netrc",
    "**/.htpasswd",
    "**/.aws/credentials",
    "**/.ssh/**",
    "**/.gnupg/**",
    "**/*.asc",
    "**/*.sqlite",
    "**/*.sqlite3",
    "**/*.db",
    "**/*.db-wal",
    "**/*.db-shm",
    # Terraform state/vars (often contain secrets)
    "**/*.tfstate",
    "**/*.tfstate.backup",
    "**/*.tfvars",
    # GCP / OAuth tokens
    "**/credentials.json",
    "**/token.json",
    # Known Guardian "oops outputs"
    "**/.devguard-email-history.json",
    "**/.devguard-email-thread",
    "**/repo_review_results.json",
    "**/npm_security_report.json",
    "**/npm_security_report.md",
]
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
@dataclass(frozen=True, slots=True)
class Hit:
    """A single policy violation found in a git-tracked file."""

    # Absolute path of the repo containing the file.
    repo_path: str
    # Path of the offending file, relative to the repo root.
    file_path: str
    # Machine-readable reason: "deny_glob:<pattern>" or "blob_too_large><limit>".
    reason: str
    # File size on disk in bytes; None if the file is missing or unreadable.
    size_bytes: int | None = None
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _utc_now_iso() -> str:
|
|
83
|
+
return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _matches_any(path: str, globs: list[str]) -> str | None:
|
|
87
|
+
p = path.lstrip("/")
|
|
88
|
+
# Allow common "example" env files (these are typically safe to commit).
|
|
89
|
+
# We still flag the real `.env` and other patterns.
|
|
90
|
+
env_allow = {".env.example", ".env.template", ".env.sample", ".env.dist"}
|
|
91
|
+
if Path(p).name in env_allow:
|
|
92
|
+
# If the only match would be the broad `.env.*` pattern, treat as allowed.
|
|
93
|
+
pass
|
|
94
|
+
for g in globs:
|
|
95
|
+
if Path(p).name in env_allow and (g.endswith("/.env.*") or g.endswith("**/.env.*")):
|
|
96
|
+
continue
|
|
97
|
+
# fnmatch's "*" matches "/" too; keep both patterns for readability
|
|
98
|
+
if fnmatch.fnmatch(p, g) or fnmatch.fnmatch(p, g.replace("**/", "")):
|
|
99
|
+
return g
|
|
100
|
+
return None
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _git_ls_files(repo: Path) -> list[str]:
|
|
104
|
+
# Use -z to handle weird filenames.
|
|
105
|
+
proc = subprocess.run(
|
|
106
|
+
["git", "-C", str(repo), "ls-files", "-z"],
|
|
107
|
+
check=False,
|
|
108
|
+
stdout=subprocess.PIPE,
|
|
109
|
+
stderr=subprocess.DEVNULL,
|
|
110
|
+
)
|
|
111
|
+
if proc.returncode != 0:
|
|
112
|
+
return []
|
|
113
|
+
out = proc.stdout.decode("utf-8", errors="replace")
|
|
114
|
+
return [p for p in out.split("\0") if p]
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _discover_git_repos(dev_root: Path, max_depth: int = 2) -> list[Path]:
|
|
118
|
+
"""Discover git repos under dev_root, bounded by max_depth.
|
|
119
|
+
|
|
120
|
+
We avoid an unbounded recursive walk by limiting to max_depth directory levels.
|
|
121
|
+
"""
|
|
122
|
+
repos: list[Path] = []
|
|
123
|
+
dev_root = dev_root.expanduser().resolve()
|
|
124
|
+
if not dev_root.exists():
|
|
125
|
+
return repos
|
|
126
|
+
|
|
127
|
+
# Depth 0: dev_root itself
|
|
128
|
+
if (dev_root / ".git").exists():
|
|
129
|
+
repos.append(dev_root)
|
|
130
|
+
|
|
131
|
+
# Depth-limited breadth walk
|
|
132
|
+
frontier: list[tuple[Path, int]] = [(dev_root, 0)]
|
|
133
|
+
while frontier:
|
|
134
|
+
cur, depth = frontier.pop()
|
|
135
|
+
if depth >= max_depth:
|
|
136
|
+
continue
|
|
137
|
+
try:
|
|
138
|
+
children = list(cur.iterdir())
|
|
139
|
+
except (OSError, PermissionError):
|
|
140
|
+
continue
|
|
141
|
+
for child in children:
|
|
142
|
+
if not child.is_dir():
|
|
143
|
+
continue
|
|
144
|
+
name = child.name
|
|
145
|
+
# Avoid obvious heavy dirs.
|
|
146
|
+
#
|
|
147
|
+
# Important: the workspace root under ~/Documents/dev often contains
|
|
148
|
+
# very large scratch/backup directories. Scanning into them can take
|
|
149
|
+
# minutes and isn't useful for "repo blunder" detection.
|
|
150
|
+
if name in {
|
|
151
|
+
".git",
|
|
152
|
+
".venv",
|
|
153
|
+
"venv",
|
|
154
|
+
"node_modules",
|
|
155
|
+
"target",
|
|
156
|
+
".cache",
|
|
157
|
+
".pytest_cache",
|
|
158
|
+
".ruff_cache",
|
|
159
|
+
}:
|
|
160
|
+
continue
|
|
161
|
+
if depth == 0:
|
|
162
|
+
# Skip top-level junk roots unless explicitly allowed.
|
|
163
|
+
if (name.startswith("_") or name.startswith(".")) and name not in {"_infra"}:
|
|
164
|
+
continue
|
|
165
|
+
if name in {"evals", "integration_test_tmp"}:
|
|
166
|
+
continue
|
|
167
|
+
if (child / ".git").exists():
|
|
168
|
+
repos.append(child)
|
|
169
|
+
# Don't recurse into a repo unless user explicitly sets higher max_depth.
|
|
170
|
+
continue
|
|
171
|
+
frontier.append((child, depth + 1))
|
|
172
|
+
|
|
173
|
+
# De-dupe while preserving order
|
|
174
|
+
seen: set[Path] = set()
|
|
175
|
+
out: list[Path] = []
|
|
176
|
+
for r in repos:
|
|
177
|
+
rr = r.resolve()
|
|
178
|
+
if rr in seen:
|
|
179
|
+
continue
|
|
180
|
+
seen.add(rr)
|
|
181
|
+
out.append(rr)
|
|
182
|
+
return out
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def sweep_dev_repos(
    dev_root: Path,
    deny_globs: list[str] | None = None,
    max_blob_bytes: int = 5 * 1024 * 1024,
    max_depth: int = 2,
) -> tuple[list[Hit], dict]:
    """Sweep discovered git repos under dev_root.

    Each git-tracked file is checked first against the deny globs, then
    (if it matched none) against the large-blob size limit. Read-only:
    no files are modified.

    Args:
        dev_root: Workspace root passed to _discover_git_repos.
        deny_globs: Override for DEFAULT_DENY_GLOBS; None uses the default.
        max_blob_bytes: Working-tree size above which a file is flagged.
        max_depth: Directory depth limit for repo discovery.

    Returns:
        (hits, metadata) where metadata is a small dict safe to serialize.
    """
    globs = deny_globs or list(DEFAULT_DENY_GLOBS)
    repos = _discover_git_repos(dev_root, max_depth=max_depth)

    hits: list[Hit] = []
    for repo in repos:
        tracked = _git_ls_files(repo)
        for rel in tracked:
            pat = _matches_any(rel, globs)
            if pat:
                # Size is best-effort; the file may be deleted but still tracked.
                size = None
                try:
                    p = repo / rel
                    if p.exists() and p.is_file():
                        size = p.stat().st_size
                except OSError:
                    size = None
                hits.append(
                    Hit(
                        repo_path=str(repo),
                        file_path=rel,
                        reason=f"deny_glob:{pat}",
                        size_bytes=size,
                    )
                )
                # A deny-glob hit is not also checked for size.
                continue

            # Large blobs (current working tree size, not historical blob size)
            try:
                p = repo / rel
                if p.exists() and p.is_file():
                    sz = p.stat().st_size
                    if sz > max_blob_bytes:
                        hits.append(
                            Hit(
                                repo_path=str(repo),
                                file_path=rel,
                                reason=f"blob_too_large>{max_blob_bytes}",
                                size_bytes=sz,
                            )
                        )
            except OSError:
                # Unreadable file — skip to the next tracked path.
                continue

    meta = {
        "generated_at": _utc_now_iso(),
        "dev_root": str(dev_root.expanduser()),
        "repos_scanned": len(repos),
        "max_depth": max_depth,
        "max_blob_bytes": max_blob_bytes,
        "deny_globs": globs,
    }
    return hits, meta
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
def write_report(path: Path, hits: Iterable[Hit], meta: dict) -> None:
    """Write a JSON report combining *meta* with serialized *hits* to *path*.

    Parent directories are created as needed; output is stable (sorted keys).
    """
    payload = dict(meta)
    payload["hits"] = [asdict(hit) for hit in hits]
    path.parent.mkdir(parents=True, exist_ok=True)
    text = json.dumps(payload, indent=2, sort_keys=True)
    path.write_text(text + "\n", encoding="utf-8")
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
def default_dev_root() -> Path:
    """Default dev workspace root: $DEV_DIR if set and non-empty, else ~/Documents/dev.

    Fixes: an empty DEV_DIR no longer yields Path(""), and `~` in DEV_DIR is
    expanded — matching _default_dev_root in the gitignore_audit sweep.
    """
    configured = os.environ.get("DEV_DIR")
    if configured:
        return Path(configured).expanduser()
    return Path.home() / "Documents" / "dev"
|