devguard 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. devguard/INTEGRATION_SUMMARY.md +121 -0
  2. devguard/__init__.py +3 -0
  3. devguard/__main__.py +6 -0
  4. devguard/checkers/__init__.py +41 -0
  5. devguard/checkers/api_usage.py +523 -0
  6. devguard/checkers/aws_cost.py +331 -0
  7. devguard/checkers/aws_iam.py +284 -0
  8. devguard/checkers/base.py +25 -0
  9. devguard/checkers/container.py +137 -0
  10. devguard/checkers/domain.py +189 -0
  11. devguard/checkers/firecrawl.py +117 -0
  12. devguard/checkers/fly.py +225 -0
  13. devguard/checkers/github.py +210 -0
  14. devguard/checkers/npm.py +327 -0
  15. devguard/checkers/npm_security.py +244 -0
  16. devguard/checkers/redteam.py +290 -0
  17. devguard/checkers/secret.py +279 -0
  18. devguard/checkers/swarm.py +376 -0
  19. devguard/checkers/tailscale.py +143 -0
  20. devguard/checkers/tailsnitch.py +303 -0
  21. devguard/checkers/tavily.py +179 -0
  22. devguard/checkers/vercel.py +192 -0
  23. devguard/cli.py +1510 -0
  24. devguard/cli_helpers.py +189 -0
  25. devguard/config.py +249 -0
  26. devguard/core.py +293 -0
  27. devguard/dashboard.py +715 -0
  28. devguard/discovery.py +363 -0
  29. devguard/http_client.py +142 -0
  30. devguard/llm_service.py +481 -0
  31. devguard/mcp_server.py +259 -0
  32. devguard/metrics.py +144 -0
  33. devguard/models.py +208 -0
  34. devguard/reporting.py +1571 -0
  35. devguard/sarif.py +295 -0
  36. devguard/scripts/ANALYSIS_SUMMARY.md +141 -0
  37. devguard/scripts/README.md +221 -0
  38. devguard/scripts/auto_fix_recommendations.py +145 -0
  39. devguard/scripts/generate_npmignore.py +175 -0
  40. devguard/scripts/generate_security_report.py +324 -0
  41. devguard/scripts/prepublish_check.sh +29 -0
  42. devguard/scripts/redteam_npm_packages.py +1262 -0
  43. devguard/scripts/review_all_repos.py +300 -0
  44. devguard/spec.py +617 -0
  45. devguard/sweeps/__init__.py +23 -0
  46. devguard/sweeps/ai_editor_config_audit.py +697 -0
  47. devguard/sweeps/cargo_publish_audit.py +655 -0
  48. devguard/sweeps/dependency_audit.py +419 -0
  49. devguard/sweeps/gitignore_audit.py +336 -0
  50. devguard/sweeps/local_dev.py +260 -0
  51. devguard/sweeps/local_dirty_worktree_secrets.py +521 -0
  52. devguard/sweeps/project_flaudit.py +636 -0
  53. devguard/sweeps/public_github_secrets.py +680 -0
  54. devguard/sweeps/publish_audit.py +478 -0
  55. devguard/sweeps/ssh_key_audit.py +327 -0
  56. devguard/utils.py +174 -0
  57. devguard-0.2.0.dist-info/METADATA +225 -0
  58. devguard-0.2.0.dist-info/RECORD +60 -0
  59. devguard-0.2.0.dist-info/WHEEL +4 -0
  60. devguard-0.2.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,336 @@
1
+ """Gitignore audit sweep: detect missing .gitignore patterns across local repos.
2
+
3
+ Scans git repos under a dev root and checks whether common hygiene patterns
4
+ (.env, .state/, *.log, etc.) are present in .gitignore. Repos with a LICENSE
5
+ file are flagged as likely public and get higher severity.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import fnmatch
11
+ import json
12
+ import os
13
+ from collections import Counter
14
+ from dataclasses import dataclass, field
15
+ from datetime import UTC, datetime
16
+ from pathlib import Path
17
+ from typing import Any
18
+
19
+
20
+ def _utc_now() -> str:
21
+ return datetime.now(UTC).isoformat().replace("+00:00", "Z")
22
+
23
+
24
+ def _default_dev_root() -> Path:
25
+ return Path(os.getenv("DEV_DIR") or "~/Documents/dev").expanduser()
26
+
27
+
28
# Patterns to check, grouped by relevance.
# Each tuple: (pattern_name, gitignore_lines_that_satisfy_it, languages_where_relevant)
# languages_where_relevant: None = always, otherwise set of {"rust", "python", "js", "go", ...}
# Matching is done by _pattern_satisfied, which normalizes leading/trailing "/"
# and also accepts a broader glob already present in the .gitignore.
REQUIRED_PATTERNS: list[tuple[str, list[str], set[str] | None]] = [
    (".env files", [".env", ".env.*", ".env.local", ".env.*.local"], None),
    (".state/ dir", [".state", ".state/"], None),
    (".claude/ dir", [".claude", ".claude/"], None),
    ("*.log files", ["*.log"], None),
    (".DS_Store", [".DS_Store"], None),
    ("*.sqlite/db", ["*.sqlite", "*.sqlite3", "*.db"], None),
    ("node_modules/", ["node_modules", "node_modules/"], {"js", "ts"}),
    ("target/", ["target", "target/"], {"rust"}),
    (".venv/", [".venv", ".venv/", "venv", "venv/"], {"python"}),
    ("dist/", ["dist", "dist/"], {"js", "ts", "python"}),
    ("build/", ["build", "build/"], {"js", "ts", "python", "go"}),
    ("__pycache__/", ["__pycache__", "__pycache__/"], {"python"}),
]
45
+
46
+
47
+ def _detect_languages(repo: Path) -> set[str]:
48
+ """Detect project languages from manifest files."""
49
+ langs: set[str] = set()
50
+ if (repo / "Cargo.toml").exists():
51
+ langs.add("rust")
52
+ if (repo / "pyproject.toml").exists() or (repo / "setup.py").exists():
53
+ langs.add("python")
54
+ if (repo / "package.json").exists():
55
+ langs.add("js")
56
+ langs.add("ts")
57
+ if (repo / "go.mod").exists():
58
+ langs.add("go")
59
+ return langs
60
+
61
+
62
+ def _is_likely_public(repo: Path) -> bool:
63
+ """Heuristic: repo has a LICENSE file -> likely public."""
64
+ for name in ("LICENSE", "LICENSE.md", "LICENSE.txt", "LICENCE"):
65
+ if (repo / name).exists():
66
+ return True
67
+ return False
68
+
69
+
70
def _read_gitignore_lines(repo: Path) -> list[str]:
    """Return the repo's .gitignore content as stripped, non-comment lines.

    A missing .gitignore yields an empty list.
    """
    gitignore_path = repo / ".gitignore"
    if gitignore_path.is_file():
        return _read_gitignore_lines_from(gitignore_path)
    return []
76
+
77
+
78
+ def _pattern_satisfied(gitignore_lines: list[str], required_variants: list[str]) -> bool:
79
+ """Check if any variant of a required pattern appears in .gitignore.
80
+
81
+ Handles leading `/` and trailing `/` normalization.
82
+ """
83
+ normalized = set()
84
+ for line in gitignore_lines:
85
+ # Strip negation prefix
86
+ if line.startswith("!"):
87
+ continue
88
+ clean = line.lstrip("/").rstrip("/").strip()
89
+ if clean:
90
+ normalized.add(clean)
91
+ # "**/" prefix in gitignore means "at any depth", which covers root.
92
+ # e.g. **/*.log covers *.log, **/dist covers dist
93
+ if clean.startswith("**/"):
94
+ normalized.add(clean[3:])
95
+ for variant in required_variants:
96
+ clean = variant.lstrip("/").rstrip("/").strip()
97
+ if clean in normalized:
98
+ return True
99
+ # Check if any existing pattern would match this variant via fnmatch
100
+ for existing in normalized:
101
+ if fnmatch.fnmatch(clean, existing):
102
+ return True
103
+ return False
104
+
105
+
106
@dataclass(frozen=True)
class GitignoreGap:
    """A single missing .gitignore pattern observed in one repo."""

    # Path of the repo with the gap (stringified).
    repo_path: str
    # Human-readable pattern name (first element of a REQUIRED_PATTERNS entry).
    pattern_name: str
    # True when the repo looks public (LICENSE-file heuristic).
    is_public: bool
111
+
112
+
113
@dataclass
class RepoAuditResult:
    """Per-repo audit outcome; feeds the "repos" section of the report."""

    # Path of the audited repo (stringified).
    repo_path: str
    # Whether the repo has a .gitignore file at its root.
    has_gitignore: bool
    # True when a LICENSE file was found (likely-public heuristic).
    is_public: bool
    # Detected languages, sorted (e.g. ["python", "rust"]).
    languages: list[str]
    # Names of REQUIRED_PATTERNS entries not satisfied by the .gitignore.
    missing_patterns: list[str] = field(default_factory=list)
    # Warnings from _check_case_sensitive_files (wrong-cased tracked files).
    case_warnings: list[str] = field(default_factory=list)
121
+
122
+
123
# Files that must have exact casing for Claude Code to find them.
# (expected_name, parent_relative_to_repo)
# Consumed by _check_case_sensitive_files below; "." means the repo root.
_CASE_SENSITIVE_FILES: list[tuple[str, str]] = [
    ("CLAUDE.md", "."),
    ("CLAUDE.md", ".claude"),
]
129
+
130
+
131
def _check_case_sensitive_files(repo: Path) -> list[str]:
    """Check for case-sensitive file naming issues (e.g. claude.md vs CLAUDE.md).

    On case-insensitive filesystems (macOS), wrong-cased files still "exist"
    but git tracks the original case, which breaks on Linux/CI.

    Returns human-readable warning strings; empty when clean or when git is
    unavailable / times out (best effort).
    """
    import subprocess as _sp

    warnings: list[str] = []
    for expected, parent_rel in _CASE_SENSITIVE_FILES:
        parent = repo / parent_rel
        if not parent.is_dir():
            continue
        # Check if any case variant exists
        # (on a case-insensitive filesystem, .exists() succeeds even when the
        # on-disk name is, say, "claude.md")
        target = parent / expected
        if not target.exists():
            continue
        # Ask git what case it actually tracks
        try:
            res = _sp.run(
                ["git", "ls-files", str(Path(parent_rel) / expected)],
                cwd=str(repo), capture_output=True, text=True, timeout=5,
            )
            tracked = res.stdout.strip()
            if not tracked:
                # Try lowercase
                res2 = _sp.run(
                    ["git", "ls-files", str(Path(parent_rel) / expected.lower())],
                    cwd=str(repo), capture_output=True, text=True, timeout=5,
                )
                tracked = res2.stdout.strip()
            # Path(".") / "CLAUDE.md" normalizes to "CLAUDE.md", so the
            # comparison also works for root-level entries.
            if tracked and tracked != str(Path(parent_rel) / expected):
                warnings.append(
                    f"git tracks '{tracked}' but Claude Code expects '{Path(parent_rel) / expected}'"
                )
        except Exception:
            # git missing, timeout, etc. -- skip this entry rather than fail the sweep.
            continue
    return warnings
169
+
170
+
171
+ def _iter_git_repos(root: Path, max_depth: int) -> list[Path]:
172
+ """Discover git repos under root, bounded by max_depth."""
173
+ root = root.resolve()
174
+ max_depth = max(0, min(int(max_depth), 6))
175
+ junk = {
176
+ "node_modules", ".venv", "venv", "dist", "build", ".git",
177
+ ".cache", ".state", "__pycache__", "_trash", "_scratch",
178
+ "_external", "_archive", "_forks",
179
+ }
180
+ repos: list[Path] = []
181
+ stack: list[tuple[Path, int]] = [(root, 0)]
182
+ seen: set[Path] = set()
183
+ while stack:
184
+ cur, depth = stack.pop()
185
+ if cur in seen:
186
+ continue
187
+ seen.add(cur)
188
+ if (cur / ".git").exists():
189
+ repos.append(cur)
190
+ continue
191
+ if depth >= max_depth:
192
+ continue
193
+ try:
194
+ for child in cur.iterdir():
195
+ if not child.is_dir():
196
+ continue
197
+ name = child.name
198
+ if depth == 0 and name in junk:
199
+ continue
200
+ if name.startswith("."):
201
+ continue
202
+ stack.append((child, depth + 1))
203
+ except Exception:
204
+ continue
205
+ return sorted(repos)
206
+
207
+
208
def _read_global_gitignore_lines() -> list[str]:
    """Return entries from the user's global gitignore (git core.excludesFile).

    Returns an empty list when git is unavailable, the setting is unset, or
    the configured file does not exist.
    """
    import subprocess as _sp

    try:
        proc = _sp.run(
            ["git", "config", "--global", "core.excludesFile"],
            capture_output=True, text=True, timeout=5,
        )
        configured = proc.stdout.strip()
        if not configured:
            return []
        excludes_path = Path(configured).expanduser()
        if not excludes_path.is_file():
            return []
        return _read_gitignore_lines_from(excludes_path)
    except Exception:
        # git missing or timed out: behave as if no global gitignore exists.
        return []
226
+
227
+
228
+ def _read_gitignore_lines_from(path: Path) -> list[str]:
229
+ """Read a gitignore file and return non-empty, non-comment lines."""
230
+ try:
231
+ text = path.read_text(encoding="utf-8", errors="replace")
232
+ except Exception:
233
+ return []
234
+ return [s for line in text.splitlines() if (s := line.strip()) and not s.startswith("#")]
235
+
236
+
237
def audit_gitignores(
    *,
    dev_root: Path | None = None,
    max_depth: int = 2,
    exclude_repo_globs: list[str] | None = None,
) -> tuple[dict[str, Any], list[str]]:
    """Audit .gitignore files across repos and return a report.

    Args:
        dev_root: Root directory to scan; defaults to $DEV_DIR or ~/Documents/dev.
        max_depth: Depth bound for repo discovery (clamped by _iter_git_repos).
        exclude_repo_globs: fnmatch globs matched against each repo's full path;
            matching repos are excluded from the scan.

    Returns:
        (report, errors): the JSON-serializable report dict and the list of
        per-repo read errors (also embedded in the report under "errors").
    """
    errors: list[str] = []
    root = dev_root if dev_root is not None else _default_dev_root()

    repos = _iter_git_repos(root, max_depth=max_depth)
    # Drop non-string/blank globs defensively before filtering.
    globs = [g for g in (exclude_repo_globs or []) if isinstance(g, str) and g.strip()]
    if globs:
        repos = [r for r in repos if not any(fnmatch.fnmatch(str(r), g) for g in globs)]

    # Read global gitignore once -- patterns there apply to all repos.
    global_gi_lines = _read_global_gitignore_lines()

    results: list[RepoAuditResult] = []
    gap_counter: Counter[str] = Counter()  # pattern_name -> number of repos missing it
    repos_without_gitignore: list[str] = []
    public_repos_with_gaps: list[str] = []

    for repo in repos:
        try:
            langs = _detect_languages(repo)
            is_public = _is_likely_public(repo)
            repo_gi_lines = _read_gitignore_lines(repo)
            # Globally ignored patterns count as satisfied for every repo.
            gi_lines = global_gi_lines + repo_gi_lines
            has_gitignore = (repo / ".gitignore").is_file()
        except Exception as exc:
            errors.append(f"failed to read {repo}: {exc}")
            continue

        missing: list[str] = []
        for pattern_name, variants, relevant_langs in REQUIRED_PATTERNS:
            # Skip language-specific patterns if not relevant
            if relevant_langs and not (langs & relevant_langs):
                continue
            if not _pattern_satisfied(gi_lines, variants):
                missing.append(pattern_name)
                gap_counter[pattern_name] += 1

        case_warns = _check_case_sensitive_files(repo)

        result = RepoAuditResult(
            repo_path=str(repo),
            has_gitignore=has_gitignore,
            is_public=is_public,
            languages=sorted(langs),
            missing_patterns=missing,
            case_warnings=case_warns,
        )
        results.append(result)

        if not has_gitignore:
            repos_without_gitignore.append(str(repo))
        if is_public and missing:
            public_repos_with_gaps.append(str(repo))

    # Sort: public repos with gaps first, then by gap count
    # (relies on bool -> int coercion for -r.is_public).
    results.sort(key=lambda r: (-r.is_public, -len(r.missing_patterns), r.repo_path))

    # List sizes are capped ([:50], [:200]) to keep the JSON report small;
    # per-repo detail is emitted only for repos with findings.
    report: dict[str, Any] = {
        "generated_at": _utc_now(),
        "scope": {
            "dev_root": str(root),
            "repos_scanned": len(repos),
            "max_depth": max_depth,
            "exclude_repo_globs": globs,
        },
        "summary": {
            "repos_without_gitignore": len(repos_without_gitignore),
            "repos_without_gitignore_list": repos_without_gitignore[:50],
            "public_repos_with_gaps": len(public_repos_with_gaps),
            "public_repos_with_gaps_list": public_repos_with_gaps[:50],
            "total_gaps": sum(len(r.missing_patterns) for r in results),
            "gap_frequency": gap_counter.most_common(20),
            "total_case_warnings": sum(len(r.case_warnings) for r in results),
        },
        "repos": [
            {
                "repo_path": r.repo_path,
                "has_gitignore": r.has_gitignore,
                "is_public": r.is_public,
                "languages": r.languages,
                "missing_patterns": r.missing_patterns,
                **({"case_warnings": r.case_warnings} if r.case_warnings else {}),
            }
            for r in results
            if r.missing_patterns or not r.has_gitignore or r.case_warnings
        ][:200],
        "errors": errors,
    }
    return report, errors
332
+
333
+
334
def write_report(path: Path, report: dict[str, Any]) -> None:
    """Serialize *report* as pretty-printed JSON to *path*, creating parent dirs.

    Writes UTF-8 explicitly so the output does not depend on the locale's
    preferred encoding (consistent with the other sweeps' report writers).
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(json.dumps(report, indent=2) + "\n", encoding="utf-8")
@@ -0,0 +1,260 @@
1
+ """Local dev workspace sweep for "blunders" (policy-based).
2
+
3
+ This sweep is meant to catch accidental committed artifacts such as:
4
+ - .env files
5
+ - private keys / cert bundles
6
+ - sqlite/db dumps
7
+ - large blobs
8
+ - known generated reports (e.g., devguard email history/report outputs)
9
+
10
+ It is intentionally conservative and *non-destructive*:
11
+ - it does not rewrite git history
12
+ - it does not upload anything
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import fnmatch
18
+ import json
19
+ import os
20
+ import subprocess
21
+ import time
22
+ from collections.abc import Iterable
23
+ from dataclasses import asdict, dataclass
24
+ from pathlib import Path
25
+
26
# Deny globs evaluated by _matches_any against repo-relative paths of tracked
# files (fnmatch syntax; the "**/" form also matches at the repo root).
DEFAULT_DENY_GLOBS: list[str] = [
    # Environment files (".env.example"-style templates are exempted in _matches_any)
    "**/.env",
    "**/.env.*",
    # Private keys / certificate and credential bundles
    "**/*.pem",
    "**/*.key",
    "**/*.p12",
    "**/*.pfx",
    "**/*.kdbx",
    "**/*.ovpn",
    "**/*.mobileprovision",
    "**/*.keystore",
    "**/*.jks",
    "**/*.pkcs12",
    # SSH private keys by conventional name
    "**/id_rsa",
    "**/id_rsa.*",
    "**/id_ecdsa",
    "**/id_ecdsa.*",
    "**/id_ed25519",
    "**/id_ed25519.*",
    # Tool config files that commonly embed credentials
    "**/.npmrc",
    "**/.pypirc",
    "**/.netrc",
    "**/.htpasswd",
    "**/.aws/credentials",
    "**/.ssh/**",
    "**/.gnupg/**",
    "**/*.asc",
    # Local databases / dumps
    "**/*.sqlite",
    "**/*.sqlite3",
    "**/*.db",
    "**/*.db-wal",
    "**/*.db-shm",
    # Terraform state/vars (often contain secrets)
    "**/*.tfstate",
    "**/*.tfstate.backup",
    "**/*.tfvars",
    # GCP / OAuth tokens
    "**/credentials.json",
    "**/token.json",
    # Known Guardian "oops outputs"
    "**/.devguard-email-history.json",
    "**/.devguard-email-thread",
    "**/repo_review_results.json",
    "**/npm_security_report.json",
    "**/npm_security_report.md",
]
72
+
73
+
74
@dataclass(frozen=True, slots=True)
class Hit:
    """One flagged tracked file in one repo."""

    # Path of the repo containing the file (stringified).
    repo_path: str
    # Path of the flagged file, relative to the repo root (as git reports it).
    file_path: str
    # Why it was flagged: "deny_glob:<pattern>" or "blob_too_large><N>".
    reason: str
    # Working-tree size in bytes, when the file could be stat'ed.
    size_bytes: int | None = None
80
+
81
+
82
+ def _utc_now_iso() -> str:
83
+ return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
84
+
85
+
86
+ def _matches_any(path: str, globs: list[str]) -> str | None:
87
+ p = path.lstrip("/")
88
+ # Allow common "example" env files (these are typically safe to commit).
89
+ # We still flag the real `.env` and other patterns.
90
+ env_allow = {".env.example", ".env.template", ".env.sample", ".env.dist"}
91
+ if Path(p).name in env_allow:
92
+ # If the only match would be the broad `.env.*` pattern, treat as allowed.
93
+ pass
94
+ for g in globs:
95
+ if Path(p).name in env_allow and (g.endswith("/.env.*") or g.endswith("**/.env.*")):
96
+ continue
97
+ # fnmatch's "*" matches "/" too; keep both patterns for readability
98
+ if fnmatch.fnmatch(p, g) or fnmatch.fnmatch(p, g.replace("**/", "")):
99
+ return g
100
+ return None
101
+
102
+
103
def _git_ls_files(repo: Path) -> list[str]:
    """List tracked files in *repo* via `git ls-files`; [] on any git failure.

    Uses -z (NUL separators) so unusual filenames round-trip safely.
    """
    completed = subprocess.run(
        ["git", "-C", str(repo), "ls-files", "-z"],
        check=False,
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
    )
    if completed.returncode != 0:
        return []
    decoded = completed.stdout.decode("utf-8", errors="replace")
    return [entry for entry in decoded.split("\0") if entry]
115
+
116
+
117
+ def _discover_git_repos(dev_root: Path, max_depth: int = 2) -> list[Path]:
118
+ """Discover git repos under dev_root, bounded by max_depth.
119
+
120
+ We avoid an unbounded recursive walk by limiting to max_depth directory levels.
121
+ """
122
+ repos: list[Path] = []
123
+ dev_root = dev_root.expanduser().resolve()
124
+ if not dev_root.exists():
125
+ return repos
126
+
127
+ # Depth 0: dev_root itself
128
+ if (dev_root / ".git").exists():
129
+ repos.append(dev_root)
130
+
131
+ # Depth-limited breadth walk
132
+ frontier: list[tuple[Path, int]] = [(dev_root, 0)]
133
+ while frontier:
134
+ cur, depth = frontier.pop()
135
+ if depth >= max_depth:
136
+ continue
137
+ try:
138
+ children = list(cur.iterdir())
139
+ except (OSError, PermissionError):
140
+ continue
141
+ for child in children:
142
+ if not child.is_dir():
143
+ continue
144
+ name = child.name
145
+ # Avoid obvious heavy dirs.
146
+ #
147
+ # Important: the workspace root under ~/Documents/dev often contains
148
+ # very large scratch/backup directories. Scanning into them can take
149
+ # minutes and isn't useful for "repo blunder" detection.
150
+ if name in {
151
+ ".git",
152
+ ".venv",
153
+ "venv",
154
+ "node_modules",
155
+ "target",
156
+ ".cache",
157
+ ".pytest_cache",
158
+ ".ruff_cache",
159
+ }:
160
+ continue
161
+ if depth == 0:
162
+ # Skip top-level junk roots unless explicitly allowed.
163
+ if (name.startswith("_") or name.startswith(".")) and name not in {"_infra"}:
164
+ continue
165
+ if name in {"evals", "integration_test_tmp"}:
166
+ continue
167
+ if (child / ".git").exists():
168
+ repos.append(child)
169
+ # Don't recurse into a repo unless user explicitly sets higher max_depth.
170
+ continue
171
+ frontier.append((child, depth + 1))
172
+
173
+ # De-dupe while preserving order
174
+ seen: set[Path] = set()
175
+ out: list[Path] = []
176
+ for r in repos:
177
+ rr = r.resolve()
178
+ if rr in seen:
179
+ continue
180
+ seen.add(rr)
181
+ out.append(rr)
182
+ return out
183
+
184
+
185
def sweep_dev_repos(
    dev_root: Path,
    deny_globs: list[str] | None = None,
    max_blob_bytes: int = 5 * 1024 * 1024,
    max_depth: int = 2,
) -> tuple[list[Hit], dict]:
    """Sweep each discovered repo's tracked files for deny-glob and size hits.

    Returns:
        (hits, metadata) where metadata is a small dict safe to serialize.
    """

    def current_size(candidate: Path) -> int | None:
        # Working-tree size; None when the path is missing, not a regular
        # file, or cannot be stat'ed.
        try:
            if candidate.exists() and candidate.is_file():
                return candidate.stat().st_size
        except OSError:
            pass
        return None

    active_globs = deny_globs or list(DEFAULT_DENY_GLOBS)
    discovered = _discover_git_repos(dev_root, max_depth=max_depth)

    hits: list[Hit] = []
    for repo in discovered:
        for rel in _git_ls_files(repo):
            matched = _matches_any(rel, active_globs)
            if matched:
                hits.append(
                    Hit(
                        repo_path=str(repo),
                        file_path=rel,
                        reason=f"deny_glob:{matched}",
                        size_bytes=current_size(repo / rel),
                    )
                )
                continue
            # Not denied by pattern: still flag oversized blobs (current
            # working-tree size, not historical blob size).
            size = current_size(repo / rel)
            if size is not None and size > max_blob_bytes:
                hits.append(
                    Hit(
                        repo_path=str(repo),
                        file_path=rel,
                        reason=f"blob_too_large>{max_blob_bytes}",
                        size_bytes=size,
                    )
                )

    meta = {
        "generated_at": _utc_now_iso(),
        "dev_root": str(dev_root.expanduser()),
        "repos_scanned": len(discovered),
        "max_depth": max_depth,
        "max_blob_bytes": max_blob_bytes,
        "deny_globs": active_globs,
    }
    return hits, meta
248
+
249
+
250
def write_report(path: Path, hits: Iterable[Hit], meta: dict) -> None:
    """Write *meta* plus serialized *hits* as stable (sorted-key) JSON."""
    payload = dict(meta)
    payload["hits"] = [asdict(hit) for hit in hits]
    path.parent.mkdir(parents=True, exist_ok=True)
    text = json.dumps(payload, indent=2, sort_keys=True) + "\n"
    path.write_text(text, encoding="utf-8")
257
+
258
+
259
def default_dev_root() -> Path:
    """Return the dev workspace root: $DEV_DIR if set, else ~/Documents/dev.

    A leading "~" in DEV_DIR is expanded, matching the behavior of the other
    sweeps' dev-root resolution.
    """
    configured = os.environ.get("DEV_DIR", str(Path.home() / "Documents" / "dev"))
    return Path(configured).expanduser()