devguard 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. devguard/INTEGRATION_SUMMARY.md +121 -0
  2. devguard/__init__.py +3 -0
  3. devguard/__main__.py +6 -0
  4. devguard/checkers/__init__.py +41 -0
  5. devguard/checkers/api_usage.py +523 -0
  6. devguard/checkers/aws_cost.py +331 -0
  7. devguard/checkers/aws_iam.py +284 -0
  8. devguard/checkers/base.py +25 -0
  9. devguard/checkers/container.py +137 -0
  10. devguard/checkers/domain.py +189 -0
  11. devguard/checkers/firecrawl.py +117 -0
  12. devguard/checkers/fly.py +225 -0
  13. devguard/checkers/github.py +210 -0
  14. devguard/checkers/npm.py +327 -0
  15. devguard/checkers/npm_security.py +244 -0
  16. devguard/checkers/redteam.py +290 -0
  17. devguard/checkers/secret.py +279 -0
  18. devguard/checkers/swarm.py +376 -0
  19. devguard/checkers/tailscale.py +143 -0
  20. devguard/checkers/tailsnitch.py +303 -0
  21. devguard/checkers/tavily.py +179 -0
  22. devguard/checkers/vercel.py +192 -0
  23. devguard/cli.py +1510 -0
  24. devguard/cli_helpers.py +189 -0
  25. devguard/config.py +249 -0
  26. devguard/core.py +293 -0
  27. devguard/dashboard.py +715 -0
  28. devguard/discovery.py +363 -0
  29. devguard/http_client.py +142 -0
  30. devguard/llm_service.py +481 -0
  31. devguard/mcp_server.py +259 -0
  32. devguard/metrics.py +144 -0
  33. devguard/models.py +208 -0
  34. devguard/reporting.py +1571 -0
  35. devguard/sarif.py +295 -0
  36. devguard/scripts/ANALYSIS_SUMMARY.md +141 -0
  37. devguard/scripts/README.md +221 -0
  38. devguard/scripts/auto_fix_recommendations.py +145 -0
  39. devguard/scripts/generate_npmignore.py +175 -0
  40. devguard/scripts/generate_security_report.py +324 -0
  41. devguard/scripts/prepublish_check.sh +29 -0
  42. devguard/scripts/redteam_npm_packages.py +1262 -0
  43. devguard/scripts/review_all_repos.py +300 -0
  44. devguard/spec.py +617 -0
  45. devguard/sweeps/__init__.py +23 -0
  46. devguard/sweeps/ai_editor_config_audit.py +697 -0
  47. devguard/sweeps/cargo_publish_audit.py +655 -0
  48. devguard/sweeps/dependency_audit.py +419 -0
  49. devguard/sweeps/gitignore_audit.py +336 -0
  50. devguard/sweeps/local_dev.py +260 -0
  51. devguard/sweeps/local_dirty_worktree_secrets.py +521 -0
  52. devguard/sweeps/project_flaudit.py +636 -0
  53. devguard/sweeps/public_github_secrets.py +680 -0
  54. devguard/sweeps/publish_audit.py +478 -0
  55. devguard/sweeps/ssh_key_audit.py +327 -0
  56. devguard/utils.py +174 -0
  57. devguard-0.2.0.dist-info/METADATA +225 -0
  58. devguard-0.2.0.dist-info/RECORD +60 -0
  59. devguard-0.2.0.dist-info/WHEEL +4 -0
  60. devguard-0.2.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,521 @@
1
+ from __future__ import annotations
2
+
3
+ import fnmatch
4
+ import json
5
+ import os
6
+ import subprocess
7
+ from collections import Counter
8
+ from collections.abc import Iterable
9
+ from concurrent.futures import ThreadPoolExecutor, as_completed
10
+ from dataclasses import dataclass
11
+ from datetime import UTC, datetime
12
+ from pathlib import Path
13
+ from threading import Lock
14
+ from typing import Any
15
+
16
+
17
+ def _utc_now() -> str:
18
+ return datetime.now(UTC).isoformat().replace("+00:00", "Z")
19
+
20
+
21
+ def _default_dev_root() -> Path:
22
+ return Path(os.getenv("DEV_DIR") or "~/Documents/dev").expanduser()
23
+
24
+
25
# Files where hash-like strings routinely trigger false positives (e.g. SentryToken on uv.lock).
# Findings located in any of these basenames are dropped wholesale during scanning:
# lock files embed a content hash for every pinned dependency, and secret
# detectors frequently match those hashes as if they were credentials.
LOCK_FILE_BASENAMES: frozenset[str] = frozenset({
    "uv.lock",
    "Cargo.lock",
    "package-lock.json",
    "pnpm-lock.yaml",
    "yarn.lock",
    "poetry.lock",
    "Gemfile.lock",
    "composer.lock",
    "Pipfile.lock",
    "requirements.lock",
})
38
+
39
+
40
+ def _iter_git_repos(root: Path, max_depth: int) -> Iterable[Path]:
41
+ """Discover git repos under root, bounded by max_depth."""
42
+ root = root.resolve()
43
+ try:
44
+ max_depth = int(max_depth)
45
+ except Exception:
46
+ max_depth = 2
47
+ max_depth = max(0, min(max_depth, 6))
48
+
49
+ # BFS-ish walk with depth bound.
50
+ stack: list[tuple[Path, int]] = [(root, 0)]
51
+ seen: set[Path] = set()
52
+
53
+ junk_top = {
54
+ "node_modules",
55
+ ".venv",
56
+ "venv",
57
+ "dist",
58
+ "build",
59
+ ".git",
60
+ ".cache",
61
+ ".state",
62
+ "__pycache__",
63
+ "_trash",
64
+ "_scratch",
65
+ "_external",
66
+ "_archive",
67
+ "_forks",
68
+ }
69
+
70
+ while stack:
71
+ cur, depth = stack.pop()
72
+ if cur in seen:
73
+ continue
74
+ seen.add(cur)
75
+
76
+ # If this directory *is* a repo root, yield it and don't descend further.
77
+ if (cur / ".git").exists():
78
+ yield cur
79
+ continue
80
+
81
+ if depth >= max_depth:
82
+ continue
83
+
84
+ try:
85
+ for child in cur.iterdir():
86
+ if not child.is_dir():
87
+ continue
88
+ name = child.name
89
+ if depth == 0 and name in junk_top:
90
+ continue
91
+ # Skip hidden dirs by default (except `_infra` pattern is handled in other sweeps;
92
+ # here we only care about local worktrees).
93
+ if name.startswith("."):
94
+ continue
95
+ stack.append((child, depth + 1))
96
+ except Exception:
97
+ continue
98
+
99
+
100
+ def _dirty_paths(repo: Path, timeout_s: int = 8) -> tuple[list[str], str | None]:
101
+ """Return a list of dirty file paths (relative to repo) from `git status --porcelain`.
102
+
103
+ This includes modified, added, deleted (ignored), renamed (new path), and untracked files.
104
+ Returns ([], None) when the repo is clean (not an error).
105
+ """
106
+ try:
107
+ res = subprocess.run(
108
+ ["git", "status", "--porcelain=v1", "-z"],
109
+ cwd=str(repo),
110
+ capture_output=True,
111
+ text=True,
112
+ timeout=timeout_s,
113
+ env=os.environ.copy(),
114
+ )
115
+ except Exception as e:
116
+ return [], str(e)
117
+
118
+ if res.returncode != 0:
119
+ return [], (res.stderr or "").strip()[:300] or f"git status exit={res.returncode}"
120
+
121
+ out = res.stdout or ""
122
+ if not out:
123
+ return [], None
124
+
125
+ paths: list[str] = []
126
+ for entry in out.split("\0"):
127
+ if not entry:
128
+ continue
129
+ # Porcelain v1 format begins with XY status and a space, then path.
130
+ # For renames, it can be "R old -> new" (in -z form it's "R old\0new\0" in some modes),
131
+ # but we keep this parser simple and best-effort.
132
+ if len(entry) >= 4 and entry[2] == " ":
133
+ p = entry[3:]
134
+ else:
135
+ p = entry
136
+ # Handle the "old -> new" display form (non -z) defensively.
137
+ if " -> " in p:
138
+ p = p.split(" -> ", 1)[1]
139
+ p = p.strip()
140
+ if not p:
141
+ continue
142
+ paths.append(p)
143
+
144
+ # Dedup and drop obviously non-files.
145
+ uniq = sorted(set(paths))
146
+ return uniq, None
147
+
148
+
149
+ @dataclass(frozen=True)
150
+ class LocalDirtyFinding:
151
+ repo_path: str
152
+ engine: str
153
+ type: str
154
+ file: str | None
155
+ line: int | None
156
+ git_tracked: bool | None = None
157
+ git_ignored: bool | None = None
158
+ exposure: str | None = None
159
+
160
+ def to_dict(self) -> dict[str, Any]:
161
+ return {
162
+ "repo_path": self.repo_path,
163
+ "engine": self.engine,
164
+ "type": self.type,
165
+ "file": self.file,
166
+ "line": self.line,
167
+ "git_tracked": self.git_tracked,
168
+ "git_ignored": self.git_ignored,
169
+ "exposure": self.exposure,
170
+ }
171
+
172
+
173
+ def _parse_trufflehog_filesystem_json(stdout: str, repo_path: str) -> list[LocalDirtyFinding]:
174
+ findings: list[LocalDirtyFinding] = []
175
+ for line in (stdout or "").splitlines():
176
+ s = line.strip()
177
+ if not s:
178
+ continue
179
+ try:
180
+ obj = json.loads(s)
181
+ except Exception:
182
+ continue
183
+ if not isinstance(obj, dict):
184
+ continue
185
+ detector = obj.get("DetectorName") or obj.get("Detector") or obj.get("DetectorType") or "unknown"
186
+
187
+ file_path = None
188
+ line_no = None
189
+ sm = obj.get("SourceMetadata") or {}
190
+ data = sm.get("Data") if isinstance(sm, dict) else {}
191
+ fs = data.get("Filesystem") if isinstance(data, dict) else {}
192
+ if isinstance(fs, dict):
193
+ file_path = fs.get("file") or fs.get("path")
194
+ lv = fs.get("line")
195
+ if isinstance(lv, int):
196
+ line_no = lv
197
+ elif isinstance(lv, str):
198
+ try:
199
+ line_no = int(lv)
200
+ except Exception:
201
+ line_no = None
202
+
203
+ findings.append(
204
+ LocalDirtyFinding(
205
+ repo_path=repo_path,
206
+ engine="trufflehog",
207
+ type=str(detector),
208
+ file=str(file_path) if file_path is not None else None,
209
+ line=line_no,
210
+ )
211
+ )
212
+ return findings
213
+
214
+
215
def scan_dirty_worktrees(
    *,
    dev_root: Path | None,
    max_depth: int,
    only_dirty: bool,
    exclude_repo_globs: list[str] | None = None,
    check_upstream: bool = True,
    fetch_remotes: bool = False,
    max_paths_per_repo: int = 50,
    include_ignored_files: bool = False,
    max_concurrency: int,
    timeout_s: int,
) -> tuple[dict[str, Any], list[str]]:
    """Scan dirty worktree files under *dev_root* for secrets with trufflehog.

    Discovers git repos (via _iter_git_repos), collects each repo's dirty paths
    (via _dirty_paths), and runs `trufflehog filesystem` over those files in a
    thread pool. Findings in lock files (LOCK_FILE_BASENAMES) are dropped, and
    untracked+gitignored files are skipped unless *include_ignored_files*.

    Args:
        dev_root: root directory to sweep; None falls back to _default_dev_root().
        max_depth: repo-discovery depth bound (clamped 0..6 by _iter_git_repos).
        only_dirty: recorded in the report scope; the scan always skips clean
            repos via _scan_one's early return, so this flag does not change
            which repos are scanned (NOTE(review): appears intentional per the
            comment below -- confirm).
        exclude_repo_globs: fnmatch globs matched against absolute repo paths.
        check_upstream: when True, record ahead/behind counts per dirty repo.
        fetch_remotes: when True (and check_upstream), `git fetch` before counting.
        max_paths_per_repo: cap on dirty paths scanned per repo (clamped 1..500).
        include_ignored_files: also scan untracked files that are gitignored.
        max_concurrency: worker threads and trufflehog concurrency (clamped 1..12).
        timeout_s: per-repo trufflehog timeout, clamped to 30..600 seconds.

    Returns:
        (report, errors): a JSON-serializable report dict (scope, engine info,
        per-repo metadata, findings, summary, errors) and the same error list
        that is embedded in the report.
    """
    errors: list[str] = []
    root = dev_root if dev_root is not None else _default_dev_root()

    repos = sorted({str(p) for p in _iter_git_repos(root, max_depth=max_depth)})
    globs = [g for g in (exclude_repo_globs or []) if isinstance(g, str) and g.strip()]
    if globs:
        repos = [r for r in repos if not any(fnmatch.fnmatch(r, g) for g in globs)]
    # When only_dirty, we filter inside _scan_one (single git status call per repo)
    # rather than calling _is_repo_dirty + _dirty_paths (two calls per repo).
    dirty_repos = repos

    # Defensive clamps: tolerate non-int CLI/config values.
    try:
        max_concurrency = int(max_concurrency)
    except Exception:
        max_concurrency = 4
    max_concurrency = max(1, min(max_concurrency, 12))

    per_repo_timeout = max(30, min(int(timeout_s), 600))

    # Shared accumulators; ignored_skipped_basenames is mutated from worker
    # threads, hence the Lock.
    findings: list[LocalDirtyFinding] = []
    repo_meta: list[dict[str, Any]] = []
    ignored_paths_skipped_total = 0
    repos_with_ignored_skips = 0
    repos_with_truncated_paths = 0
    ignored_skipped_basenames: Counter[str] = Counter()
    ignored_skipped_lock = Lock()

    try:
        max_paths_per_repo = int(max_paths_per_repo)
    except Exception:
        max_paths_per_repo = 50
    max_paths_per_repo = max(1, min(max_paths_per_repo, 500))

    def _classify(
        repo: Path, repo_path: str, abs_path: str
    ) -> tuple[bool | None, bool | None, str | None, str]:
        # Map an absolute path back to a repo-relative one, then classify it as
        # tracked / untracked_ignored / untracked_not_ignored (None = unknown).
        rel = abs_path[len(repo_path.rstrip("/") + "/") :] if abs_path.startswith(repo_path.rstrip("/") + "/") else ""
        tracked = _git_check_tracked(repo, rel) if rel else None
        ignored = _git_check_ignored(repo, rel) if rel else None
        exposure = None
        if tracked is True:
            exposure = "tracked"
        elif ignored is True:
            exposure = "untracked_ignored"
        elif ignored is False:
            exposure = "untracked_not_ignored"
        return tracked, ignored, exposure, rel

    def _git_check_ignored(repo: Path, rel_path: str) -> bool | None:
        # `git check-ignore -q` exits 0 when ignored, 1 when not; other codes
        # (or any exception) are treated as "unknown".
        try:
            r = subprocess.run(
                ["git", "check-ignore", "-q", rel_path],
                cwd=str(repo),
                capture_output=True,
                text=True,
                timeout=5,
                env=os.environ.copy(),
            )
            if r.returncode == 0:
                return True
            if r.returncode == 1:
                return False
            return None
        except Exception:
            return None

    def _git_check_tracked(repo: Path, rel_path: str) -> bool | None:
        # `git ls-files --error-unmatch` exits 0 when tracked, 1 when not.
        try:
            r = subprocess.run(
                ["git", "ls-files", "--error-unmatch", rel_path],
                cwd=str(repo),
                capture_output=True,
                text=True,
                timeout=5,
                env=os.environ.copy(),
            )
            if r.returncode == 0:
                return True
            if r.returncode == 1:
                return False
            return None
        except Exception:
            return None

    def _maybe_fetch(repo: Path) -> str | None:
        # Best-effort `git fetch`; returns an error string or None. No-op unless
        # fetch_remotes was requested.
        if not fetch_remotes:
            return None
        try:
            res = subprocess.run(
                ["git", "fetch", "--prune", "--quiet"],
                cwd=str(repo),
                capture_output=True,
                text=True,
                timeout=min(30, per_repo_timeout),
                env=os.environ.copy(),
            )
            if res.returncode != 0:
                return (res.stderr or "").strip()[:300] or f"git fetch exit={res.returncode}"
            return None
        except Exception as e:
            return str(e)

    def _ahead_behind(repo: Path) -> tuple[int | None, int | None, str | None]:
        # Returns (ahead, behind, err). Values are None if no upstream.
        try:
            up = subprocess.run(
                ["git", "rev-parse", "--abbrev-ref", "--symbolic-full-name", "@{u}"],
                cwd=str(repo),
                capture_output=True,
                text=True,
                timeout=8,
                env=os.environ.copy(),
            )
            if up.returncode != 0:
                # No upstream configured: not an error, just unknown.
                return None, None, None
            cnt = subprocess.run(
                ["git", "rev-list", "--left-right", "--count", "HEAD...@{u}"],
                cwd=str(repo),
                capture_output=True,
                text=True,
                timeout=10,
                env=os.environ.copy(),
            )
            if cnt.returncode != 0:
                return None, None, (cnt.stderr or "").strip()[:200] or f"rev-list exit={cnt.returncode}"
            # output: "<left>\t<right>" where left=behind? Actually for HEAD...@{u}, left=commits unique to HEAD, right=unique to upstream.
            parts = (cnt.stdout or "").strip().split()
            if len(parts) >= 2:
                ahead = int(parts[0])
                behind = int(parts[1])
                return ahead, behind, None
            return None, None, "unexpected rev-list output"
        except Exception as e:
            return None, None, str(e)

    def _scan_one(repo_path: str) -> tuple[str, list[LocalDirtyFinding], list[str], dict | None]:
        """Returns (repo_path, findings, errors, repo_meta_entry_or_None)."""
        repo_errors: list[str] = []
        repo_findings: list[LocalDirtyFinding] = []

        repo = Path(repo_path)
        rel_paths, err = _dirty_paths(repo)
        if err:
            return repo_path, [], [f"git status failed for {repo_path}: {err}"], None
        if not rel_paths:
            # Clean repo: nothing to scan, no meta entry.
            return repo_path, [], [], None

        # Cap the number of files per repo to bound trufflehog runtime.
        rel_paths_sorted = sorted(rel_paths)
        truncated = False
        if len(rel_paths_sorted) > max_paths_per_repo:
            rel_paths_sorted = rel_paths_sorted[:max_paths_per_repo]
            truncated = True

        # Keep only existing regular files; optionally drop untracked+ignored ones
        # (recording a small sample and global basename tallies for the summary).
        abs_paths: list[str] = []
        skipped_ignored = 0
        skipped_ignored_sample: list[str] = []
        for rp in rel_paths_sorted:
            ap = repo / rp
            if not (ap.exists() and ap.is_file()):
                continue
            abs_p = str(ap)
            tracked, ignored, _exposure, _rel = _classify(repo, repo_path, abs_p)
            if ignored is True and tracked is False and not include_ignored_files:
                skipped_ignored += 1
                bn = Path(rp).name
                if bn and len(skipped_ignored_sample) < 5 and bn not in skipped_ignored_sample:
                    skipped_ignored_sample.append(bn)
                if bn:
                    with ignored_skipped_lock:
                        ignored_skipped_basenames[bn] += 1
                continue
            abs_paths.append(abs_p)
        if not abs_paths:
            return repo_path, [], [], None

        fetch_err = _maybe_fetch(repo) if check_upstream else None
        ahead, behind, ab_err = _ahead_behind(repo) if check_upstream else (None, None, None)
        meta_entry = {
            "repo_path": repo_path,
            "dirty_paths_count": len(rel_paths),
            "scanned_paths_count": len(abs_paths),
            "paths_truncated": truncated,
            "max_paths_per_repo": max_paths_per_repo,
            "ignored_paths_skipped": skipped_ignored,
            "ignored_paths_skipped_sample": skipped_ignored_sample,
            "ahead": ahead,
            "behind": behind,
            "upstream_checked": bool(check_upstream),
            "fetched": bool(fetch_remotes),
            "upstream_error": ab_err,
            "fetch_error": fetch_err,
        }

        cmd = [
            "trufflehog",
            "filesystem",
            "--json",
            "--no-update",
            "--no-verification",
            "--no-fail-on-scan-errors",
            f"--concurrency={max_concurrency}",
            *abs_paths,
        ]
        try:
            res = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=per_repo_timeout,
                env=os.environ.copy(),
            )
        except Exception as e:
            return repo_path, [], [f"trufflehog filesystem failed for {repo_path}: {e}"], None

        # TruffleHog may exit non-zero on some errors; we tolerate and record stderr.
        # NOTE(review): 183 is treated as a benign exit alongside 0 -- presumably
        # trufflehog's "results found" code; confirm against the pinned version.
        if res.returncode not in (0, 183):
            stderr = (res.stderr or "").strip()
            if stderr:
                repo_errors.append(f"trufflehog filesystem error for {repo_path}: exit={res.returncode} stderr={stderr[:600]}")

        if res.stdout:
            parsed = _parse_trufflehog_filesystem_json(res.stdout, repo_path=repo_path)
            for f in parsed:
                # Skip findings in lock files -- they contain dependency hashes that
                # routinely trigger false positives (e.g. SentryToken on uv.lock).
                if f.file and Path(f.file).name in LOCK_FILE_BASENAMES:
                    continue
                if f.file:
                    tracked, ignored, exposure, _rel = _classify(repo, repo_path, f.file)
                    # LocalDirtyFinding is frozen; object.__setattr__ bypasses
                    # immutability to attach classification post-construction.
                    object.__setattr__(f, "git_tracked", tracked)
                    object.__setattr__(f, "git_ignored", ignored)
                    object.__setattr__(f, "exposure", exposure)
                repo_findings.append(f)

        return repo_path, repo_findings, repo_errors, meta_entry

    # Fan out one worker per repo; aggregate as results complete. A crashed
    # worker is recorded as an error rather than aborting the sweep.
    with ThreadPoolExecutor(max_workers=max_concurrency) as ex:
        futures = [ex.submit(_scan_one, r) for r in dirty_repos]
        for fut in as_completed(futures):
            try:
                _repo, fs, es, meta_entry = fut.result()
            except Exception as e:
                errors.append(f"dirty worktree scan worker crashed: {e}")
                continue
            if fs:
                findings.extend(fs)
            if es:
                errors.extend(es)
            if meta_entry:
                repo_meta.append(meta_entry)
                ignored_paths_skipped_total += meta_entry["ignored_paths_skipped"]
                if meta_entry["ignored_paths_skipped"] > 0:
                    repos_with_ignored_skips += 1
                if meta_entry["paths_truncated"]:
                    repos_with_truncated_paths += 1

    # Repos and findings are capped at 500 entries each to bound report size;
    # summary totals reflect the uncapped counts.
    report: dict[str, Any] = {
        "generated_at": _utc_now(),
        "scope": {
            "dev_root": str(root),
            "repos_discovered_count": len(repos),
            "repos_scanned_count": len(repo_meta),
            "only_dirty": bool(only_dirty),
            "max_depth": int(max_depth),
            "exclude_repo_globs": globs,
            "check_upstream": bool(check_upstream),
            "fetch_remotes": bool(fetch_remotes),
            "max_paths_per_repo": max_paths_per_repo,
            "include_ignored_files": bool(include_ignored_files),
        },
        "engine": {
            "name": "trufflehog",
            "mode": "filesystem",
            "max_concurrency": max_concurrency,
            "per_repo_timeout_s": per_repo_timeout,
        },
        "repos": repo_meta[:500],
        "findings": [f.to_dict() for f in findings[:500]],
        "summary": {
            "findings_total": len(findings),
            "ignored_paths_skipped_total": ignored_paths_skipped_total,
            "repos_with_ignored_paths_skipped": repos_with_ignored_skips,
            "repos_with_paths_truncated": repos_with_truncated_paths,
            "ignored_paths_skipped_top_basenames": ignored_skipped_basenames.most_common(15),
        },
        "errors": errors,
    }

    return report, errors
517
+
518
+
519
def write_report(path: Path, report: dict[str, Any]) -> None:
    """Write *report* to *path* as indented JSON, creating parent directories."""
    path.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(report, indent=2)
    path.write_text(serialized)