devguard-0.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. devguard/INTEGRATION_SUMMARY.md +121 -0
  2. devguard/__init__.py +3 -0
  3. devguard/__main__.py +6 -0
  4. devguard/checkers/__init__.py +41 -0
  5. devguard/checkers/api_usage.py +523 -0
  6. devguard/checkers/aws_cost.py +331 -0
  7. devguard/checkers/aws_iam.py +284 -0
  8. devguard/checkers/base.py +25 -0
  9. devguard/checkers/container.py +137 -0
  10. devguard/checkers/domain.py +189 -0
  11. devguard/checkers/firecrawl.py +117 -0
  12. devguard/checkers/fly.py +225 -0
  13. devguard/checkers/github.py +210 -0
  14. devguard/checkers/npm.py +327 -0
  15. devguard/checkers/npm_security.py +244 -0
  16. devguard/checkers/redteam.py +290 -0
  17. devguard/checkers/secret.py +279 -0
  18. devguard/checkers/swarm.py +376 -0
  19. devguard/checkers/tailscale.py +143 -0
  20. devguard/checkers/tailsnitch.py +303 -0
  21. devguard/checkers/tavily.py +179 -0
  22. devguard/checkers/vercel.py +192 -0
  23. devguard/cli.py +1510 -0
  24. devguard/cli_helpers.py +189 -0
  25. devguard/config.py +249 -0
  26. devguard/core.py +293 -0
  27. devguard/dashboard.py +715 -0
  28. devguard/discovery.py +363 -0
  29. devguard/http_client.py +142 -0
  30. devguard/llm_service.py +481 -0
  31. devguard/mcp_server.py +259 -0
  32. devguard/metrics.py +144 -0
  33. devguard/models.py +208 -0
  34. devguard/reporting.py +1571 -0
  35. devguard/sarif.py +295 -0
  36. devguard/scripts/ANALYSIS_SUMMARY.md +141 -0
  37. devguard/scripts/README.md +221 -0
  38. devguard/scripts/auto_fix_recommendations.py +145 -0
  39. devguard/scripts/generate_npmignore.py +175 -0
  40. devguard/scripts/generate_security_report.py +324 -0
  41. devguard/scripts/prepublish_check.sh +29 -0
  42. devguard/scripts/redteam_npm_packages.py +1262 -0
  43. devguard/scripts/review_all_repos.py +300 -0
  44. devguard/spec.py +617 -0
  45. devguard/sweeps/__init__.py +23 -0
  46. devguard/sweeps/ai_editor_config_audit.py +697 -0
  47. devguard/sweeps/cargo_publish_audit.py +655 -0
  48. devguard/sweeps/dependency_audit.py +419 -0
  49. devguard/sweeps/gitignore_audit.py +336 -0
  50. devguard/sweeps/local_dev.py +260 -0
  51. devguard/sweeps/local_dirty_worktree_secrets.py +521 -0
  52. devguard/sweeps/project_flaudit.py +636 -0
  53. devguard/sweeps/public_github_secrets.py +680 -0
  54. devguard/sweeps/publish_audit.py +478 -0
  55. devguard/sweeps/ssh_key_audit.py +327 -0
  56. devguard/utils.py +174 -0
  57. devguard-0.2.0.dist-info/METADATA +225 -0
  58. devguard-0.2.0.dist-info/RECORD +60 -0
  59. devguard-0.2.0.dist-info/WHEEL +4 -0
  60. devguard-0.2.0.dist-info/entry_points.txt +2 -0
devguard/sweeps/publish_audit.py
@@ -0,0 +1,478 @@
+ """Multi-ecosystem publish audit: check PyPI and npm repos for correct CI publish pipelines.
+
+ Complements cargo_publish_audit by covering Python (pyproject.toml) and
+ JavaScript/TypeScript (package.json) repos. Checks OIDC trusted publishing,
+ workflow correctness, version consistency, and license presence.
+ """
+
+ from __future__ import annotations
+
+ import fnmatch
+ import json
+ import os
+ import re
+ import subprocess
+ from dataclasses import dataclass, field
+ from datetime import UTC, datetime
+ from pathlib import Path
+ from typing import Any
+
+
+ def _utc_now() -> str:
+     return datetime.now(UTC).isoformat().replace("+00:00", "Z")
+
+
+ def _default_dev_root() -> Path:
+     return Path(os.getenv("DEV_DIR") or "~/Documents/dev").expanduser()
+
+
+ def _iter_repos(root: Path, max_depth: int, exclude_globs: list[str]) -> list[tuple[Path, str]]:
+     """Discover git repos with pyproject.toml or package.json under root.
+
+     Returns (repo_path, ecosystem) tuples. A repo can appear twice if it has both.
+     """
+     root = root.resolve()
+     max_depth = max(0, min(int(max_depth), 6))
+     junk = {
+         "node_modules",
+         ".venv",
+         "venv",
+         "dist",
+         "build",
+         ".git",
+         ".cache",
+         ".state",
+         "__pycache__",
+         "_trash",
+         "_scratch",
+         "_external",
+         "_archive",
+         "_forks",
+         "target",
+     }
+     repos: list[tuple[Path, str]] = []
+     stack: list[tuple[Path, int]] = [(root, 0)]
+     seen: set[Path] = set()
+     while stack:
+         cur, depth = stack.pop()
+         if cur in seen:
+             continue
+         seen.add(cur)
+         if (cur / ".git").exists():
+             if not any(fnmatch.fnmatch(str(cur), g) for g in exclude_globs):
+                 if (cur / "pyproject.toml").exists():
+                     repos.append((cur, "pypi"))
+                 if (cur / "package.json").exists():
+                     # Skip if private: true
+                     try:
+                         pkg = json.loads(
+                             (cur / "package.json").read_text(encoding="utf-8", errors="replace")
+                         )
+                         if not pkg.get("private", False):
+                             repos.append((cur, "npm"))
+                     except Exception:
+                         pass
+             continue
+         if depth >= max_depth:
+             continue
+         try:
+             for child in cur.iterdir():
+                 if not child.is_dir():
+                     continue
+                 name = child.name
+                 if name in junk or name.startswith("."):
+                     continue
+                 stack.append((child, depth + 1))
+         except Exception:
+             continue
+     return sorted(repos, key=lambda x: (x[0].name, x[1]))
+
+
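One subtlety in `_iter_repos` worth noting: `exclude_globs` are matched with `fnmatch` against the repo's absolute path, so patterns usually need wildcards on both ends. A minimal usage sketch (the directory layout here is hypothetical):

    from pathlib import Path

    # Hypothetical tree: ~/Documents/dev/myapp is a git repo with both
    # pyproject.toml and package.json, so it is reported twice.
    repos = _iter_repos(
        Path("~/Documents/dev").expanduser(),
        max_depth=2,
        exclude_globs=["*/_experiments/*"],  # matched against str(repo_path)
    )
    for path, ecosystem in repos:
        print(ecosystem, path)  # "npm .../myapp" then "pypi .../myapp" (sorted by name, ecosystem)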
+ def _is_likely_public(repo: Path) -> bool:
+     for name in ("LICENSE", "LICENSE.md", "LICENSE.txt", "LICENSE-MIT", "LICENCE"):
+         if (repo / name).exists():
+             return True
+     return False
+
+
+ def _get_latest_version_tag(repo: Path) -> str | None:
+     try:
+         res = subprocess.run(
+             ["git", "tag", "--sort=-v:refname"],
+             cwd=str(repo),
+             capture_output=True,
+             text=True,
+             timeout=10,
+         )
+         if res.returncode != 0:
+             return None
+         for line in res.stdout.strip().splitlines():
+             tag = line.strip()
+             m = re.search(r"v?(\d+\.\d+\.\d+(?:-[\w.]+)?)", tag)
+             if m:
+                 return m.group(1)
+         return None
+     except Exception:
+         return None
+
+
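Because `git tag --sort=-v:refname` lists tags in descending version order and the pattern uses `re.search`, `_get_latest_version_tag` returns the version-like substring of the first (highest) matching tag, whatever its prefix. A quick check of the pattern's behavior:

    import re

    pattern = re.compile(r"v?(\d+\.\d+\.\d+(?:-[\w.]+)?)")
    for tag in ("v1.4.0", "release-1.2.3", "0.9.0-beta.1", "nightly"):
        m = pattern.search(tag)
        print(tag, "->", m.group(1) if m else None)
    # v1.4.0 -> 1.4.0
    # release-1.2.3 -> 1.2.3
    # 0.9.0-beta.1 -> 0.9.0-beta.1
    # nightly -> None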
+ def _read_workflow_files(repo: Path) -> list[tuple[str, str]]:
+     wf_dir = repo / ".github" / "workflows"
+     if not wf_dir.is_dir():
+         return []
+     results = []
+     for f in sorted(wf_dir.iterdir()):
+         if f.suffix in (".yml", ".yaml") and f.is_file():
+             try:
+                 text = f.read_text(encoding="utf-8", errors="replace")
+                 results.append((f.name, text))
+             except Exception:
+                 continue
+     return results
+
+
+ @dataclass
+ class Finding:
+     check: str
+     severity: str
+     message: str
+     detail: str = ""
+
+
+ @dataclass
+ class RepoAuditResult:
+     repo_path: str
+     repo_name: str
+     ecosystem: str  # "pypi" or "npm"
+     is_public: bool
+     package_name: str | None
+     package_version: str | None
+     latest_tag: str | None
+     has_workflows: bool
+     findings: list[Finding] = field(default_factory=list)
+
+
+ def _read_pypi_metadata(repo: Path) -> tuple[str | None, str | None]:
+     """Read package name and version from pyproject.toml."""
+     toml_path = repo / "pyproject.toml"
+     if not toml_path.is_file():
+         return None, None
+     try:
+         text = toml_path.read_text(encoding="utf-8", errors="replace")
+         name = None
+         version = None
+         proj_match = re.search(r"^\[project\](.*?)(?=^\[|\Z)", text, re.MULTILINE | re.DOTALL)
+         if proj_match:
+             section = proj_match.group(1)
+             nm = re.search(r'^name\s*=\s*"([^"]+)"', section, re.MULTILINE)
+             if nm:
+                 name = nm.group(1)
+             vm = re.search(r'^version\s*=\s*"([^"]+)"', section, re.MULTILINE)
+             if vm:
+                 version = vm.group(1)
+         return name, version
+     except Exception:
+         return None, None
+
+
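The regex parsing in `_read_pypi_metadata` keeps the module dependency-free, but it misses single-quoted values and dynamic versions. Since the module already requires Python 3.11+ (it imports `datetime.UTC`), the stdlib `tomllib` would read the same fields more robustly; a sketch of that alternative:

    import tomllib
    from pathlib import Path

    def read_pypi_metadata_tomllib(repo: Path) -> tuple[str | None, str | None]:
        """Drop-in alternative using the stdlib TOML parser (Python 3.11+)."""
        toml_path = repo / "pyproject.toml"
        if not toml_path.is_file():
            return None, None
        try:
            project = tomllib.loads(toml_path.read_text(encoding="utf-8")).get("project", {})
            return project.get("name"), project.get("version")
        except Exception:
            return None, None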
+ def _read_npm_metadata(repo: Path) -> tuple[str | None, str | None]:
+     """Read package name and version from package.json."""
+     pkg_path = repo / "package.json"
+     if not pkg_path.is_file():
+         return None, None
+     try:
+         pkg = json.loads(pkg_path.read_text(encoding="utf-8", errors="replace"))
+         return pkg.get("name"), pkg.get("version")
+     except Exception:
+         return None, None
+
+
+ def _audit_pypi_repo(repo: Path) -> RepoAuditResult:
+     """Audit a Python repo for PyPI publish readiness."""
+     name = repo.name
+     is_public = _is_likely_public(repo)
+     pkg_name, pkg_version = _read_pypi_metadata(repo)
+     latest_tag = _get_latest_version_tag(repo)
+     workflows = _read_workflow_files(repo)
+     has_workflows = len(workflows) > 0
+
+     result = RepoAuditResult(
+         repo_path=str(repo),
+         repo_name=name,
+         ecosystem="pypi",
+         is_public=is_public,
+         package_name=pkg_name,
+         package_version=pkg_version,
+         latest_tag=latest_tag,
+         has_workflows=has_workflows,
+     )
+
+     if not has_workflows:
+         sev = "error" if is_public else "warning"
+         result.findings.append(
+             Finding(check="no_workflows", severity=sev, message="No CI workflows")
+         )
+         return result
+
+     all_text = "\n".join(text for _, text in workflows)
+
+     # Check: publish workflow
+     publish_files = [
+         (f, t)
+         for f, t in workflows
+         if "pypi" in t.lower()
+         or "twine" in t.lower()
+         or "maturin" in t.lower()
+         or "gh-action-pypi-publish" in t
+     ]
+
+     # Severity: "error" if evidence of registry publication (version tags),
+     # "warning" if public but no tags, "info" if private.
+     has_tags = latest_tag is not None
+
+     def _sev() -> str:
+         if has_tags:
+             return "error"
+         return "warning" if is_public else "info"
+
+     if not publish_files:
+         result.findings.append(
+             Finding(
+                 check="no_publish_workflow",
+                 severity=_sev(),
+                 message="No PyPI publish workflow detected",
+                 detail="Expected a workflow using pypa/gh-action-pypi-publish, twine, or maturin.",
+             )
+         )
+
+     # Check: OIDC trusted publishing
+     has_oidc = bool(re.search(r"id-token\s*:\s*write", all_text))
+     has_pypi_action = "gh-action-pypi-publish" in all_text
+
+     if publish_files and not has_oidc:
+         result.findings.append(
+             Finding(
+                 check="no_oidc",
+                 severity=_sev(),
+                 message="No OIDC trusted publishing (missing `id-token: write`)",
+                 detail="PyPI supports trusted publishing via pypa/gh-action-pypi-publish. "
+                 "Configure a pending publisher at pypi.org/manage/account/publishing/.",
+             )
+         )
+
+     # Check: uses token secret instead of OIDC
+     for fname, text in publish_files:
+         if re.search(r"secrets\.(PYPI_TOKEN|PYPI_API_TOKEN|TWINE_PASSWORD)", text):
+             if not has_oidc:
+                 result.findings.append(
+                     Finding(
+                         check="secret_based_auth",
+                         severity="warning",
+                         message=f"{fname}: uses PyPI secret instead of OIDC trusted publishing",
+                         detail="Migrate to pypa/gh-action-pypi-publish with `id-token: write`.",
+                     )
+                 )
+
+     # Check: LICENSE
+     if is_public:
+         has_license = any(
+             (repo / n).exists() for n in ("LICENSE", "LICENSE.md", "LICENSE.txt", "LICENCE")
+         )
+         if not has_license:
+             result.findings.append(
+                 Finding(
+                     check="no_license_file",
+                     severity="error",
+                     message="Public repo has no LICENSE file",
+                 )
+             )
+
+     return result
+
+
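Note that the OIDC detection in `_audit_pypi_repo` is purely textual: any occurrence of `id-token: write` in the concatenated workflow YAML counts, regardless of which job grants it. A quick illustration of what does and does not trip the regex (workflow YAML inlined as strings):

    import re

    oidc = re.compile(r"id-token\s*:\s*write")

    with_oidc = """
    permissions:
      id-token: write
      contents: read
    """
    without_oidc = """
    permissions:
      contents: read
    """
    print(bool(oidc.search(with_oidc)))     # True
    print(bool(oidc.search(without_oidc)))  # False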
+ def _audit_npm_repo(repo: Path) -> RepoAuditResult:
+     """Audit a JS/TS repo for npm publish readiness."""
+     name = repo.name
+     is_public = _is_likely_public(repo)
+     pkg_name, pkg_version = _read_npm_metadata(repo)
+     latest_tag = _get_latest_version_tag(repo)
+     workflows = _read_workflow_files(repo)
+     has_workflows = len(workflows) > 0
+
+     result = RepoAuditResult(
+         repo_path=str(repo),
+         repo_name=name,
+         ecosystem="npm",
+         is_public=is_public,
+         package_name=pkg_name,
+         package_version=pkg_version,
+         latest_tag=latest_tag,
+         has_workflows=has_workflows,
+     )
+
+     if not has_workflows:
+         sev = "error" if is_public else "warning"
+         result.findings.append(
+             Finding(check="no_workflows", severity=sev, message="No CI workflows")
+         )
+         return result
+
+     all_text = "\n".join(text for _, text in workflows)
+
+     # Check: publish workflow
+     publish_files = [
+         (f, t) for f, t in workflows if "npm publish" in t or "provenance" in t.lower()
+     ]
+
+     # Severity: "error" if version tags exist (likely published),
+     # "warning" if public, "info" if private.
+     has_tags = latest_tag is not None
+
+     def _sev() -> str:
+         if has_tags:
+             return "error"
+         return "warning" if is_public else "info"
+
+     if not publish_files:
+         result.findings.append(
+             Finding(
+                 check="no_publish_workflow",
+                 severity=_sev(),
+                 message="No npm publish workflow detected",
+             )
+         )
+
+     # Check: OIDC / provenance
+     has_oidc = bool(re.search(r"id-token\s*:\s*write", all_text))
+     has_provenance = "--provenance" in all_text
+
+     if publish_files and not has_oidc:
+         result.findings.append(
+             Finding(
+                 check="no_oidc",
+                 severity="warning",
+                 message="No OIDC setup (missing `id-token: write`)",
+                 detail="npm supports provenance via `npm publish --provenance` with OIDC.",
+             )
+         )
+
+     # Check: uses NPM_TOKEN secret
+     for fname, text in publish_files:
+         if re.search(r"secrets\.NPM_TOKEN", text) and not has_oidc:
+             result.findings.append(
+                 Finding(
+                     check="secret_based_auth",
+                     severity="warning",
+                     message=f"{fname}: uses secrets.NPM_TOKEN instead of OIDC provenance",
+                 )
+             )
+
+     # Check: LICENSE
+     if is_public:
+         has_license = any(
+             (repo / n).exists() for n in ("LICENSE", "LICENSE.md", "LICENSE.txt", "LICENCE")
+         )
+         if not has_license:
+             result.findings.append(
+                 Finding(
+                     check="no_license_file",
+                     severity="error",
+                     message="Public repo has no LICENSE file",
+                 )
+             )
+
+     return result
+
+
+ def audit_publish(
+     *,
+     dev_root: Path | None = None,
+     max_depth: int = 2,
+     exclude_repo_globs: list[str] | None = None,
+     ecosystems: list[str] | None = None,
+ ) -> tuple[dict[str, Any], list[str]]:
+     """Audit PyPI and npm publish pipelines across repos."""
+     errors: list[str] = []
+     root = dev_root if dev_root is not None else _default_dev_root()
+     globs = [g for g in (exclude_repo_globs or []) if isinstance(g, str) and g.strip()]
+     allowed = set(ecosystems) if ecosystems else {"pypi", "npm"}
+
+     repos = _iter_repos(root, max_depth=max_depth, exclude_globs=globs)
+     repos = [(r, e) for r, e in repos if e in allowed]
+
+     results: list[RepoAuditResult] = []
+     for repo, ecosystem in repos:
+         try:
+             if ecosystem == "pypi":
+                 result = _audit_pypi_repo(repo)
+             else:
+                 result = _audit_npm_repo(repo)
+             results.append(result)
+         except Exception as exc:
+             errors.append(f"failed to audit {repo} ({ecosystem}): {exc}")
+
+     results.sort(
+         key=lambda r: (
+             -r.is_public,
+             -sum(1 for f in r.findings if f.severity == "error"),
+             r.repo_name,
+         )
+     )
+
+     repos_with_errors = [r for r in results if any(f.severity == "error" for f in r.findings)]
+     check_counts: dict[str, int] = {}
+     for r in results:
+         for f in r.findings:
+             check_counts[f.check] = check_counts.get(f.check, 0) + 1
+
+     report: dict[str, Any] = {
+         "generated_at": _utc_now(),
+         "scope": {
+             "dev_root": str(root),
+             "repos_scanned": len(repos),
+             "max_depth": max_depth,
+             "ecosystems": sorted(allowed),
+             "exclude_repo_globs": globs,
+         },
+         "summary": {
+             "repos_with_errors": len(repos_with_errors),
+             "repos_with_errors_list": [f"{r.repo_name} ({r.ecosystem})" for r in repos_with_errors],
+             "total_findings": sum(len(r.findings) for r in results),
+             "findings_by_check": sorted(check_counts.items(), key=lambda x: -x[1]),
+             "total_errors": sum(1 for r in results for f in r.findings if f.severity == "error"),
+             "total_warnings": sum(
+                 1 for r in results for f in r.findings if f.severity == "warning"
+             ),
+         },
+         "repos": [
+             {
+                 "repo_path": r.repo_path,
+                 "repo_name": r.repo_name,
+                 "ecosystem": r.ecosystem,
+                 "is_public": r.is_public,
+                 "package_name": r.package_name,
+                 "package_version": r.package_version,
+                 "latest_tag": r.latest_tag,
+                 "has_workflows": r.has_workflows,
+                 "findings": [
+                     {
+                         "check": f.check,
+                         "severity": f.severity,
+                         "message": f.message,
+                         **({"detail": f.detail} if f.detail else {}),
+                     }
+                     for f in r.findings
+                 ],
+             }
+             for r in results
+             if r.findings
+         ][:200],
+         "clean_repos": [f"{r.repo_name} ({r.ecosystem})" for r in results if not r.findings],
+         "errors": errors,
+     }
+     return report, errors
+
+
+ def write_report(path: Path, report: dict[str, Any]) -> None:
+     path.parent.mkdir(parents=True, exist_ok=True)
+     path.write_text(json.dumps(report, indent=2) + "\n")
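For reference, the two public entry points compose like this; a minimal driver sketch (the output path is hypothetical):

    from pathlib import Path

    # Scan $DEV_DIR (or ~/Documents/dev) for PyPI/npm repos and write a JSON report.
    report, errors = audit_publish(max_depth=2, ecosystems=["pypi", "npm"])
    write_report(Path("reports/publish_audit.json"), report)

    print(report["summary"]["total_errors"], "errors across",
          report["scope"]["repos_scanned"], "repo/ecosystem pairs")
    for msg in errors:
        print("audit error:", msg)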