devguard 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. devguard/INTEGRATION_SUMMARY.md +121 -0
  2. devguard/__init__.py +3 -0
  3. devguard/__main__.py +6 -0
  4. devguard/checkers/__init__.py +41 -0
  5. devguard/checkers/api_usage.py +523 -0
  6. devguard/checkers/aws_cost.py +331 -0
  7. devguard/checkers/aws_iam.py +284 -0
  8. devguard/checkers/base.py +25 -0
  9. devguard/checkers/container.py +137 -0
  10. devguard/checkers/domain.py +189 -0
  11. devguard/checkers/firecrawl.py +117 -0
  12. devguard/checkers/fly.py +225 -0
  13. devguard/checkers/github.py +210 -0
  14. devguard/checkers/npm.py +327 -0
  15. devguard/checkers/npm_security.py +244 -0
  16. devguard/checkers/redteam.py +290 -0
  17. devguard/checkers/secret.py +279 -0
  18. devguard/checkers/swarm.py +376 -0
  19. devguard/checkers/tailscale.py +143 -0
  20. devguard/checkers/tailsnitch.py +303 -0
  21. devguard/checkers/tavily.py +179 -0
  22. devguard/checkers/vercel.py +192 -0
  23. devguard/cli.py +1510 -0
  24. devguard/cli_helpers.py +189 -0
  25. devguard/config.py +249 -0
  26. devguard/core.py +293 -0
  27. devguard/dashboard.py +715 -0
  28. devguard/discovery.py +363 -0
  29. devguard/http_client.py +142 -0
  30. devguard/llm_service.py +481 -0
  31. devguard/mcp_server.py +259 -0
  32. devguard/metrics.py +144 -0
  33. devguard/models.py +208 -0
  34. devguard/reporting.py +1571 -0
  35. devguard/sarif.py +295 -0
  36. devguard/scripts/ANALYSIS_SUMMARY.md +141 -0
  37. devguard/scripts/README.md +221 -0
  38. devguard/scripts/auto_fix_recommendations.py +145 -0
  39. devguard/scripts/generate_npmignore.py +175 -0
  40. devguard/scripts/generate_security_report.py +324 -0
  41. devguard/scripts/prepublish_check.sh +29 -0
  42. devguard/scripts/redteam_npm_packages.py +1262 -0
  43. devguard/scripts/review_all_repos.py +300 -0
  44. devguard/spec.py +617 -0
  45. devguard/sweeps/__init__.py +23 -0
  46. devguard/sweeps/ai_editor_config_audit.py +697 -0
  47. devguard/sweeps/cargo_publish_audit.py +655 -0
  48. devguard/sweeps/dependency_audit.py +419 -0
  49. devguard/sweeps/gitignore_audit.py +336 -0
  50. devguard/sweeps/local_dev.py +260 -0
  51. devguard/sweeps/local_dirty_worktree_secrets.py +521 -0
  52. devguard/sweeps/project_flaudit.py +636 -0
  53. devguard/sweeps/public_github_secrets.py +680 -0
  54. devguard/sweeps/publish_audit.py +478 -0
  55. devguard/sweeps/ssh_key_audit.py +327 -0
  56. devguard/utils.py +174 -0
  57. devguard-0.2.0.dist-info/METADATA +225 -0
  58. devguard-0.2.0.dist-info/RECORD +60 -0
  59. devguard-0.2.0.dist-info/WHEEL +4 -0
  60. devguard-0.2.0.dist-info/entry_points.txt +2 -0
devguard/spec.py ADDED
@@ -0,0 +1,617 @@
1
+ """Specification system for defining what to monitor."""
2
+
3
+ from pathlib import Path
4
+ from typing import Any
5
+
6
+ from pydantic import BaseModel, Field
7
+
8
+
9
class DiscoveryRule(BaseModel):
    """A rule for discovering resources to monitor."""

    # Required identity of the rule: its label, the kind of resource it
    # yields, and the mechanism used to find that resource.
    name: str = Field(description="Name of the discovery rule")
    type: str = Field(description="Type of resource: npm, github, vercel, fly, domain, etc.")
    method: str = Field(description="Discovery method: cli, file_scan, api, custom")

    # Settings used by CLI-based rules.
    command: str | None = Field(
        default=None,
        description="CLI command to run (if method=cli)",
    )
    # NOTE(review): get_default_spec() in this module also passes "json_lines"
    # and "text" here, which the description does not list -- confirm the
    # supported parser names against the discovery implementation.
    command_parser: str | None = Field(
        default=None,
        description="How to parse command output: json, lines, regex",
    )

    # Settings used by file-scanning rules.
    file_pattern: str | None = Field(
        default=None,
        description="File pattern to search for (if method=file_scan)",
    )
    # NOTE(review): get_default_spec() in this module also passes "raw" here,
    # which the description does not list -- confirm the supported extractors.
    file_extractor: str | None = Field(
        default=None,
        description="How to extract data from files: json_path, regex, yaml_path",
    )

    # Shared by both parsers and extractors: what to pull out of the data.
    extract_path: str | None = Field(
        default=None,
        description="Path to extract (e.g., JSON path, YAML path, regex pattern)",
    )

    # Execution controls and free-form extras.
    timeout: int = Field(default=10, description="Timeout in seconds")
    enabled: bool = Field(default=True, description="Whether this rule is enabled")
    metadata: dict[str, Any] = Field(default_factory=dict)
31
+
32
+
33
class LocalDevSweepSpec(BaseModel):
    """Policy-based sweep over local dev repos (git working trees)."""

    # This sweep is on unless explicitly disabled.
    enabled: bool = Field(
        default=True,
        description="Whether this sweep is enabled (on by default)",
    )

    # Bounds on how far the sweep looks and what size it flags.
    max_depth: int = Field(
        default=2,
        description="How deep under dev_root to look for git repos (bounded).",
    )
    max_blob_mb: int = Field(
        default=5,
        description="Flag tracked files larger than this many MiB (working tree size).",
    )

    # Report destination.
    output: str = Field(
        default="devguard_sweep_dev.json",
        description="Where to write the JSON report (path).",
    )

    # Extra patterns; starts empty (a fresh list per instance).
    deny_globs: list[str] = Field(
        default_factory=list,
        description="Additional deny globs (appended to devguard defaults).",
    )
51
+
52
+
53
class LocalDirtyWorktreeSecretsSweepSpec(BaseModel):
    """Scan only *dirty* local git worktrees for secrets (redacted output).

    This targets:
    - untracked files
    - modified but uncommitted changes
    - local-only repos not pushed yet
    """

    # Opt-in: this sweep is off unless explicitly enabled.
    enabled: bool = Field(default=False, description="Whether this sweep is enabled")

    # --- Repo discovery -----------------------------------------------------
    dev_root: str | None = Field(
        default=None,
        description="Workspace root to discover git repos under (default: $DEV_DIR or ~/Documents/dev).",
    )
    max_depth: int = Field(
        default=2,
        description="How deep under dev_root to look for git repos (bounded).",
    )
    only_dirty: bool = Field(
        default=True,
        description="Only scan repos with uncommitted/untracked changes.",
    )
    # The lambda yields a fresh list per instance.
    exclude_repo_globs: list[str] = Field(
        default_factory=lambda: [
            "*/_trash/*",
            "*/_scratch/*",
            "*/_external/*",
            "*/_archive/*",
            "*/_forks/*",
        ],
        description="Glob patterns (matched against repo paths) to exclude from scanning.",
    )

    # --- Per-repo scan scope ------------------------------------------------
    max_paths_per_repo: int = Field(
        default=50,
        description="Maximum number of dirty file paths to scan per repo (bounds runtime).",
    )
    include_ignored_files: bool = Field(
        default=False,
        description="If true, also scan untracked files that are ignored by gitignore (noisy).",
    )

    # --- Upstream comparison ------------------------------------------------
    check_upstream: bool = Field(
        default=True,
        description="If true, compute ahead/behind vs upstream (may be stale if you haven't fetched).",
    )
    fetch_remotes: bool = Field(
        default=False,
        description="If true, run a fast `git fetch` before ahead/behind (slower; network).",
    )

    # --- Execution limits and report destination ----------------------------
    max_concurrency: int = Field(default=4, description="Maximum concurrent repo scans.")
    timeout_s: int = Field(default=180, description="Per-repo timeout upper bound in seconds.")
    output: str = Field(
        default=".state/devguard/local-dirty-worktree-secrets.json",
        description="Where to write the redacted JSON report (path).",
    )
105
+
106
+
107
class ProjectFlauditSweepSpec(BaseModel):
    """Files-to-prompt per project + OpenRouter/Gemini flaw analysis.

    For each project (or k most recently edited), aggregates README, impl, tests,
    and optional rules into a prompt, then uses OpenRouter + Gemini to find:
    readme/impl drift, readme/tests mismatch, rules violations.

    All paths and patterns are configurable; defaults suit a typical super-workspace
    but work for any layout.
    """

    # Opt-in: this sweep is off unless explicitly enabled.
    enabled: bool = Field(default=False, description="Whether this sweep is enabled")

    # --- Project selection --------------------------------------------------
    dev_root: str | None = Field(
        default=None,
        description="Workspace root. Default: $DEV_DIR or ~/Documents/dev when unset.",
    )
    k_recent: int = Field(
        default=5,
        description="Number of most recently edited projects to analyze.",
    )
    max_depth: int = Field(
        default=2,
        description="How deep under dev_root to look for git repos.",
    )

    # --- Model --------------------------------------------------------------
    model_id: str = Field(
        default="google/gemini-2.5-flash",
        description="OpenRouter model ID (e.g. google/gemini-2.5-flash, google/gemini-3.1-pro-preview).",
    )

    # --- Rules fed into the prompt ------------------------------------------
    include_rules: bool = Field(
        default=True,
        description="Include per-repo .cursor/rules in the prompt.",
    )
    workspace_rules_path: str | None = Field(
        default=None,
        description=(
            "Optional path to workspace-level rules (e.g. parent .cursor/rules). "
            "When set, rules from this dir are included for rules-violation checks. "
            "Use when repos live under a super-workspace with shared rules."
        ),
    )
    workspace_rules_include: list[str] = Field(
        default_factory=list,
        description=(
            "Rule filenames to include from workspace_rules_path (e.g. user-core.mdc). "
            "If empty and workspace_rules_path is set, a default set is used."
        ),
    )
    max_workspace_rules_chars: int = Field(
        default=15_000,
        description="Max chars for workspace rules in the prompt.",
    )
    severity_guidance: str | None = Field(
        default=None,
        description="Optional custom severity guidance for the LLM. If unset, a default calibration is used.",
    )

    # --- Repo filtering -----------------------------------------------------
    # Each lambda below yields a fresh list per instance.
    exclude_repo_globs: list[str] = Field(
        default_factory=lambda: [
            "*/_trash/*",
            "*/_scratch/*",
            "*/_external/*",
            "*/_archive/*",
            "*/_forks/*",
        ],
        description="Glob patterns to exclude repos from analysis.",
    )
    depth_0_skip_prefixes: list[str] = Field(
        default_factory=lambda: ["_", "."],
        description="At depth 0, skip dirs whose names start with these. Use [] to disable.",
    )
    depth_0_allow_names: list[str] = Field(
        default_factory=lambda: ["_infra"],
        description="Depth-0 dir names to allow despite depth_0_skip_prefixes.",
    )

    # --- Prompt sizing and scoping ------------------------------------------
    max_prompt_chars: int = Field(
        default=120_000,
        description="Max prompt size before truncation.",
    )
    scope_recent_commits: int | None = Field(
        default=None,
        description=(
            "When set, only include files changed in last N commits (plus manifests + README). "
            "Reduces prompt size and focuses on recent changes. None = full repo."
        ),
    )

    # --- Public-repo mode ---------------------------------------------------
    public_repo_names: list[str] = Field(
        default_factory=list,
        description=(
            "When non-empty, only analyze these repos (by directory name under dev_root). "
            "Used to focus on public crates; ignores k_recent and runs on all matching repos (up to cap)."
        ),
    )
    stricter_public_prompt: bool = Field(
        default=True,
        description="When public_repo_names is set, use a stricter system prompt aimed at public crate quality.",
    )

    # --- Report destination -------------------------------------------------
    output: str = Field(
        default=".state/devguard/project-flaudit.json",
        description="Where to write the JSON report.",
    )
195
+
196
+
197
class SSHKeyAuditSweepSpec(BaseModel):
    """Audit SSH keys for weak algorithms, missing passphrases, and stale registrations."""

    # This sweep is on unless explicitly disabled.
    enabled: bool = Field(
        default=True,
        description="Whether this sweep is enabled (on by default)",
    )

    # Where to look for keys.
    ssh_dir: str = Field(default="~/.ssh", description="Path to SSH directory to scan.")

    # Optional cross-check against GitHub.
    check_github: bool = Field(
        default=True,
        description="Cross-reference local keys with GitHub via `gh ssh-key list`.",
    )

    # Key-strength policy knobs.
    min_rsa_bits: int = Field(
        default=3072,
        description="Minimum RSA key size in bits; keys below this are flagged.",
    )
    flag_ecdsa: bool = Field(
        default=False,
        description="Flag ECDSA keys (some consider NIST curves weak).",
    )

    # Report destination.
    output: str = Field(
        default=".state/devguard/ssh-key-audit.json",
        description="Where to write the JSON report.",
    )
221
+
222
+
223
class GitignoreAuditSweepSpec(BaseModel):
    """Audit .gitignore files across local repos for missing hygiene patterns.

    Checks for common patterns (.env, .state/, *.log, etc.) and flags repos --
    especially public ones -- that are missing them.
    """

    # This sweep is on unless explicitly disabled.
    enabled: bool = Field(
        default=True,
        description="Whether this sweep is enabled (on by default)",
    )

    # Repo discovery settings.
    dev_root: str | None = Field(
        default=None,
        description="Workspace root. Default: $DEV_DIR or ~/Documents/dev when unset.",
    )
    max_depth: int = Field(
        default=2,
        description="How deep under dev_root to look for git repos.",
    )
    # The lambda yields a fresh list per instance.
    exclude_repo_globs: list[str] = Field(
        default_factory=lambda: [
            "*/_trash/*",
            "*/_scratch/*",
            "*/_external/*",
            "*/_archive/*",
            "*/_forks/*",
        ],
        description="Glob patterns to exclude repos from the audit.",
    )

    # Report destination.
    output: str = Field(
        default=".state/devguard/gitignore-audit.json",
        description="Where to write the JSON report.",
    )
250
+
251
+
252
class DependencyAuditSweepSpec(BaseModel):
    """Audit dependencies across local repos for known vulnerabilities.

    Detects language by manifest/lock files and runs the appropriate audit tool
    (cargo-audit, npm audit, pip-audit). Produces a unified report with
    per-repo findings bucketed by severity.
    """

    # This sweep is on unless explicitly disabled.
    enabled: bool = Field(
        default=True,
        description="Whether this sweep is enabled (on by default)",
    )

    # Repo discovery settings.
    dev_root: str | None = Field(
        default=None,
        description="Workspace root. Default: $DEV_DIR or ~/Documents/dev when unset.",
    )
    max_depth: int = Field(
        default=2,
        description="How deep under dev_root to look for git repos.",
    )
    # The lambda yields a fresh list per instance.
    exclude_repo_globs: list[str] = Field(
        default_factory=lambda: [
            "*/_trash/*",
            "*/_scratch/*",
            "*/_external/*",
            "*/_archive/*",
            "*/_forks/*",
        ],
        description="Glob patterns to exclude repos from the audit.",
    )

    # Execution limits.
    max_concurrency: int = Field(default=4, description="Maximum concurrent repo scans.")
    timeout_s: int = Field(default=120, description="Per-repo timeout upper bound in seconds.")

    # Which audit tools to run; all three listed engines are on by default.
    engines: list[str] = Field(
        default_factory=lambda: ["cargo-audit", "npm-audit", "pip-audit"],
        description="Audit engines to run. Supported: cargo-audit, npm-audit, pip-audit.",
    )

    # Report destination.
    output: str = Field(
        default=".state/devguard/dependency-audit.json",
        description="Where to write the JSON report.",
    )
286
+
287
+
288
class PublicGitHubSecretsSweepSpec(BaseModel):
    """Spec for scanning public GitHub repos for leaked secrets (redacted output)."""

    enabled: bool = Field(default=True, description="Whether this sweep is enabled")

    # --- Which repos to scan ------------------------------------------------
    owners: list[str] = Field(
        default_factory=list,
        description="GitHub owners (user/org) whose public repos should be scanned.",
    )
    max_repos: int = Field(
        default=200,
        description="Maximum number of repos to scan (bounded).",
    )
    include_repos: list[str] = Field(
        default_factory=list,
        description="Optional glob patterns for repo full names to include (owner/name).",
    )
    exclude_repos: list[str] = Field(
        default_factory=list,
        description="Optional glob patterns for repo full names to exclude (owner/name).",
    )
    include_forks: bool = Field(default=False, description="Whether to include forks.")

    # --- Scanners -----------------------------------------------------------
    # Only trufflehog runs by default; the description names kingfisher as the
    # other supported engine.
    engines: list[str] = Field(
        default_factory=lambda: ["trufflehog"],
        description="Secret scanning engines to run. Supported: trufflehog, kingfisher",
    )

    # --- Execution limits and failure policy --------------------------------
    timeout_s: int = Field(
        default=900,
        description="Per-repo timeout upper bound in seconds (bounded internally).",
    )
    max_concurrency: int = Field(
        default=4,
        description="Maximum concurrent repo scans (bounded parallelism for speed).",
    )
    fail_on_errors: bool = Field(
        default=False,
        description="If true, treat scan errors (missed repos) as a CI failure.",
    )

    # --- Report destination -------------------------------------------------
    output: str = Field(
        default="public_github_secret_scan.json",
        description="Where to write the redacted JSON report (path).",
    )
330
+
331
+
332
class AIEditorConfigAuditSweepSpec(BaseModel):
    """Audit AI editor configs (Claude, Cursor, Copilot, MCP) across repos.

    Checks CLAUDE.md presence/validity, .claude/ structure, Cursor .mdc frontmatter,
    MCP JSON validity, hardcoded secrets, cross-tool rule consistency, and gitignore coverage.
    """

    # This sweep is on unless explicitly disabled.
    enabled: bool = Field(
        default=True,
        description="Whether this sweep is enabled (on by default)",
    )

    # Repo discovery settings.
    dev_root: str | None = Field(
        default=None,
        description="Workspace root. Default: $DEV_DIR or ~/Documents/dev when unset.",
    )
    max_depth: int = Field(
        default=2,
        description="How deep under dev_root to look for git repos.",
    )
    # The lambda yields a fresh list per instance.
    exclude_repo_globs: list[str] = Field(
        default_factory=lambda: [
            "*/_trash/*",
            "*/_scratch/*",
            "*/_external/*",
            "*/_archive/*",
            "*/_forks/*",
        ],
        description="Glob patterns to exclude repos from the audit.",
    )

    # Reporting filter.
    only_with_configs: bool = Field(
        default=True,
        description="Only report repos that have at least one AI editor config.",
    )

    # Report destination.
    output: str = Field(
        default=".state/devguard/ai-editor-config-audit.json",
        description="Where to write the JSON report.",
    )
363
+
364
+
365
class CargoPublishAuditSweepSpec(BaseModel):
    """Audit Rust repos for correct cargo publish CI pipelines.

    Checks the full e2e: tag triggers, OIDC trusted publishing, dry-run on PRs,
    version/tag consistency, workflow completeness, and token hygiene.
    """

    # Opt-in: this sweep is off unless explicitly enabled.
    enabled: bool = Field(default=False, description="Whether this sweep is enabled")

    # Repo discovery settings.
    dev_root: str | None = Field(
        default=None,
        description="Workspace root. Default: $DEV_DIR or ~/Documents/dev when unset.",
    )
    max_depth: int = Field(
        default=2,
        description="How deep under dev_root to look for git repos.",
    )
    # The lambda yields a fresh list per instance.
    exclude_repo_globs: list[str] = Field(
        default_factory=lambda: [
            "*/_trash/*",
            "*/_scratch/*",
            "*/_external/*",
            "*/_archive/*",
            "*/_forks/*",
        ],
        description="Glob patterns to exclude repos from the audit.",
    )

    # Narrowing filters.
    only_public: bool = Field(
        default=False,
        description="Only audit repos that appear public (have a LICENSE file).",
    )
    repo_names: list[str] = Field(
        default_factory=list,
        description=(
            "When non-empty, only audit these repos (by directory name). "
            "Useful to focus on published crates."
        ),
    )

    # Report destination.
    output: str = Field(
        default=".state/devguard/cargo-publish-audit.json",
        description="Where to write the JSON report.",
    )
401
+
402
+
403
class PublishAuditSweepSpec(BaseModel):
    """Audit PyPI and npm repos for correct publish CI pipelines.

    Checks OIDC trusted publishing, workflow correctness, version consistency,
    license presence, and secret hygiene. Complements cargo_publish_audit for
    non-Rust ecosystems.
    """

    # Opt-in: this sweep is off unless explicitly enabled.
    enabled: bool = Field(default=False, description="Whether this sweep is enabled")

    # Repo discovery settings.
    dev_root: str | None = Field(
        default=None,
        description="Workspace root. Default: $DEV_DIR or ~/Documents/dev when unset.",
    )
    max_depth: int = Field(
        default=2,
        description="How deep under dev_root to look for git repos.",
    )
    # The lambda yields a fresh list per instance.
    exclude_repo_globs: list[str] = Field(
        default_factory=lambda: [
            "*/_trash/*",
            "*/_scratch/*",
            "*/_external/*",
            "*/_archive/*",
            "*/_forks/*",
        ],
        description="Glob patterns to exclude repos from the audit.",
    )

    # Both listed ecosystems are audited by default.
    ecosystems: list[str] = Field(
        default_factory=lambda: ["pypi", "npm"],
        description="Ecosystems to audit. Supported: pypi, npm.",
    )

    # Report destination.
    output: str = Field(
        default=".state/devguard/publish-audit.json",
        description="Where to write the JSON report.",
    )
435
+
436
+
437
class SweepSpec(BaseModel):
    """Spec for all sweeps (policy checks)."""

    # Each field holds the configuration for one sweep. The model class itself
    # is passed as ``default_factory`` (rather than wrapped in a lambda), so an
    # omitted section is filled with a freshly constructed spec carrying that
    # sweep's own defaults.
    local_dev: LocalDevSweepSpec = Field(
        default_factory=LocalDevSweepSpec,
        description="Local dev workspace sweep",
    )
    public_github_secrets: PublicGitHubSecretsSweepSpec = Field(
        default_factory=PublicGitHubSecretsSweepSpec,
        description="Scan public GitHub repos for leaked secrets (redacted)",
    )
    local_dirty_worktree_secrets: LocalDirtyWorktreeSecretsSweepSpec = Field(
        default_factory=LocalDirtyWorktreeSecretsSweepSpec,
        description="Scan dirty local git worktrees for secrets (redacted)",
    )
    project_flaudit: ProjectFlauditSweepSpec = Field(
        default_factory=ProjectFlauditSweepSpec,
        description="Files-to-prompt + OpenRouter/Gemini flaw analysis per project",
    )
    gitignore_audit: GitignoreAuditSweepSpec = Field(
        default_factory=GitignoreAuditSweepSpec,
        description="Audit .gitignore files for missing hygiene patterns",
    )
    dependency_audit: DependencyAuditSweepSpec = Field(
        default_factory=DependencyAuditSweepSpec,
        description="Audit dependencies for known vulnerabilities",
    )
    ssh_key_audit: SSHKeyAuditSweepSpec = Field(
        default_factory=SSHKeyAuditSweepSpec,
        description="Audit SSH keys for weak algorithms, missing passphrases, stale registrations",
    )
    cargo_publish_audit: CargoPublishAuditSweepSpec = Field(
        default_factory=CargoPublishAuditSweepSpec,
        description="Audit Rust repos for correct cargo publish CI pipelines",
    )
    ai_editor_config_audit: AIEditorConfigAuditSweepSpec = Field(
        default_factory=AIEditorConfigAuditSweepSpec,
        description="Audit AI editor configs (Claude, Cursor, Copilot, MCP) across repos",
    )
    publish_audit: PublishAuditSweepSpec = Field(
        default_factory=PublishAuditSweepSpec,
        description="Audit PyPI and npm repos for correct publish CI pipelines",
    )
480
+
481
+
482
class MonitorSpec(BaseModel):
    """Specification of what to monitor."""

    # Identity: the name is required, the description is optional.
    name: str = Field(description="Name of this monitoring spec")
    description: str | None = Field(
        default=None,
        description="Description of what this monitors",
    )

    # Resources: discovered automatically via rules and/or listed by hand.
    discovery_rules: list[DiscoveryRule] = Field(
        default_factory=list,
        description="Rules for auto-discovery",
    )
    manual_resources: dict[str, list[str]] = Field(
        default_factory=dict,
        description="Manually specified resources: {type: [names]}",
    )
    filters: dict[str, Any] = Field(
        default_factory=dict,
        description="Filters to apply to discovered resources",
    )

    # Policy sweeps; an omitted section gets a SweepSpec built from defaults.
    sweeps: SweepSpec = Field(
        default_factory=SweepSpec,
        description="Policy sweeps (e.g., local dev repo hygiene).",
    )
502
+
503
+
504
def load_spec(spec_path: Path) -> MonitorSpec:
    """Load a monitoring spec from a YAML file.

    Args:
        spec_path: Path to the YAML spec file.

    Returns:
        A ``MonitorSpec`` built from the file's top-level mapping. An empty
        document is treated as an empty mapping.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the document's top level is not a mapping. Field
            validation failures raised by ``MonitorSpec`` also propagate.
    """
    import yaml  # type: ignore[import-not-found]

    # Read as UTF-8 explicitly so parsing does not depend on the platform's
    # locale encoding.
    with open(spec_path, encoding="utf-8") as f:
        data = yaml.safe_load(f) or {}

    # A top-level list or scalar would otherwise fail below with an opaque
    # AttributeError on ``.get``; report the real problem instead.
    if not isinstance(data, dict):
        raise ValueError(
            f"Spec file {spec_path} must contain a YAML mapping at the top level, "
            f"got {type(data).__name__}"
        )

    # Be tolerant of YAML keys that are present but null (common when a section is
    # left empty with only comments): replace them with an empty container.
    empty_containers = (
        ("discovery_rules", list),
        ("manual_resources", dict),
        ("filters", dict),
        ("sweeps", dict),
    )
    for key, make_empty in empty_containers:
        if data.get(key) is None:
            data[key] = make_empty()
    return MonitorSpec(**data)
522
+
523
+
524
def get_default_spec() -> MonitorSpec:
    """Build the built-in monitoring spec.

    Returns:
        A ``MonitorSpec`` named ``"default"`` containing ten discovery rules
        (npm, GitHub, Fly, Vercel, domains, SSH keys, GitHub username). No
        manual resources, filters, or sweep overrides are set.
    """
    # Order is preserved exactly; it is the order the rules appear in the spec.
    rules: list[DiscoveryRule] = [
        # --- npm packages: installed dependencies, then package manifests ---
        DiscoveryRule(  # type: ignore[call-arg]
            name="npm_list",
            type="npm",
            method="cli",
            command="npm list --depth=0 --json",
            command_parser="json",
            extract_path="dependencies.keys()",
            timeout=10,
        ),
        DiscoveryRule(  # type: ignore[call-arg]
            name="npm_package_json",
            type="npm",
            method="file_scan",
            file_pattern="**/package.json",
            file_extractor="json_path",
            extract_path="name",
            timeout=30,
        ),
        # --- Hosted resources listed through vendor CLIs / config files ---
        DiscoveryRule(  # type: ignore[call-arg]
            name="github_repos",
            type="github",
            method="cli",
            command="gh repo list --json nameWithOwner --limit 100",
            command_parser="json",
            extract_path="[].nameWithOwner",
            timeout=10,
        ),
        DiscoveryRule(  # type: ignore[call-arg]
            name="fly_apps",
            type="fly",
            method="cli",
            command="flyctl apps list --json",
            command_parser="json",
            extract_path="[].Name",
            timeout=10,
        ),
        DiscoveryRule(  # type: ignore[call-arg]
            name="vercel_projects",
            type="vercel",
            method="file_scan",
            file_pattern="**/vercel.json",
            file_extractor="json_path",
            extract_path="name",
            timeout=30,
        ),
        # --- Domains scraped from URLs in config-like files ---
        # NOTE(review): the `{a,b}` brace syntax is not understood by Python's
        # glob/pathlib matching -- confirm the file scanner expands it.
        DiscoveryRule(  # type: ignore[call-arg]
            name="domains",
            type="domain",
            method="file_scan",
            file_pattern="**/*.{json,yaml,yml,toml,env}",
            file_extractor="regex",
            extract_path=r"https?://([a-zA-Z0-9.-]+\.[a-zA-Z]{2,})",
            timeout=30,
        ),
        # --- GitHub activity ---
        # NOTE(review): `gh api` does not appear to accept `--limit` -- verify
        # that these two commands actually run as written.
        DiscoveryRule(  # type: ignore[call-arg]
            name="github_commits",
            type="github_commits",
            method="cli",
            command="gh api user/events --jq '.[] | select(.type == \"PushEvent\") | {repo: .repo.name, message: .payload.commits[0].message, date: .created_at}' --limit 20",
            command_parser="json_lines",
            timeout=10,
        ),
        # NOTE(review): `{username}` is presumably substituted by the caller
        # before execution, and `-f` without an explicit GET method may make
        # `gh api` send a POST -- confirm both.
        DiscoveryRule(  # type: ignore[call-arg]
            name="github_mentions",
            type="github_mentions",
            method="cli",
            command="gh api search/issues -f 'q=mentions:{username}' --jq '.items[] | {title: .title, url: .html_url, state: .state, created_at: .created_at}' --limit 20",
            command_parser="json_lines",
            timeout=10,
        ),
        # --- Local identity ---
        DiscoveryRule(  # type: ignore[call-arg]
            name="ssh_keys",
            type="ssh_key",
            method="file_scan",
            file_pattern="~/.ssh/*.pub",
            file_extractor="raw",
            timeout=5,
        ),
        DiscoveryRule(  # type: ignore[call-arg]
            name="github_username",
            type="username",
            method="cli",
            command="gh api user --jq .login",
            command_parser="text",
            timeout=5,
        ),
    ]

    return MonitorSpec(
        name="default",
        description="Default Guardian monitoring spec",
        discovery_rules=rules,
    )
@@ -0,0 +1,23 @@
1
+ """Sweep utilities (local policy checks, etc.)."""
2
+
3
+ from .local_dev import (
4
+ DEFAULT_DENY_GLOBS as DEFAULT_DENY_GLOBS,
5
+ )
6
+ from .local_dev import (
7
+ default_dev_root as default_dev_root,
8
+ )
9
+ from .local_dev import (
10
+ sweep_dev_repos as sweep_dev_repos,
11
+ )
12
+ from .local_dev import (
13
+ write_report as write_report,
14
+ )
15
+ from .public_github_secrets import scan_public_github_repos as scan_public_github_repos
16
+
17
+ __all__ = [
18
+ "DEFAULT_DENY_GLOBS",
19
+ "default_dev_root",
20
+ "scan_public_github_repos",
21
+ "sweep_dev_repos",
22
+ "write_report",
23
+ ]