devguard 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. devguard/INTEGRATION_SUMMARY.md +121 -0
  2. devguard/__init__.py +3 -0
  3. devguard/__main__.py +6 -0
  4. devguard/checkers/__init__.py +41 -0
  5. devguard/checkers/api_usage.py +523 -0
  6. devguard/checkers/aws_cost.py +331 -0
  7. devguard/checkers/aws_iam.py +284 -0
  8. devguard/checkers/base.py +25 -0
  9. devguard/checkers/container.py +137 -0
  10. devguard/checkers/domain.py +189 -0
  11. devguard/checkers/firecrawl.py +117 -0
  12. devguard/checkers/fly.py +225 -0
  13. devguard/checkers/github.py +210 -0
  14. devguard/checkers/npm.py +327 -0
  15. devguard/checkers/npm_security.py +244 -0
  16. devguard/checkers/redteam.py +290 -0
  17. devguard/checkers/secret.py +279 -0
  18. devguard/checkers/swarm.py +376 -0
  19. devguard/checkers/tailscale.py +143 -0
  20. devguard/checkers/tailsnitch.py +303 -0
  21. devguard/checkers/tavily.py +179 -0
  22. devguard/checkers/vercel.py +192 -0
  23. devguard/cli.py +1510 -0
  24. devguard/cli_helpers.py +189 -0
  25. devguard/config.py +249 -0
  26. devguard/core.py +293 -0
  27. devguard/dashboard.py +715 -0
  28. devguard/discovery.py +363 -0
  29. devguard/http_client.py +142 -0
  30. devguard/llm_service.py +481 -0
  31. devguard/mcp_server.py +259 -0
  32. devguard/metrics.py +144 -0
  33. devguard/models.py +208 -0
  34. devguard/reporting.py +1571 -0
  35. devguard/sarif.py +295 -0
  36. devguard/scripts/ANALYSIS_SUMMARY.md +141 -0
  37. devguard/scripts/README.md +221 -0
  38. devguard/scripts/auto_fix_recommendations.py +145 -0
  39. devguard/scripts/generate_npmignore.py +175 -0
  40. devguard/scripts/generate_security_report.py +324 -0
  41. devguard/scripts/prepublish_check.sh +29 -0
  42. devguard/scripts/redteam_npm_packages.py +1262 -0
  43. devguard/scripts/review_all_repos.py +300 -0
  44. devguard/spec.py +617 -0
  45. devguard/sweeps/__init__.py +23 -0
  46. devguard/sweeps/ai_editor_config_audit.py +697 -0
  47. devguard/sweeps/cargo_publish_audit.py +655 -0
  48. devguard/sweeps/dependency_audit.py +419 -0
  49. devguard/sweeps/gitignore_audit.py +336 -0
  50. devguard/sweeps/local_dev.py +260 -0
  51. devguard/sweeps/local_dirty_worktree_secrets.py +521 -0
  52. devguard/sweeps/project_flaudit.py +636 -0
  53. devguard/sweeps/public_github_secrets.py +680 -0
  54. devguard/sweeps/publish_audit.py +478 -0
  55. devguard/sweeps/ssh_key_audit.py +327 -0
  56. devguard/utils.py +174 -0
  57. devguard-0.2.0.dist-info/METADATA +225 -0
  58. devguard-0.2.0.dist-info/RECORD +60 -0
  59. devguard-0.2.0.dist-info/WHEEL +4 -0
  60. devguard-0.2.0.dist-info/entry_points.txt +2 -0
devguard/discovery.py ADDED
@@ -0,0 +1,363 @@
1
+ """Agnostic discovery engine based on spec rules."""
2
+
3
import asyncio
import json
import logging
import re
import shlex
from pathlib import Path
from typing import Any

from devguard.spec import DiscoveryRule, MonitorSpec
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
+ def _parse_json_robustly(output: str) -> Any | None:
16
+ """Parse JSON from CLI output, handling common issues like update banners.
17
+
18
+ Many CLI tools (npm, gh, etc.) print non-JSON text like "Update available!"
19
+ before or after the actual JSON. This function extracts the JSON portion.
20
+ """
21
+ output = output.strip()
22
+ if not output:
23
+ return None
24
+
25
+ # Try direct parse first (fast path)
26
+ try:
27
+ return json.loads(output)
28
+ except json.JSONDecodeError:
29
+ pass
30
+
31
+ # Find JSON object or array boundaries
32
+ # Look for first { or [ and matching last } or ]
33
+ obj_start = output.find("{")
34
+ arr_start = output.find("[")
35
+
36
+ if obj_start == -1 and arr_start == -1:
37
+ return None
38
+
39
+ # Determine which comes first
40
+ if obj_start == -1:
41
+ start_char, end_char = "[", "]"
42
+ start_idx = arr_start
43
+ elif arr_start == -1:
44
+ start_char, end_char = "{", "}"
45
+ start_idx = obj_start
46
+ else:
47
+ if obj_start < arr_start:
48
+ start_char, end_char = "{", "}"
49
+ start_idx = obj_start
50
+ else:
51
+ start_char, end_char = "[", "]"
52
+ start_idx = arr_start
53
+
54
+ # Find the matching end
55
+ end_idx = output.rfind(end_char)
56
+ if end_idx == -1 or end_idx <= start_idx:
57
+ return None
58
+
59
+ json_str = output[start_idx : end_idx + 1]
60
+
61
+ try:
62
+ return json.loads(json_str)
63
+ except json.JSONDecodeError:
64
+ logger.debug(f"Failed to parse extracted JSON: {json_str[:100]}...")
65
+ return None
66
+
67
+
68
class DiscoveryResult:
    """Container for auto-discovery output: resources, errors, metadata."""

    def __init__(self):
        # Maps resource type -> ordered list of unique discovered values.
        self.resources: dict[str, list[Any]] = {}
        self.errors: list[str] = []
        self.metadata: dict[str, Any] = {}

    def add_resource(self, resource_type: str, value: Any) -> None:
        """Record a discovered resource, skipping duplicates per type."""
        bucket = self.resources.setdefault(resource_type, [])
        if value not in bucket:
            bucket.append(value)

    def to_dict(self) -> dict[str, Any]:
        """Serialize the result into a plain dictionary."""
        return {
            "resources": self.resources,
            "errors": self.errors,
            "metadata": self.metadata,
        }
90
+
91
+
92
+ async def execute_cli_command(
93
+ command: str, parser: str, extract_path: str | None, timeout: int, username: str | None = None
94
+ ) -> list[Any]:
95
+ """Execute a CLI command and parse results."""
96
+ results = []
97
+
98
+ # Replace {username} placeholder if present
99
+ if username and "{username}" in command:
100
+ command = command.replace("{username}", username)
101
+
102
+ try:
103
+ # Split command into parts
104
+ cmd_parts = command.split()
105
+ if not cmd_parts:
106
+ return results
107
+
108
+ result = await asyncio.wait_for(
109
+ asyncio.create_subprocess_exec(
110
+ *cmd_parts,
111
+ stdout=asyncio.subprocess.PIPE,
112
+ stderr=asyncio.subprocess.PIPE,
113
+ ),
114
+ timeout=timeout,
115
+ )
116
+ stdout, stderr = await result.communicate()
117
+
118
+ if result.returncode != 0:
119
+ logger.debug(f"Command failed: {command} (exit code {result.returncode})")
120
+ return results
121
+
122
+ output = stdout.decode().strip()
123
+
124
+ if parser == "json":
125
+ data = _parse_json_robustly(output)
126
+ if data is not None:
127
+ if extract_path:
128
+ results = _extract_json_path(data, extract_path)
129
+ else:
130
+ results = [data] if data else []
131
+ elif parser == "json_lines":
132
+ for line in output.split("\n"):
133
+ if line.strip():
134
+ try:
135
+ data = json.loads(line)
136
+ results.append(data)
137
+ except json.JSONDecodeError:
138
+ pass
139
+ elif parser == "lines":
140
+ results = [line.strip() for line in output.split("\n") if line.strip()]
141
+ elif parser == "text":
142
+ if output:
143
+ results = [output]
144
+ else:
145
+ logger.warning(f"Unknown parser: {parser}")
146
+
147
+ except TimeoutError:
148
+ logger.warning(f"Command timed out: {command}")
149
+ except Exception as e:
150
+ logger.debug(f"Error executing command: {command}: {e}")
151
+
152
+ return results
153
+
154
+
155
+ def _extract_json_path(data: Any, path: str) -> list[Any]:
156
+ """Extract values from JSON using a simple path syntax."""
157
+ results = []
158
+
159
+ # Simple path extraction
160
+ # Supports: "key", "key.subkey", "[].key", "dependencies.keys()"
161
+ try:
162
+ if path.endswith(".keys()"):
163
+ # Extract keys from a dict
164
+ key_path = path[:-7]
165
+ obj = _get_json_value(data, key_path)
166
+ if isinstance(obj, dict):
167
+ results = list(obj.keys())
168
+ elif path.startswith("[].") or path.startswith("[]."):
169
+ # Array extraction
170
+ key = path[3:]
171
+ if isinstance(data, list):
172
+ for item in data:
173
+ value = _get_json_value(item, key)
174
+ if value is not None:
175
+ results.append(value)
176
+ else:
177
+ value = _get_json_value(data, path)
178
+ if value is not None:
179
+ results = [value] if not isinstance(value, list) else value
180
+ except Exception as e:
181
+ logger.debug(f"Error extracting JSON path {path}: {e}")
182
+
183
+ return results
184
+
185
+
186
+ def _get_json_value(data: Any, path: str) -> Any:
187
+ """Get a value from nested JSON using dot notation."""
188
+ parts = path.split(".")
189
+ current = data
190
+ for part in parts:
191
+ if isinstance(current, dict):
192
+ current = current.get(part)
193
+ elif isinstance(current, list) and part.isdigit():
194
+ current = current[int(part)]
195
+ else:
196
+ return None
197
+ if current is None:
198
+ return None
199
+ return current
200
+
201
+
202
async def scan_files(
    base_path: Path,
    pattern: str,
    extractor: str,
    extract_path: str | None,
    timeout: int,
) -> list[Any]:
    """Scan files matching a pattern and extract data.

    Args:
        base_path: Root for relative and glob patterns.
        pattern: File path or glob; "~/..." is expanded to the home dir.
        extractor: Passed to _extract_from_file ("json_path", "yaml_path",
            "regex", "raw").
        extract_path: Path/regex argument for the extractor.
        timeout: Soft budget in seconds, checked between files in the
            glob branch (a single slow file is not interrupted).

    Returns:
        Flattened list of extracted values; empty on error.
    """
    results: list[Any] = []
    start_time = asyncio.get_event_loop().time()

    try:
        # Expand ~ first, then resolve the concrete path.
        if pattern.startswith("~/"):
            pattern = str(Path.home() / pattern[2:])
        # Bug fix: the original assigned search_path only in the relative
        # and absolute branches; a "~/..." pattern without a glob hit an
        # UnboundLocalError in the single-file branch below.
        if pattern.startswith("/"):
            search_path = Path(pattern)
        else:
            search_path = base_path / pattern

        if "*" in pattern:  # covers "**" too
            glob_pattern = pattern.replace("**/", "").replace("~", str(Path.home()))
            for file_path in base_path.rglob(glob_pattern):
                # Enforce the soft time budget between files.
                if (asyncio.get_event_loop().time() - start_time) > timeout:
                    break
                try:
                    extracted = _extract_from_file(file_path, extractor, extract_path)
                    if extracted:
                        results.extend(extracted if isinstance(extracted, list) else [extracted])
                except Exception as e:
                    logger.debug(f"Error processing {file_path}: {e}")
        else:
            # Single file
            if search_path.exists():
                extracted = _extract_from_file(search_path, extractor, extract_path)
                if extracted:
                    results.extend(extracted if isinstance(extracted, list) else [extracted])
    except Exception as e:
        logger.warning(f"Error scanning files: {e}")

    return results
246
+
247
+
248
+ def _extract_from_file(file_path: Path, extractor: str, extract_path: str | None) -> Any:
249
+ """Extract data from a file based on extractor type."""
250
+ try:
251
+ content = file_path.read_text()
252
+
253
+ if extractor == "json_path":
254
+ data = json.loads(content)
255
+ if extract_path:
256
+ return _extract_json_path(data, extract_path)
257
+ return data
258
+ elif extractor == "yaml_path":
259
+ import yaml
260
+
261
+ data = yaml.safe_load(content)
262
+ if extract_path:
263
+ return _extract_json_path(data, extract_path) # Same logic works for YAML
264
+ return data
265
+ elif extractor == "regex":
266
+ if extract_path:
267
+ matches = re.findall(extract_path, content)
268
+ return list(set(matches)) # Remove duplicates
269
+ return []
270
+ elif extractor == "raw":
271
+ return content.strip()
272
+ else:
273
+ logger.warning(f"Unknown extractor: {extractor}")
274
+ return None
275
+ except Exception as e:
276
+ logger.debug(f"Error extracting from {file_path}: {e}")
277
+ return None
278
+
279
+
280
async def discover_from_rule(
    rule: DiscoveryRule, base_path: Path | None = None, username: str | None = None
) -> list[Any]:
    """Discover resources using a single rule.

    Dispatches on rule.method ("cli", "file_scan", "api", "custom");
    disabled rules and unimplemented/unknown methods yield an empty list.
    """
    if not rule.enabled:
        return []

    root = base_path if base_path is not None else Path.home() / "Documents" / "dev"
    method = rule.method

    if method == "cli":
        if not rule.command:
            logger.warning(f"Rule {rule.name} has method=cli but no command")
            return []
        return await execute_cli_command(
            rule.command, rule.command_parser or "text", rule.extract_path, rule.timeout, username
        )

    if method == "file_scan":
        if not rule.file_pattern:
            logger.warning(f"Rule {rule.name} has method=file_scan but no file_pattern")
            return []
        return await scan_files(
            root,
            rule.file_pattern,
            rule.file_extractor or "raw",
            rule.extract_path,
            rule.timeout,
        )

    if method == "api":
        # API-based discovery would go here
        logger.warning(f"API method not yet implemented for rule {rule.name}")
        return []

    if method == "custom":
        # Custom discovery would go here
        logger.warning(f"Custom method not yet implemented for rule {rule.name}")
        return []

    logger.warning(f"Unknown method: {method} for rule {rule.name}")
    return []
319
+
320
+
321
async def discover_all(
    spec: MonitorSpec, base_path: Path | None = None, username: str | None = None
) -> DiscoveryResult:
    """Run all discovery rules from a spec.

    Resolves the username first (from a rule of type "username" if not
    provided), runs the remaining rules concurrently, then merges in
    spec.manual_resources. Per-rule failures are collected in .errors.
    """
    result = DiscoveryResult()

    # Get username if needed
    if not username:
        # Try to get from a username discovery rule
        username_rules = [r for r in spec.discovery_rules if r.type == "username"]
        if username_rules:
            try:
                username_results = await discover_from_rule(username_rules[0], base_path)
                if username_results:
                    username = username_results[0]
            except Exception:
                pass

    # Bug fix: keep the rules that actually got a task in a parallel list.
    # The original zipped *all* spec.discovery_rules against the gathered
    # results even though username rules were skipped when building tasks,
    # so any username rule shifted the pairing and resources were recorded
    # under the wrong rule name/type.
    scheduled = [r for r in spec.discovery_rules if r.type != "username"]
    tasks = [discover_from_rule(rule, base_path, username) for rule in scheduled]

    try:
        rule_results = await asyncio.gather(*tasks, return_exceptions=True)

        for rule, rule_result in zip(scheduled, rule_results):
            if isinstance(rule_result, Exception):
                result.errors.append(f"{rule.name}: {str(rule_result)}")
            elif isinstance(rule_result, list):
                for value in rule_result:
                    result.add_resource(rule.type, value)
    except Exception as e:
        result.errors.append(f"Discovery error: {str(e)}")

    # Add manual resources
    for resource_type, resources in spec.manual_resources.items():
        for resource in resources:
            result.add_resource(resource_type, resource)

    return result
@@ -0,0 +1,142 @@
1
+ """Shared HTTP client utilities with best practices for monitoring."""
2
+
3
+ import asyncio
4
+ import logging
5
+ import random
6
+ from collections.abc import Callable
7
+ from typing import Any
8
+
9
+ import httpx
10
+ from httpx import Timeout
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
# Default timeout configuration for monitoring.
# All values are in seconds; kept short so probes fail fast rather than
# hang on an unresponsive endpoint.
DEFAULT_TIMEOUT = Timeout(
    connect=5.0,  # Connection establishment
    read=10.0,  # Reading response
    write=5.0,  # Sending request
    pool=2.0,  # Pool acquisition
)

# Default connection limits for the shared client.
# keepalive_expiry is in seconds; see httpx.Limits for semantics.
DEFAULT_LIMITS = httpx.Limits(
    max_connections=20,
    max_keepalive_connections=10,
    keepalive_expiry=15.0,
)
28
+
29
+
30
def create_client(
    timeout: Timeout | None = None,
    limits: httpx.Limits | None = None,
) -> httpx.AsyncClient:
    """Create an AsyncClient with sensible defaults for monitoring.

    Any argument left as None falls back to the module-level defaults
    (DEFAULT_TIMEOUT / DEFAULT_LIMITS).
    """
    effective_timeout = timeout or DEFAULT_TIMEOUT
    effective_limits = limits or DEFAULT_LIMITS
    return httpx.AsyncClient(timeout=effective_timeout, limits=effective_limits)
39
+
40
+
41
async def retry_with_backoff(
    func: Callable,
    max_retries: int = 3,
    base_delay: float = 1.0,
    max_delay: float = 32.0,
    jitter: bool = True,
) -> Any:
    """
    Retry a coroutine with exponential backoff.

    For rate limiting (429), respects the Retry-After header if available;
    otherwise (and for 5xx / network / request errors) uses exponential
    backoff capped at max_delay. Client errors other than 429 are re-raised
    immediately.

    Args:
        func: Zero-argument callable returning the awaitable to retry.
        max_retries: Maximum number of attempts (returns None if 0).
        base_delay: Initial backoff delay in seconds.
        max_delay: Upper bound for computed backoff delays.
        jitter: Scale delays by a random factor in [0.5, 1.5) to avoid
            thundering herds (not applied to the 429 path, matching the
            original behavior).

    Raises:
        The last retryable exception once all attempts are exhausted; a
        non-retryable httpx.HTTPStatusError immediately.
    """

    def _backoff(attempt: int) -> float:
        # Capped exponential backoff with optional jitter.
        delay = min(base_delay * (2**attempt), max_delay)
        if jitter:
            delay *= 0.5 + random.random()
        return delay

    last_exception: Exception | None = None

    for attempt in range(max_retries):
        try:
            return await func()
        except httpx.HTTPStatusError as e:
            status = e.response.status_code
            if status == 429:
                # Respect Retry-After if the server sent a usable value;
                # otherwise fall back to capped exponential backoff (the
                # original fallback was uncapped by max_delay).
                delay = None
                retry_after = e.response.headers.get("retry-after")
                if retry_after:
                    try:
                        delay = float(retry_after)
                    except ValueError:
                        delay = None
                if delay is None:
                    delay = min(base_delay * (2**attempt), max_delay)
                logger.info(f"Rate limited. Retrying after {delay}s")
            elif 500 <= status < 600:
                # Server error - retry with backoff
                delay = _backoff(attempt)
                logger.info(
                    f"Server error {status}. "
                    f"Retrying in {delay:.2f}s (attempt {attempt + 1}/{max_retries})"
                )
            else:
                # Client error or other - don't retry
                raise
            last_exception = e
        except (httpx.ConnectError, httpx.TimeoutException) as e:
            # Network/timeout error - retry with backoff
            delay = _backoff(attempt)
            logger.info(
                f"Network/timeout error. "
                f"Retrying in {delay:.2f}s (attempt {attempt + 1}/{max_retries})"
            )
            last_exception = e
        except httpx.RequestError as e:
            # Other request errors - retry with backoff
            delay = _backoff(attempt)
            logger.info(
                f"Request error. Retrying in {delay:.2f}s (attempt {attempt + 1}/{max_retries})"
            )
            last_exception = e

        # Bug fix: the original slept even after the final attempt, delaying
        # the eventual re-raise for nothing. Only sleep when a retry follows.
        if attempt + 1 < max_retries:
            await asyncio.sleep(delay)

    if last_exception:
        raise last_exception
114
+
115
+
116
+ def classify_error(exception: Exception, status_code: int | None = None) -> str:
117
+ """
118
+ Classify error severity for monitoring purposes.
119
+
120
+ Returns: 'transient', 'permanent', or 'rate_limited'
121
+ """
122
+ # Rate limiting
123
+ if status_code in [429, 503]:
124
+ return "rate_limited"
125
+
126
+ # Server errors (5xx) are typically transient
127
+ if status_code and 500 <= status_code < 600:
128
+ return "transient"
129
+
130
+ # Client errors (4xx) except 429 are permanent
131
+ if status_code and 400 <= status_code < 500:
132
+ return "permanent"
133
+
134
+ # Network errors are transient
135
+ if isinstance(exception, (httpx.ConnectError, httpx.TimeoutException)):
136
+ return "transient"
137
+
138
+ # Connection reset, read errors are transient
139
+ if isinstance(exception, httpx.RequestError):
140
+ return "transient"
141
+
142
+ return "permanent"