devguard 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. devguard/INTEGRATION_SUMMARY.md +121 -0
  2. devguard/__init__.py +3 -0
  3. devguard/__main__.py +6 -0
  4. devguard/checkers/__init__.py +41 -0
  5. devguard/checkers/api_usage.py +523 -0
  6. devguard/checkers/aws_cost.py +331 -0
  7. devguard/checkers/aws_iam.py +284 -0
  8. devguard/checkers/base.py +25 -0
  9. devguard/checkers/container.py +137 -0
  10. devguard/checkers/domain.py +189 -0
  11. devguard/checkers/firecrawl.py +117 -0
  12. devguard/checkers/fly.py +225 -0
  13. devguard/checkers/github.py +210 -0
  14. devguard/checkers/npm.py +327 -0
  15. devguard/checkers/npm_security.py +244 -0
  16. devguard/checkers/redteam.py +290 -0
  17. devguard/checkers/secret.py +279 -0
  18. devguard/checkers/swarm.py +376 -0
  19. devguard/checkers/tailscale.py +143 -0
  20. devguard/checkers/tailsnitch.py +303 -0
  21. devguard/checkers/tavily.py +179 -0
  22. devguard/checkers/vercel.py +192 -0
  23. devguard/cli.py +1510 -0
  24. devguard/cli_helpers.py +189 -0
  25. devguard/config.py +249 -0
  26. devguard/core.py +293 -0
  27. devguard/dashboard.py +715 -0
  28. devguard/discovery.py +363 -0
  29. devguard/http_client.py +142 -0
  30. devguard/llm_service.py +481 -0
  31. devguard/mcp_server.py +259 -0
  32. devguard/metrics.py +144 -0
  33. devguard/models.py +208 -0
  34. devguard/reporting.py +1571 -0
  35. devguard/sarif.py +295 -0
  36. devguard/scripts/ANALYSIS_SUMMARY.md +141 -0
  37. devguard/scripts/README.md +221 -0
  38. devguard/scripts/auto_fix_recommendations.py +145 -0
  39. devguard/scripts/generate_npmignore.py +175 -0
  40. devguard/scripts/generate_security_report.py +324 -0
  41. devguard/scripts/prepublish_check.sh +29 -0
  42. devguard/scripts/redteam_npm_packages.py +1262 -0
  43. devguard/scripts/review_all_repos.py +300 -0
  44. devguard/spec.py +617 -0
  45. devguard/sweeps/__init__.py +23 -0
  46. devguard/sweeps/ai_editor_config_audit.py +697 -0
  47. devguard/sweeps/cargo_publish_audit.py +655 -0
  48. devguard/sweeps/dependency_audit.py +419 -0
  49. devguard/sweeps/gitignore_audit.py +336 -0
  50. devguard/sweeps/local_dev.py +260 -0
  51. devguard/sweeps/local_dirty_worktree_secrets.py +521 -0
  52. devguard/sweeps/project_flaudit.py +636 -0
  53. devguard/sweeps/public_github_secrets.py +680 -0
  54. devguard/sweeps/publish_audit.py +478 -0
  55. devguard/sweeps/ssh_key_audit.py +327 -0
  56. devguard/utils.py +174 -0
  57. devguard-0.2.0.dist-info/METADATA +225 -0
  58. devguard-0.2.0.dist-info/RECORD +60 -0
  59. devguard-0.2.0.dist-info/WHEEL +4 -0
  60. devguard-0.2.0.dist-info/entry_points.txt +2 -0
devguard/discovery.py ADDED
@@ -0,0 +1,363 @@
1
+ """Agnostic discovery engine based on spec rules."""
2
+
3
import asyncio
import json
import logging
import re
import shlex
from pathlib import Path
from typing import Any

from devguard.spec import DiscoveryRule, MonitorSpec
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
+ def _parse_json_robustly(output: str) -> Any | None:
16
+ """Parse JSON from CLI output, handling common issues like update banners.
17
+
18
+ Many CLI tools (npm, gh, etc.) print non-JSON text like "Update available!"
19
+ before or after the actual JSON. This function extracts the JSON portion.
20
+ """
21
+ output = output.strip()
22
+ if not output:
23
+ return None
24
+
25
+ # Try direct parse first (fast path)
26
+ try:
27
+ return json.loads(output)
28
+ except json.JSONDecodeError:
29
+ pass
30
+
31
+ # Find JSON object or array boundaries
32
+ # Look for first { or [ and matching last } or ]
33
+ obj_start = output.find("{")
34
+ arr_start = output.find("[")
35
+
36
+ if obj_start == -1 and arr_start == -1:
37
+ return None
38
+
39
+ # Determine which comes first
40
+ if obj_start == -1:
41
+ start_char, end_char = "[", "]"
42
+ start_idx = arr_start
43
+ elif arr_start == -1:
44
+ start_char, end_char = "{", "}"
45
+ start_idx = obj_start
46
+ else:
47
+ if obj_start < arr_start:
48
+ start_char, end_char = "{", "}"
49
+ start_idx = obj_start
50
+ else:
51
+ start_char, end_char = "[", "]"
52
+ start_idx = arr_start
53
+
54
+ # Find the matching end
55
+ end_idx = output.rfind(end_char)
56
+ if end_idx == -1 or end_idx <= start_idx:
57
+ return None
58
+
59
+ json_str = output[start_idx : end_idx + 1]
60
+
61
+ try:
62
+ return json.loads(json_str)
63
+ except json.JSONDecodeError:
64
+ logger.debug(f"Failed to parse extracted JSON: {json_str[:100]}...")
65
+ return None
66
+
67
+
68
class DiscoveryResult:
    """Container for auto-discovery output: resources, errors, metadata."""

    def __init__(self):
        # Maps resource type -> ordered list of unique discovered values.
        self.resources: dict[str, list[Any]] = {}
        self.errors: list[str] = []
        self.metadata: dict[str, Any] = {}

    def add_resource(self, resource_type: str, value: Any) -> None:
        """Record a discovered resource, skipping duplicates per type."""
        bucket = self.resources.setdefault(resource_type, [])
        if value not in bucket:
            bucket.append(value)

    def to_dict(self) -> dict[str, Any]:
        """Serialize the result into a plain dictionary."""
        return {
            "resources": self.resources,
            "errors": self.errors,
            "metadata": self.metadata,
        }
90
+
91
+
92
+ async def execute_cli_command(
93
+ command: str, parser: str, extract_path: str | None, timeout: int, username: str | None = None
94
+ ) -> list[Any]:
95
+ """Execute a CLI command and parse results."""
96
+ results = []
97
+
98
+ # Replace {username} placeholder if present
99
+ if username and "{username}" in command:
100
+ command = command.replace("{username}", username)
101
+
102
+ try:
103
+ # Split command into parts
104
+ cmd_parts = command.split()
105
+ if not cmd_parts:
106
+ return results
107
+
108
+ result = await asyncio.wait_for(
109
+ asyncio.create_subprocess_exec(
110
+ *cmd_parts,
111
+ stdout=asyncio.subprocess.PIPE,
112
+ stderr=asyncio.subprocess.PIPE,
113
+ ),
114
+ timeout=timeout,
115
+ )
116
+ stdout, stderr = await result.communicate()
117
+
118
+ if result.returncode != 0:
119
+ logger.debug(f"Command failed: {command} (exit code {result.returncode})")
120
+ return results
121
+
122
+ output = stdout.decode().strip()
123
+
124
+ if parser == "json":
125
+ data = _parse_json_robustly(output)
126
+ if data is not None:
127
+ if extract_path:
128
+ results = _extract_json_path(data, extract_path)
129
+ else:
130
+ results = [data] if data else []
131
+ elif parser == "json_lines":
132
+ for line in output.split("\n"):
133
+ if line.strip():
134
+ try:
135
+ data = json.loads(line)
136
+ results.append(data)
137
+ except json.JSONDecodeError:
138
+ pass
139
+ elif parser == "lines":
140
+ results = [line.strip() for line in output.split("\n") if line.strip()]
141
+ elif parser == "text":
142
+ if output:
143
+ results = [output]
144
+ else:
145
+ logger.warning(f"Unknown parser: {parser}")
146
+
147
+ except TimeoutError:
148
+ logger.warning(f"Command timed out: {command}")
149
+ except Exception as e:
150
+ logger.debug(f"Error executing command: {command}: {e}")
151
+
152
+ return results
153
+
154
+
155
+ def _extract_json_path(data: Any, path: str) -> list[Any]:
156
+ """Extract values from JSON using a simple path syntax."""
157
+ results = []
158
+
159
+ # Simple path extraction
160
+ # Supports: "key", "key.subkey", "[].key", "dependencies.keys()"
161
+ try:
162
+ if path.endswith(".keys()"):
163
+ # Extract keys from a dict
164
+ key_path = path[:-7]
165
+ obj = _get_json_value(data, key_path)
166
+ if isinstance(obj, dict):
167
+ results = list(obj.keys())
168
+ elif path.startswith("[].") or path.startswith("[]."):
169
+ # Array extraction
170
+ key = path[3:]
171
+ if isinstance(data, list):
172
+ for item in data:
173
+ value = _get_json_value(item, key)
174
+ if value is not None:
175
+ results.append(value)
176
+ else:
177
+ value = _get_json_value(data, path)
178
+ if value is not None:
179
+ results = [value] if not isinstance(value, list) else value
180
+ except Exception as e:
181
+ logger.debug(f"Error extracting JSON path {path}: {e}")
182
+
183
+ return results
184
+
185
+
186
+ def _get_json_value(data: Any, path: str) -> Any:
187
+ """Get a value from nested JSON using dot notation."""
188
+ parts = path.split(".")
189
+ current = data
190
+ for part in parts:
191
+ if isinstance(current, dict):
192
+ current = current.get(part)
193
+ elif isinstance(current, list) and part.isdigit():
194
+ current = current[int(part)]
195
+ else:
196
+ return None
197
+ if current is None:
198
+ return None
199
+ return current
200
+
201
+
202
async def scan_files(
    base_path: Path,
    pattern: str,
    extractor: str,
    extract_path: str | None,
    timeout: int,
) -> list[Any]:
    """Scan files matching a pattern and extract data.

    Args:
        base_path: Root for relative and glob patterns.
        pattern: File path or glob; "~/..." is expanded to the home dir.
        extractor: Passed to _extract_from_file ("json_path", "yaml_path",
            "regex", "raw").
        extract_path: Path/regex argument for the extractor.
        timeout: Soft budget in seconds, checked between files in the
            glob branch (a single slow file is not interrupted).

    Returns:
        Flattened list of extracted values; empty on error.
    """
    results: list[Any] = []
    start_time = asyncio.get_event_loop().time()

    try:
        # Expand ~ first, then resolve the concrete path.
        if pattern.startswith("~/"):
            pattern = str(Path.home() / pattern[2:])
        # Bug fix: the original assigned search_path only in the relative
        # and absolute branches; a "~/..." pattern without a glob hit an
        # UnboundLocalError in the single-file branch below.
        if pattern.startswith("/"):
            search_path = Path(pattern)
        else:
            search_path = base_path / pattern

        if "*" in pattern:  # covers "**" too
            glob_pattern = pattern.replace("**/", "").replace("~", str(Path.home()))
            for file_path in base_path.rglob(glob_pattern):
                # Enforce the soft time budget between files.
                if (asyncio.get_event_loop().time() - start_time) > timeout:
                    break
                try:
                    extracted = _extract_from_file(file_path, extractor, extract_path)
                    if extracted:
                        results.extend(extracted if isinstance(extracted, list) else [extracted])
                except Exception as e:
                    logger.debug(f"Error processing {file_path}: {e}")
        else:
            # Single file
            if search_path.exists():
                extracted = _extract_from_file(search_path, extractor, extract_path)
                if extracted:
                    results.extend(extracted if isinstance(extracted, list) else [extracted])
    except Exception as e:
        logger.warning(f"Error scanning files: {e}")

    return results
246
+
247
+
248
+ def _extract_from_file(file_path: Path, extractor: str, extract_path: str | None) -> Any:
249
+ """Extract data from a file based on extractor type."""
250
+ try:
251
+ content = file_path.read_text()
252
+
253
+ if extractor == "json_path":
254
+ data = json.loads(content)
255
+ if extract_path:
256
+ return _extract_json_path(data, extract_path)
257
+ return data
258
+ elif extractor == "yaml_path":
259
+ import yaml
260
+
261
+ data = yaml.safe_load(content)
262
+ if extract_path:
263
+ return _extract_json_path(data, extract_path) # Same logic works for YAML
264
+ return data
265
+ elif extractor == "regex":
266
+ if extract_path:
267
+ matches = re.findall(extract_path, content)
268
+ return list(set(matches)) # Remove duplicates
269
+ return []
270
+ elif extractor == "raw":
271
+ return content.strip()
272
+ else:
273
+ logger.warning(f"Unknown extractor: {extractor}")
274
+ return None
275
+ except Exception as e:
276
+ logger.debug(f"Error extracting from {file_path}: {e}")
277
+ return None
278
+
279
+
280
async def discover_from_rule(
    rule: DiscoveryRule, base_path: Path | None = None, username: str | None = None
) -> list[Any]:
    """Discover resources using a single rule.

    Dispatches on rule.method ("cli", "file_scan", "api", "custom");
    disabled rules and unimplemented/unknown methods yield an empty list.
    """
    if not rule.enabled:
        return []

    root = base_path if base_path is not None else Path.home() / "Documents" / "dev"
    method = rule.method

    if method == "cli":
        if not rule.command:
            logger.warning(f"Rule {rule.name} has method=cli but no command")
            return []
        return await execute_cli_command(
            rule.command, rule.command_parser or "text", rule.extract_path, rule.timeout, username
        )

    if method == "file_scan":
        if not rule.file_pattern:
            logger.warning(f"Rule {rule.name} has method=file_scan but no file_pattern")
            return []
        return await scan_files(
            root,
            rule.file_pattern,
            rule.file_extractor or "raw",
            rule.extract_path,
            rule.timeout,
        )

    if method == "api":
        # API-based discovery would go here
        logger.warning(f"API method not yet implemented for rule {rule.name}")
        return []

    if method == "custom":
        # Custom discovery would go here
        logger.warning(f"Custom method not yet implemented for rule {rule.name}")
        return []

    logger.warning(f"Unknown method: {method} for rule {rule.name}")
    return []
319
+
320
+
321
async def discover_all(
    spec: MonitorSpec, base_path: Path | None = None, username: str | None = None
) -> DiscoveryResult:
    """Run all discovery rules from a spec.

    Resolves the username first (from a rule of type "username" if not
    provided), runs the remaining rules concurrently, then merges in
    spec.manual_resources. Per-rule failures are collected in .errors.
    """
    result = DiscoveryResult()

    # Get username if needed
    if not username:
        # Try to get from a username discovery rule
        username_rules = [r for r in spec.discovery_rules if r.type == "username"]
        if username_rules:
            try:
                username_results = await discover_from_rule(username_rules[0], base_path)
                if username_results:
                    username = username_results[0]
            except Exception:
                pass

    # Bug fix: keep the rules that actually got a task in a parallel list.
    # The original zipped *all* spec.discovery_rules against the gathered
    # results even though username rules were skipped when building tasks,
    # so any username rule shifted the pairing and resources were recorded
    # under the wrong rule name/type.
    scheduled = [r for r in spec.discovery_rules if r.type != "username"]
    tasks = [discover_from_rule(rule, base_path, username) for rule in scheduled]

    try:
        rule_results = await asyncio.gather(*tasks, return_exceptions=True)

        for rule, rule_result in zip(scheduled, rule_results):
            if isinstance(rule_result, Exception):
                result.errors.append(f"{rule.name}: {str(rule_result)}")
            elif isinstance(rule_result, list):
                for value in rule_result:
                    result.add_resource(rule.type, value)
    except Exception as e:
        result.errors.append(f"Discovery error: {str(e)}")

    # Add manual resources
    for resource_type, resources in spec.manual_resources.items():
        for resource in resources:
            result.add_resource(resource_type, resource)

    return result
@@ -0,0 +1,142 @@
1
+ """Shared HTTP client utilities with best practices for monitoring."""
2
+
3
+ import asyncio
4
+ import logging
5
+ import random
6
+ from collections.abc import Callable
7
+ from typing import Any
8
+
9
+ import httpx
10
+ from httpx import Timeout
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
# Default timeout configuration for monitoring.
# All values are in seconds; kept short so probes fail fast rather than
# hang on an unresponsive endpoint.
DEFAULT_TIMEOUT = Timeout(
    connect=5.0,  # Connection establishment
    read=10.0,  # Reading response
    write=5.0,  # Sending request
    pool=2.0,  # Pool acquisition
)

# Default connection limits for the shared client.
# keepalive_expiry is in seconds; see httpx.Limits for semantics.
DEFAULT_LIMITS = httpx.Limits(
    max_connections=20,
    max_keepalive_connections=10,
    keepalive_expiry=15.0,
)
28
+
29
+
30
def create_client(
    timeout: Timeout | None = None,
    limits: httpx.Limits | None = None,
) -> httpx.AsyncClient:
    """Create an AsyncClient with sensible defaults for monitoring.

    Any argument left as None falls back to the module-level defaults
    (DEFAULT_TIMEOUT / DEFAULT_LIMITS).
    """
    effective_timeout = timeout or DEFAULT_TIMEOUT
    effective_limits = limits or DEFAULT_LIMITS
    return httpx.AsyncClient(timeout=effective_timeout, limits=effective_limits)
39
+
40
+
41
async def retry_with_backoff(
    func: Callable,
    max_retries: int = 3,
    base_delay: float = 1.0,
    max_delay: float = 32.0,
    jitter: bool = True,
) -> Any:
    """
    Retry a coroutine with exponential backoff.

    For rate limiting (429), respects the Retry-After header if available;
    otherwise (and for 5xx / network / request errors) uses exponential
    backoff capped at max_delay. Client errors other than 429 are re-raised
    immediately.

    Args:
        func: Zero-argument callable returning the awaitable to retry.
        max_retries: Maximum number of attempts (returns None if 0).
        base_delay: Initial backoff delay in seconds.
        max_delay: Upper bound for computed backoff delays.
        jitter: Scale delays by a random factor in [0.5, 1.5) to avoid
            thundering herds (not applied to the 429 path, matching the
            original behavior).

    Raises:
        The last retryable exception once all attempts are exhausted; a
        non-retryable httpx.HTTPStatusError immediately.
    """

    def _backoff(attempt: int) -> float:
        # Capped exponential backoff with optional jitter.
        delay = min(base_delay * (2**attempt), max_delay)
        if jitter:
            delay *= 0.5 + random.random()
        return delay

    last_exception: Exception | None = None

    for attempt in range(max_retries):
        try:
            return await func()
        except httpx.HTTPStatusError as e:
            status = e.response.status_code
            if status == 429:
                # Respect Retry-After if the server sent a usable value;
                # otherwise fall back to capped exponential backoff (the
                # original fallback was uncapped by max_delay).
                delay = None
                retry_after = e.response.headers.get("retry-after")
                if retry_after:
                    try:
                        delay = float(retry_after)
                    except ValueError:
                        delay = None
                if delay is None:
                    delay = min(base_delay * (2**attempt), max_delay)
                logger.info(f"Rate limited. Retrying after {delay}s")
            elif 500 <= status < 600:
                # Server error - retry with backoff
                delay = _backoff(attempt)
                logger.info(
                    f"Server error {status}. "
                    f"Retrying in {delay:.2f}s (attempt {attempt + 1}/{max_retries})"
                )
            else:
                # Client error or other - don't retry
                raise
            last_exception = e
        except (httpx.ConnectError, httpx.TimeoutException) as e:
            # Network/timeout error - retry with backoff
            delay = _backoff(attempt)
            logger.info(
                f"Network/timeout error. "
                f"Retrying in {delay:.2f}s (attempt {attempt + 1}/{max_retries})"
            )
            last_exception = e
        except httpx.RequestError as e:
            # Other request errors - retry with backoff
            delay = _backoff(attempt)
            logger.info(
                f"Request error. Retrying in {delay:.2f}s (attempt {attempt + 1}/{max_retries})"
            )
            last_exception = e

        # Bug fix: the original slept even after the final attempt, delaying
        # the eventual re-raise for nothing. Only sleep when a retry follows.
        if attempt + 1 < max_retries:
            await asyncio.sleep(delay)

    if last_exception:
        raise last_exception
114
+
115
+
116
+ def classify_error(exception: Exception, status_code: int | None = None) -> str:
117
+ """
118
+ Classify error severity for monitoring purposes.
119
+
120
+ Returns: 'transient', 'permanent', or 'rate_limited'
121
+ """
122
+ # Rate limiting
123
+ if status_code in [429, 503]:
124
+ return "rate_limited"
125
+
126
+ # Server errors (5xx) are typically transient
127
+ if status_code and 500 <= status_code < 600:
128
+ return "transient"
129
+
130
+ # Client errors (4xx) except 429 are permanent
131
+ if status_code and 400 <= status_code < 500:
132
+ return "permanent"
133
+
134
+ # Network errors are transient
135
+ if isinstance(exception, (httpx.ConnectError, httpx.TimeoutException)):
136
+ return "transient"
137
+
138
+ # Connection reset, read errors are transient
139
+ if isinstance(exception, httpx.RequestError):
140
+ return "transient"
141
+
142
+ return "permanent"