PyPI - agent-audit - Versions diffs - 0.1.0__py3-none-any.whl - Mend

agent-audit 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37)

agent_audit/__init__.py +3 -0
agent_audit/__main__.py +13 -0
agent_audit/cli/__init__.py +1 -0
agent_audit/cli/commands/__init__.py +1 -0
agent_audit/cli/commands/init.py +44 -0
agent_audit/cli/commands/inspect.py +236 -0
agent_audit/cli/commands/scan.py +329 -0
agent_audit/cli/formatters/__init__.py +1 -0
agent_audit/cli/formatters/json.py +138 -0
agent_audit/cli/formatters/sarif.py +155 -0
agent_audit/cli/formatters/terminal.py +221 -0
agent_audit/cli/main.py +34 -0
agent_audit/config/__init__.py +1 -0
agent_audit/config/ignore.py +477 -0
agent_audit/core_utils/__init__.py +1 -0
agent_audit/models/__init__.py +18 -0
agent_audit/models/finding.py +159 -0
agent_audit/models/risk.py +77 -0
agent_audit/models/tool.py +182 -0
agent_audit/rules/__init__.py +6 -0
agent_audit/rules/engine.py +503 -0
agent_audit/rules/loader.py +160 -0
agent_audit/scanners/__init__.py +5 -0
agent_audit/scanners/base.py +32 -0
agent_audit/scanners/config_scanner.py +390 -0
agent_audit/scanners/mcp_config_scanner.py +321 -0
agent_audit/scanners/mcp_inspector.py +421 -0
agent_audit/scanners/python_scanner.py +544 -0
agent_audit/scanners/secret_scanner.py +521 -0
agent_audit/utils/__init__.py +21 -0
agent_audit/utils/compat.py +98 -0
agent_audit/utils/mcp_client.py +343 -0
agent_audit/version.py +3 -0
agent_audit-0.1.0.dist-info/METADATA +219 -0
agent_audit-0.1.0.dist-info/RECORD +37 -0
agent_audit-0.1.0.dist-info/WHEEL +4 -0
agent_audit-0.1.0.dist-info/entry_points.txt +3 -0

agent_audit/scanners/secret_scanner.py ADDED Viewed

@@ -0,0 +1,521 @@
+"""Secret scanner for detecting hardcoded credentials."""
+import fnmatch
+import re
+import logging
+from pathlib import Path
+from typing import List, Optional, Pattern, Tuple
+from dataclasses import dataclass, field
+from agent_audit.scanners.base import BaseScanner, ScanResult
+logger = logging.getLogger(__name__)
+@dataclass
+class SecretMatch:
+    """A single secret detected on one line of a scanned file."""
+    # Display name of the pattern that fired (e.g. "AWS Access Key ID").
+    pattern_name: str
+    # Line of the hit; the scanner numbers lines starting at 1.
+    line_number: int
+    # The full line with the matched span masked by the scanner.
+    line_content: str
+    # The matched text itself, masked by the scanner before it is stored.
+    matched_text: str
+    # Offset of the match start within the line (re.Match.start()).
+    start_col: int
+    # Offset just past the match end within the line (re.Match.end()).
+    end_col: int
+    severity: str  # critical, high, medium
+@dataclass
+class SecretScanResult(ScanResult):
+    """Result of secret scanning for one source file."""
+    # Secrets found in the file; default_factory gives every result its own list.
+    secrets: List[SecretMatch] = field(default_factory=list)
+class SecretScanner(BaseScanner):
+    """
+    Regex-based secret detection scanner.
+    Detects:
+    - AWS access keys
+    - API keys (OpenAI, Anthropic, GitHub, etc.)
+    - Generic tokens and passwords
+    - Private keys
+    Every pattern is applied to every non-comment line independently, so a
+    single line can produce more than one finding.
+    """
+    name = "Secret Scanner"
+    # Secret patterns with severity levels.
+    # Each entry is (compiled regex, display name, severity).
+    SECRET_PATTERNS: List[Tuple[Pattern, str, str]] = [
+        # AWS
+        (re.compile(r'AKIA[0-9A-Z]{16}'), "AWS Access Key ID", "critical"),
+        # Any standalone run of exactly 40 base64-style characters; the
+        # lookarounds stop it matching inside a longer run.
+        (re.compile(r'(?<![A-Za-z0-9/+=])[A-Za-z0-9/+=]{40}(?![A-Za-z0-9/+=])'),
+         "Potential AWS Secret Key", "high"),
+        # OpenAI
+        (re.compile(r'sk-[a-zA-Z0-9]{48,}'), "OpenAI API Key", "critical"),
+        (re.compile(r'sk-proj-[a-zA-Z0-9]{48,}'), "OpenAI Project API Key", "critical"),
+        # Anthropic
+        (re.compile(r'sk-ant-[a-zA-Z0-9-]{40,}'), "Anthropic API Key", "critical"),
+        # GitHub
+        (re.compile(r'ghp_[a-zA-Z0-9]{36}'), "GitHub Personal Access Token", "critical"),
+        (re.compile(r'gho_[a-zA-Z0-9]{36}'), "GitHub OAuth Token", "critical"),
+        (re.compile(r'ghs_[a-zA-Z0-9]{36}'), "GitHub App Token", "critical"),
+        (re.compile(r'ghr_[a-zA-Z0-9]{36}'), "GitHub Refresh Token", "critical"),
+        # Google
+        (re.compile(r'AIza[0-9A-Za-z\-_]{35}'), "Google API Key", "critical"),
+        # Stripe
+        (re.compile(r'sk_live_[a-zA-Z0-9]{24,}'), "Stripe Live Secret Key", "critical"),
+        (re.compile(r'sk_test_[a-zA-Z0-9]{24,}'), "Stripe Test Secret Key", "high"),
+        (re.compile(r'pk_live_[a-zA-Z0-9]{24,}'), "Stripe Live Publishable Key", "medium"),
+        # Generic patterns: case-insensitive "keyword = value" / "keyword: value"
+        # with an optionally quoted value.
+        (re.compile(r'(?i)(api[_-]?key|apikey)\s*[=:]\s*["\']?([a-zA-Z0-9_\-]{20,})["\']?'),
+         "Generic API Key", "high"),
+        (re.compile(r'(?i)(secret|password|passwd|pwd)\s*[=:]\s*["\']?([^\s"\']{8,})["\']?'),
+         "Generic Secret/Password", "high"),
+        (re.compile(r'(?i)(token|auth[_-]?token)\s*[=:]\s*["\']?([a-zA-Z0-9_\-]{20,})["\']?'),
+         "Generic Token", "high"),
+        # Private keys (only the PEM header line is matched)
+        (re.compile(r'-----BEGIN (?:RSA |DSA |EC |OPENSSH )?PRIVATE KEY-----'),
+         "Private Key Header", "critical"),
+        (re.compile(r'-----BEGIN PGP PRIVATE KEY BLOCK-----'),
+         "PGP Private Key", "critical"),
+        # Database connection strings of the form scheme://user:password@
+        (re.compile(r'(?i)(?:mysql|postgres|postgresql|mongodb|redis)://[^\s"\']+:[^\s"\']+@'),
+         "Database Connection String with Credentials", "critical"),
+        # JWT secrets
+        (re.compile(r'(?i)jwt[_-]?secret\s*[=:]\s*["\']?([a-zA-Z0-9_\-]{16,})["\']?'),
+         "JWT Secret", "high"),
+        # Slack
+        (re.compile(r'xox[baprs]-[0-9]{10,13}-[0-9]{10,13}[a-zA-Z0-9-]*'),
+         "Slack Token", "critical"),
+        # Twilio
+        (re.compile(r'SK[a-f0-9]{32}'), "Twilio API Key", "critical"),
+        # SendGrid
+        (re.compile(r'SG\.[a-zA-Z0-9_-]{22}\.[a-zA-Z0-9_-]{43}'),
+         "SendGrid API Key", "critical"),
+    ]
+    # File extensions to scan (compared against the lower-cased suffix)
+    SCANNABLE_EXTENSIONS = {
+        '.py', '.js', '.ts', '.jsx', '.tsx', '.json', '.yaml', '.yml',
+        '.env', '.cfg', '.conf', '.config', '.ini', '.properties',
+        '.sh', '.bash', '.zsh', '.toml', '.xml', '.md', '.txt'
+    }
+    # Files to always skip (matched on the exact file name)
+    SKIP_FILES = {
+        'package-lock.json', 'yarn.lock', 'poetry.lock',
+        'Cargo.lock', 'go.sum', 'pnpm-lock.yaml'
+    }
+    def __init__(
+        self,
+        exclude_paths: Optional[List[str]] = None,
+        custom_patterns: Optional[List[Tuple[str, str, str]]] = None
+    ):
+        """
+        Initialize the secret scanner.
+        Args:
+            exclude_paths: Path patterns to exclude
+            custom_patterns: Additional patterns as (regex, name, severity) tuples
+        """
+        self.exclude_paths = set(exclude_paths or [])
+        self.patterns = list(self.SECRET_PATTERNS)
+        # Add custom patterns
+        if custom_patterns:
+            for regex_str, name, severity in custom_patterns:
+                self.patterns.append((re.compile(regex_str), name, severity))
+    def scan(self, path: Path) -> List[SecretScanResult]:
+        """
+        Scan for secrets in files.
+        Args:
+            path: File or directory to scan
+        Returns:
+            List of scan results
+        """
+        results = []
+        files = self._find_files(path)
+        for file_path in files:
+            result = self._scan_file(file_path)
+            if result and result.secrets:
+                results.append(result)
+        return results
+    def _find_files(self, path: Path) -> List[Path]:
+        """Find files to scan."""
+        if path.is_file():
+            if self._should_scan_file(path):
+                return [path]
+            return []
+        files = []
+        for file_path in path.rglob('*'):
+            if not file_path.is_file():
+                continue
+            if not self._should_scan_file(file_path):
+                continue
+            # Check exclude patterns using glob matching
+            rel_path = str(file_path.relative_to(path))
+            if self._should_exclude(rel_path):
+                continue
+            files.append(file_path)
+        return files
+    def _should_exclude(self, rel_path: str) -> bool:
+        """Check if a relative path matches any exclude pattern."""
+        # Normalize path separators
+        normalized_path = rel_path.replace('\\', '/')
+        for pattern in self.exclude_paths:
+            normalized_pattern = pattern.replace('\\', '/')
+            # Simple substring matching (backward compatibility)
+            if normalized_pattern in normalized_path:
+                return True
+            # Direct fnmatch for glob patterns
+            if fnmatch.fnmatch(normalized_path, normalized_pattern):
+                return True
+            # Handle "tests/**" style patterns
+            if normalized_pattern.endswith('/**'):
+                prefix = normalized_pattern[:-3]
+                if normalized_path.startswith(prefix + '/') or normalized_path == prefix:
+                    return True
+            # Handle "**/test_*" style patterns
+            if normalized_pattern.startswith('**/'):
+                suffix_pattern = normalized_pattern[3:]
+                # Match against filename
+                filename = Path(normalized_path).name
+                if fnmatch.fnmatch(filename, suffix_pattern):
+                    return True
+                # Match against any path segment
+                for part in Path(normalized_path).parts:
+                    if fnmatch.fnmatch(part, suffix_pattern):
+                        return True
+        return False
+    def _should_scan_file(self, file_path: Path) -> bool:
+        """Check if a file should be scanned."""
+        # Skip known non-secret files
+        if file_path.name in self.SKIP_FILES:
+            return False
+        # Skip hidden directories
+        if any(part.startswith('.') and part not in {'.env'}
+              for part in file_path.parts[:-1]):
+            return False
+        # Skip common non-source directories
+        skip_dirs = {'node_modules', 'venv', '.venv', '__pycache__',
+                    'dist', 'build', '.git'}
+        if any(part in skip_dirs for part in file_path.parts):
+            return False
+        # Check extension
+        if file_path.suffix.lower() in self.SCANNABLE_EXTENSIONS:
+            return True
+        # Also scan .env files regardless of extension
+        if '.env' in file_path.name:
+            return True
+        return False
+    def _scan_file(self, file_path: Path) -> Optional[SecretScanResult]:
+        """Scan a single file for secrets."""
+        try:
+            content = file_path.read_text(encoding='utf-8', errors='ignore')
+        except Exception as e:
+            logger.warning(f"Error reading {file_path}: {e}")
+            return None
+        secrets = []
+        lines = content.splitlines()
+        for line_num, line in enumerate(lines, start=1):
+            # Skip empty lines and comments
+            stripped = line.strip()
+            if not stripped or stripped.startswith('#') or stripped.startswith('//'):
+                continue
+            # Check each pattern
+            for pattern, name, severity in self.patterns:
+                for match in pattern.finditer(line):
+                    # Filter out false positives
+                    if self._is_false_positive(line, match, file_path):
+                        continue
+                    secret = SecretMatch(
+                        pattern_name=name,
+                        line_number=line_num,
+                        line_content=self._mask_secret(line, match),
+                        matched_text=self._mask_match(match.group()),
+                        start_col=match.start(),
+                        end_col=match.end(),
+                        severity=severity
+                    )
+                    secrets.append(secret)
+        return SecretScanResult(
+            source_file=str(file_path),
+            secrets=secrets
+        )
+    def _is_false_positive(
+        self,
+        line: str,
+        match: re.Match,
+        file_path: Path
+    ) -> bool:
+        """
+        Check if a match is likely a false positive.
+        Filters out:
+        - Example/placeholder values
+        - Test fixtures
+        - Documentation
+        - Variable/class/function names containing keywords
+        - Environment variable lookups
+        - Secure wrappers (SecretStr, etc.)
+        """
+        matched_text = match.group().lower()
+        line_lower = line.lower()
+        stripped = line.strip()
+        # Common placeholder patterns
+        placeholders = [
+            'example', 'placeholder', 'your_', 'my_', 'xxx',
+            'test', 'fake', 'dummy', 'sample', 'demo', '<your',
+            'insert_', 'replace_', 'changeme', 'undefined'
+        ]
+        if any(p in matched_text for p in placeholders):
+            return True
+        # Check if this looks like documentation
+        if '# example' in line_lower or '// example' in line_lower:
+            return True
+        # Check file path for test/example indicators
+        path_str = str(file_path).lower()
+        if any(p in path_str for p in ['test', 'example', 'fixture', 'mock', 'sample']):
+            return True
+        # Skip class definitions (class FooTokenBar:)
+        if stripped.startswith('class '):
+            return True
+        # Skip function definitions (def get_api_key(...):)
+        if stripped.startswith('def '):
+            return True
+        # Skip import statements
+        if stripped.startswith('import ') or stripped.startswith('from '):
+            return True
+        # Skip type annotations (variable: SecretStr)
+        if re.search(r':\s*(Optional\[)?SecretStr', line):
+            return True
+        # Check for environment variable lookups - value is not hardcoded
+        env_patterns = [
+            r'os\.environ\.get\s*\(',
+            r'os\.environ\[',
+            r'os\.getenv\s*\(',
+            r'getenv\s*\(',
+            r'environ\.get\s*\(',
+            r'settings\.\w+',  # e.g., settings.API_KEY
+            r'config\.\w+',    # e.g., config.api_key
+            r'Config\.\w+',
+            r'get_from_\w+\s*\(',  # get_from_env, get_from_dict_or_env, etc.
+        ]
+        for pattern in env_patterns:
+            if re.search(pattern, line):
+                return True
+        # Check for secure wrappers - value is wrapped, not exposed
+        secure_wrappers = [
+            r'SecretStr\s*\(',
+            r'Secret\s*\(',
+            r'SecureString\s*\(',
+            r'Field\s*\([^)]*secret\s*=\s*True',
+        ]
+        for pattern in secure_wrappers:
+            if re.search(pattern, line, re.IGNORECASE):
+                return True
+        # For generic patterns (api_key=, token=, etc.), verify the right side
+        # is a string literal that looks like a real secret, not a variable
+        if self._is_generic_pattern_match(match):
+            if not self._has_literal_secret_value(line, match):
+                return True
+        # Check if match looks like a PascalCase class/type name (e.g., ConversationTokenBufferMemory)
+        matched_text_raw = match.group()
+        if self._looks_like_class_name(matched_text_raw, line):
+            return True
+        # Environment variable references (not actual values)
+        if '${' in line or '$(' in line:
+            if matched_text in line[match.start():match.end()+5]:
+                # Check if the match is inside a variable reference
+                before = line[:match.start()]
+                if '${' in before[-10:] or '$(' in before[-10:]:
+                    return True
+        return False
+    def _looks_like_class_name(self, matched_text: str, line: str) -> bool:
+        """
+        Check if the matched text looks like a class/type name rather than a secret.
+        PascalCase identifiers with multiple capital letters are likely class names,
+        not secrets. Real secrets don't follow PascalCase naming conventions.
+        """
+        # Check for PascalCase pattern: starts with capital, has multiple capitals
+        # Also allow all-alpha strings that follow PascalCase (no numbers, no special chars)
+        text_to_check = matched_text
+        # If match contains '=' (from AWS pattern [A-Za-z0-9/+=]), extract the part after '='
+        # This handles cases like 'factory=PairwiseStringResultOutputParser'
+        if '=' in matched_text:
+            parts = matched_text.split('=')
+            # Check if the part after = looks like a class name
+            text_to_check = parts[-1]
+        if re.match(r'^[A-Z][a-zA-Z]+$', text_to_check):
+            # Count capital letters - class names typically have several
+            capital_count = sum(1 for c in text_to_check if c.isupper())
+            if capital_count >= 2:
+                return True
+            # Check for common class name suffixes
+            class_suffixes = [
+                'Memory', 'Buffer', 'Parser', 'Handler', 'Manager',
+                'Factory', 'Builder', 'Wrapper', 'Provider', 'Service',
+                'Client', 'Server', 'Controller', 'Processor', 'Validator'
+            ]
+            if any(text_to_check.endswith(suffix) for suffix in class_suffixes):
+                return True
+        return False
+    def _is_generic_pattern_match(self, match: re.Match) -> bool:
+        """Check if this match is from a generic pattern (api_key=, token=, etc.)."""
+        pattern_str = match.re.pattern
+        # Generic patterns have the keyword group followed by = or :
+        generic_indicators = [
+            r'\(api[_-]?key|apikey\)',
+            r'\(secret|password|passwd|pwd\)',
+            r'\(token|auth[_-]?token\)',
+            r'jwt[_-]?secret',
+        ]
+        for indicator in generic_indicators:
+            if indicator in pattern_str.lower():
+                return True
+        return False
+    def _has_literal_secret_value(self, line: str, match: re.Match) -> bool:
+        """
+        Check if the matched assignment has a string literal value that looks like a secret.
+        Returns True if it looks like a real hardcoded secret, False if it's likely
+        a variable reference, function call, or non-secret value.
+        """
+        # Extract the part after the = or :
+        match_text = match.group()
+        eq_pos = -1
+        for sep in ['=', ':']:
+            pos = match_text.find(sep)
+            if pos != -1:
+                eq_pos = pos
+                break
+        if eq_pos == -1:
+            return True  # Not an assignment pattern, let other checks handle it
+        # Get the value part (after = or :)
+        value_part = match_text[eq_pos + 1:].strip()
+        # Remove leading quotes
+        if value_part.startswith('"') or value_part.startswith("'"):
+            value_part = value_part[1:]
+        if value_part.endswith('"') or value_part.endswith("'"):
+            value_part = value_part[:-1]
+        # Check if value is empty or too short
+        if len(value_part) < 8:
+            return False
+        # Check if value looks like a variable name (all lowercase/uppercase, underscores)
+        if re.match(r'^[a-z_][a-z0-9_]*$', value_part) and not any(c.isdigit() for c in value_part[-4:]):
+            return False
+        # Check if it's a common non-secret pattern
+        non_secret_patterns = [
+            r'^[A-Z_]+$',  # All caps constant name like API_KEY
+            r'^None$',
+            r'^null$',
+            r'^""$',
+            r"^''$",
+            r'^\.\.\.$',  # Ellipsis
+        ]
+        for pattern in non_secret_patterns:
+            if re.match(pattern, value_part, re.IGNORECASE):
+                return False
+        # Check for mixed characters (letters, numbers, special chars) typical of secrets
+        has_letters = bool(re.search(r'[a-zA-Z]', value_part))
+        has_numbers = bool(re.search(r'[0-9]', value_part))
+        has_special = bool(re.search(r'[-_/+=]', value_part))
+        # Real secrets typically have a mix of character types
+        char_types = sum([has_letters, has_numbers, has_special])
+        if char_types < 2 and len(value_part) < 20:
+            return False
+        return True
+    def _mask_secret(self, line: str, match: re.Match) -> str:
+        """Mask the secret value in a line for safe display."""
+        start = match.start()
+        end = match.end()
+        matched_len = end - start
+        if matched_len <= 8:
+            masked = '*' * matched_len
+        else:
+            # Show first and last 4 chars
+            original = match.group()
+            masked = original[:4] + '*' * (matched_len - 8) + original[-4:]
+        return line[:start] + masked + line[end:]
+    def _mask_match(self, text: str) -> str:
+        """Mask a matched secret for display."""
+        if len(text) <= 8:
+            return '*' * len(text)
+        return text[:4] + '*' * (len(text) - 8) + text[-4:]

agent_audit/utils/__init__.py ADDED Viewed

@@ -0,0 +1,21 @@
+"""Utilities for agent-audit."""
+from agent_audit.utils.compat import (
+    IS_WINDOWS,
+    IS_MACOS,
+    IS_LINUX,
+    normalize_path,
+    home_config_dir,
+    get_subprocess_creation_flags,
+    setup_event_loop_policy,
+)
+# Public API of the package: exactly the names imported from
+# agent_audit.utils.compat above.
+__all__ = [
+    "IS_WINDOWS",
+    "IS_MACOS",
+    "IS_LINUX",
+    "normalize_path",
+    "home_config_dir",
+    "get_subprocess_creation_flags",
+    "setup_event_loop_policy",
+]

agent_audit/utils/compat.py ADDED Viewed

@@ -0,0 +1,98 @@
+"""Cross-platform compatibility utilities.
+This module provides platform detection and path handling utilities
+to ensure the tool works correctly on Windows, macOS, and Linux.
+"""
+import sys
+from pathlib import Path
+from typing import Union
+# Platform detection constants, computed once from sys.platform at import time
+IS_WINDOWS = sys.platform == "win32"
+IS_MACOS = sys.platform == "darwin"
+# Prefix test rather than equality: any value beginning with "linux" counts
+IS_LINUX = sys.platform.startswith("linux")
+def normalize_path(path: Union[str, Path]) -> str:
+    """
+    Normalize a path to use forward slashes consistently.
+    This ensures that file paths stored in findings and reports
+    use consistent forward slashes across all platforms, making
+    output deterministic and comparable.
+    Args:
+        path: Path to normalize (string or Path object)
+    Returns:
+        Normalized path string with forward slashes
+    """
+    path_str = str(path)
+    # Always use forward slashes for consistency in outputs
+    return path_str.replace("\\", "/")
+def home_config_dir() -> Path:
+    """
+    Get the appropriate configuration directory for the current platform.
+    Returns:
+        - Windows: %APPDATA%/agent-audit
+        - macOS: ~/Library/Application Support/agent-audit
+        - Linux: ~/.config/agent-audit
+    Falls back to ~/.agent-audit if the platform-specific directory
+    cannot be determined.
+    """
+    if IS_WINDOWS:
+        # Use APPDATA on Windows
+        import os
+        appdata = os.environ.get("APPDATA")
+        if appdata:
+            return Path(appdata) / "agent-audit"
+        # Fallback to home directory
+        return Path.home() / ".agent-audit"
+    elif IS_MACOS:
+        # Use Library/Application Support on macOS
+        return Path.home() / "Library" / "Application Support" / "agent-audit"
+    else:
+        # Use XDG_CONFIG_HOME on Linux, fallback to ~/.config
+        import os
+        xdg_config = os.environ.get("XDG_CONFIG_HOME")
+        if xdg_config:
+            return Path(xdg_config) / "agent-audit"
+        return Path.home() / ".config" / "agent-audit"
+def get_subprocess_creation_flags() -> int:
+    """
+    Get the appropriate subprocess creation flags for the current platform.
+    On Windows, returns CREATE_NO_WINDOW to prevent console windows from
+    appearing when running background processes. On other platforms, returns 0.
+    Returns:
+        Creation flags for subprocess.Popen or asyncio.create_subprocess_exec
+    """
+    if IS_WINDOWS:
+        # CREATE_NO_WINDOW = 0x08000000
+        # Prevents console window from appearing
+        return 0x08000000
+    return 0
+def setup_event_loop_policy() -> None:
+    """
+    Configure the asyncio event loop policy for the current platform.
+    On Windows, installs WindowsSelectorEventLoopPolicy process-wide; the
+    stated intent is to avoid issues with ProactorEventLoop and
+    subprocesses. On every other platform this is a no-op. This should be
+    called early in the application startup, before an event loop is created.
+    NOTE(review): the asyncio platform-support documentation lists
+    subprocesses as NOT supported by the selector event loop on Windows
+    (the proactor loop is the one that implements them). If this package
+    launches subprocesses through asyncio on Windows, this policy may be the
+    wrong choice - confirm against the callers.
+    NOTE(review): the event loop policy API is deprecated as of Python 3.14;
+    verify against the Python versions this package supports.
+    """
+    if IS_WINDOWS:
+        # Imported lazily so non-Windows callers never pay for it here
+        import asyncio
+        # WindowsSelectorEventLoopPolicy is more compatible with subprocess operations
+        # (author's rationale; see the review note in the docstring)
+        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())