PyPI - iam-policy-validator - Versions diffs - 1.13.1__py3-none-any.whl → 1.14.1__py3-none-any.whl - Mend

iam-policy-validator 1.13.1__py3-none-any.whl → 1.14.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45) hide show

{iam_policy_validator-1.13.1.dist-info → iam_policy_validator-1.14.1.dist-info}/METADATA +1 -1
{iam_policy_validator-1.13.1.dist-info → iam_policy_validator-1.14.1.dist-info}/RECORD +45 -39
iam_validator/__version__.py +1 -1
iam_validator/checks/action_condition_enforcement.py +6 -0
iam_validator/checks/action_resource_matching.py +12 -12
iam_validator/checks/action_validation.py +1 -0
iam_validator/checks/condition_key_validation.py +2 -0
iam_validator/checks/condition_type_mismatch.py +3 -0
iam_validator/checks/full_wildcard.py +1 -0
iam_validator/checks/mfa_condition_check.py +2 -0
iam_validator/checks/policy_structure.py +9 -0
iam_validator/checks/policy_type_validation.py +11 -0
iam_validator/checks/principal_validation.py +5 -0
iam_validator/checks/resource_validation.py +4 -0
iam_validator/checks/sensitive_action.py +1 -0
iam_validator/checks/service_wildcard.py +6 -3
iam_validator/checks/set_operator_validation.py +3 -0
iam_validator/checks/sid_uniqueness.py +2 -0
iam_validator/checks/trust_policy_validation.py +3 -0
iam_validator/checks/utils/__init__.py +16 -0
iam_validator/checks/utils/action_parser.py +149 -0
iam_validator/checks/wildcard_action.py +1 -0
iam_validator/checks/wildcard_resource.py +231 -4
iam_validator/commands/analyze.py +19 -1
iam_validator/commands/completion.py +6 -2
iam_validator/commands/validate.py +231 -12
iam_validator/core/aws_service/fetcher.py +21 -9
iam_validator/core/codeowners.py +245 -0
iam_validator/core/config/check_documentation.py +390 -0
iam_validator/core/config/config_loader.py +199 -0
iam_validator/core/config/defaults.py +25 -0
iam_validator/core/constants.py +1 -0
iam_validator/core/diff_parser.py +8 -4
iam_validator/core/finding_fingerprint.py +131 -0
iam_validator/core/formatters/sarif.py +370 -128
iam_validator/core/ignore_processor.py +309 -0
iam_validator/core/ignored_findings.py +400 -0
iam_validator/core/models.py +54 -4
iam_validator/core/policy_loader.py +313 -4
iam_validator/core/pr_commenter.py +223 -22
iam_validator/core/report.py +22 -6
iam_validator/integrations/github_integration.py +881 -123
{iam_policy_validator-1.13.1.dist-info → iam_policy_validator-1.14.1.dist-info}/WHEEL +0 -0
{iam_policy_validator-1.13.1.dist-info → iam_policy_validator-1.14.1.dist-info}/entry_points.txt +0 -0
{iam_policy_validator-1.13.1.dist-info → iam_policy_validator-1.14.1.dist-info}/licenses/LICENSE +0 -0

iam_validator/core/models.py CHANGED Viewed

@@ -168,6 +168,17 @@ class ValidationIssue(BaseModel):
     check_id: str | None = (
         None  # Check that triggered this issue (e.g., "policy_size", "sensitive_action")
     )
+    # Field that caused the issue (for precise line detection in PR comments)
+    # Values: "action", "resource", "condition", "principal", "effect", "sid"
+    field_name: str | None = None
+    # Enhanced finding quality fields (Phase 3)
+    # Explains why this issue is a security risk or compliance concern
+    risk_explanation: str | None = None
+    # Link to relevant AWS documentation or org-specific runbook
+    documentation_url: str | None = None
+    # Step-by-step remediation guidance
+    remediation_steps: list[str] | None = None
     # Severity level constants (ClassVar to avoid Pydantic treating them as fields)
     VALID_SEVERITIES: ClassVar[frozenset[str]] = frozenset(
@@ -205,11 +216,12 @@ class ValidationIssue(BaseModel):
         """Check if this issue uses IAM validity severity levels (error/warning/info)."""
         return self.severity in {"error", "warning", "info"}
-    def to_pr_comment(self, include_identifier: bool = True) -> str:
+    def to_pr_comment(self, include_identifier: bool = True, file_path: str = "") -> str:
         """Format issue as a PR comment.
         Args:
             include_identifier: Whether to include bot identifier (for cleanup)
+            file_path: Relative path to the policy file (for finding ID)
         Returns:
             Formatted comment string
@@ -235,6 +247,21 @@ class ValidationIssue(BaseModel):
             parts.append(f"{constants.BOT_IDENTIFIER}\n")
             # Add issue type identifier to allow multiple issues at same line
             parts.append(f"<!-- issue-type: {self.issue_type} -->\n")
+            # Add finding ID for ignore tracking
+            if file_path:
+                from iam_validator.core.finding_fingerprint import compute_finding_hash
+                finding_hash = compute_finding_hash(
+                    file_path=file_path,
+                    check_id=self.check_id,
+                    issue_type=self.issue_type,
+                    statement_sid=self.statement_sid,
+                    statement_index=self.statement_index,
+                    action=self.action,
+                    resource=self.resource,
+                    condition_key=self.condition_key,
+                )
+                parts.append(f"<!-- finding-id: {finding_hash} -->\n")
         # Build statement context for better navigation
         statement_context = f"Statement[{self.statement_index}]"
@@ -248,9 +275,19 @@ class ValidationIssue(BaseModel):
         # Show message immediately (not collapsed)
         parts.append(self.message)
+        # Add risk explanation if present (shown prominently)
+        if self.risk_explanation:
+            parts.append("")
+            parts.append(f"> **Why this matters:** {self.risk_explanation}")
         # Put additional details in collapsible section if there are any
         has_details = bool(
-            self.action or self.resource or self.condition_key or self.suggestion or self.example
+            self.action
+            or self.resource
+            or self.condition_key
+            or self.suggestion
+            or self.example
+            or self.remediation_steps
         )
         if has_details:
@@ -271,6 +308,13 @@ class ValidationIssue(BaseModel):
                     parts.append(f"  - Condition Key: `{self.condition_key}`")
                 parts.append("")
+            # Add remediation steps if present
+            if self.remediation_steps:
+                parts.append("**🔧 How to Fix:**")
+                for i, step in enumerate(self.remediation_steps, 1):
+                    parts.append(f"  {i}. {step}")
+                parts.append("")
             # Add suggestion if present
             if self.suggestion:
                 parts.append("**💡 Suggested Fix:**")
@@ -288,11 +332,17 @@ class ValidationIssue(BaseModel):
             parts.append("")
             parts.append("</details>")
-        # Add check ID at the bottom if available
+        # Add check ID and documentation link at the bottom
+        footer_parts = []
         if self.check_id:
+            footer_parts.append(f"*Check: `{self.check_id}`*")
+        if self.documentation_url:
+            footer_parts.append(f"[📖 Documentation]({self.documentation_url})")
+        if footer_parts:
             parts.append("")
             parts.append("---")
-            parts.append(f"*Check: `{self.check_id}`*")
+            parts.append(" | ".join(footer_parts))
         return "\n".join(parts)

iam_validator/core/policy_loader.py CHANGED Viewed

@@ -27,18 +27,123 @@ Example usage:
 import json
 import logging
+import re
 from collections.abc import Generator
+from dataclasses import dataclass, field
 from pathlib import Path
-from typing import overload
+from typing import Any, overload
 import yaml
 from pydantic import ValidationError
 from iam_validator.core.models import IAMPolicy
+@dataclass
+class StatementLineMap:
+    """Line numbers for each field in a statement.
+    Used for precise line-level PR comments on specific fields
+    (e.g., pointing to the exact Action line, not just the statement start).
+    """
+    statement_start: int  # Opening brace line
+    sid: int | None = None
+    effect: int | None = None
+    action: int | None = None
+    not_action: int | None = None
+    resource: int | None = None
+    not_resource: int | None = None
+    condition: int | None = None
+    principal: int | None = None
+    not_principal: int | None = None
+    def get_line_for_field(self, field_name: str) -> int:
+        """Get line number for a specific field, fallback to statement start.
+        Args:
+            field_name: Field name (case-insensitive): action, resource, condition, etc.
+        Returns:
+            Line number for the field, or statement_start if not found
+        """
+        field_map = {
+            "sid": self.sid,
+            "effect": self.effect,
+            "action": self.action,
+            "notaction": self.not_action,
+            "resource": self.resource,
+            "notresource": self.not_resource,
+            "condition": self.condition,
+            "principal": self.principal,
+            "notprincipal": self.not_principal,
+        }
+        line = field_map.get(field_name.lower().replace("_", ""))
+        return line if line is not None else self.statement_start
+@dataclass
+class PolicyLineMap:
+    """Line mappings for all statements in a policy file.
+    Provides field-level line number lookup for PR comment placement.
+    """
+    statements: list[StatementLineMap] = field(default_factory=list)
+    def get_statement_map(self, index: int) -> StatementLineMap | None:
+        """Get line map for a specific statement by index.
+        Args:
+            index: Statement index (0-based)
+        Returns:
+            StatementLineMap or None if index out of range
+        """
+        if 0 <= index < len(self.statements):
+            return self.statements[index]
+        return None
+    def get_line_for_field(self, statement_index: int, field_name: str) -> int | None:
+        """Get line number for a field in a specific statement.
+        Args:
+            statement_index: Statement index (0-based)
+            field_name: Field name (action, resource, condition, etc.)
+        Returns:
+            Line number or None if statement not found
+        """
+        stmt_map = self.get_statement_map(statement_index)
+        if stmt_map:
+            return stmt_map.get_line_for_field(field_name)
+        return None
 logger = logging.getLogger(__name__)
+class PolicyValidationLimits:
+    """Validation limits for policy loading.
+    These limits protect against DoS attacks via maliciously crafted policies
+    and ensure reasonable resource usage.
+    """
+    # Maximum file size in bytes (default: 10MB - AWS limit is 6KB for managed policies)
+    MAX_FILE_SIZE_BYTES: int = 10 * 1024 * 1024
+    # Maximum JSON/YAML nesting depth
+    MAX_DEPTH: int = 50
+    # Maximum number of statements per policy (AWS limit is ~20-30 depending on size)
+    MAX_STATEMENTS: int = 100
+    # Maximum number of actions per statement
+    MAX_ACTIONS_PER_STATEMENT: int = 500
+    # Maximum number of resources per statement
+    MAX_RESOURCES_PER_STATEMENT: int = 500
+    # Maximum string length for any field
+    MAX_STRING_LENGTH: int = 10000
 class PolicyLoader:
     """Loads and parses IAM policy documents from files.
@@ -49,17 +154,90 @@ class PolicyLoader:
     # Directories to skip when scanning recursively (cache, build artifacts, etc.)
     SKIP_DIRECTORIES = {".cache", ".git", "node_modules", "__pycache__", ".venv", "venv"}
-    def __init__(self, max_file_size_mb: int = 100) -> None:
+    def __init__(
+        self,
+        max_file_size_mb: int = 100,
+        enforce_limits: bool = True,
+    ) -> None:
         """Initialize the policy loader.
         Args:
             max_file_size_mb: Maximum file size in MB to load (default: 100MB)
+            enforce_limits: Whether to enforce validation limits (default: True)
         """
         self.loaded_policies: list[tuple[str, IAMPolicy]] = []
         self.max_file_size_bytes = max_file_size_mb * 1024 * 1024
+        self.enforce_limits = enforce_limits
         # Track parsing/validation errors for reporting
         self.parsing_errors: list[tuple[str, str]] = []  # (file_path, error_message)
+    @staticmethod
+    def check_json_depth(
+        obj: Any, max_depth: int = PolicyValidationLimits.MAX_DEPTH, current_depth: int = 0
+    ) -> bool:
+        """Check if JSON object exceeds maximum nesting depth.
+        Args:
+            obj: JSON object to check
+            max_depth: Maximum allowed depth
+            current_depth: Current recursion depth
+        Returns:
+            True if within limits, raises ValueError if exceeded
+        """
+        if current_depth > max_depth:
+            raise ValueError(f"JSON nesting depth exceeds maximum of {max_depth}")
+        if isinstance(obj, dict):
+            for value in obj.values():
+                PolicyLoader.check_json_depth(value, max_depth, current_depth + 1)
+        elif isinstance(obj, list):
+            for item in obj:
+                PolicyLoader.check_json_depth(item, max_depth, current_depth + 1)
+        return True
+    @staticmethod
+    def validate_policy_limits(data: dict[str, Any]) -> list[str]:
+        """Validate policy data against size limits.
+        Args:
+            data: Parsed policy dictionary
+        Returns:
+            List of validation warnings (empty if all limits passed)
+        """
+        warnings: list[str] = []
+        limits = PolicyValidationLimits
+        # Check statement count
+        statements = data.get("Statement", [])
+        if isinstance(statements, list) and len(statements) > limits.MAX_STATEMENTS:
+            warnings.append(
+                f"Policy has {len(statements)} statements, exceeds recommended max of {limits.MAX_STATEMENTS}"
+            )
+        # Check each statement
+        for i, stmt in enumerate(statements if isinstance(statements, list) else []):
+            if not isinstance(stmt, dict):
+                continue
+            # Check actions
+            actions = stmt.get("Action", [])
+            if isinstance(actions, list) and len(actions) > limits.MAX_ACTIONS_PER_STATEMENT:
+                warnings.append(
+                    f"Statement {i} has {len(actions)} actions, exceeds recommended max of {limits.MAX_ACTIONS_PER_STATEMENT}"
+                )
+            # Check resources
+            resources = stmt.get("Resource", [])
+            if isinstance(resources, list) and len(resources) > limits.MAX_RESOURCES_PER_STATEMENT:
+                warnings.append(
+                    f"Statement {i} has {len(resources)} resources, exceeds recommended max of {limits.MAX_RESOURCES_PER_STATEMENT}"
+                )
+        return warnings
     @staticmethod
     def _find_statement_line_numbers(file_content: str) -> list[int]:
         """Find line numbers for each statement in a JSON policy file.
@@ -128,6 +306,137 @@ class PolicyLoader:
         return statement_lines
+    @staticmethod
+    def _find_yaml_statement_line_numbers(file_content: str) -> list[int]:
+        """Find line numbers for each statement in a YAML policy file.
+        Uses PyYAML's line tracking to find where each statement starts.
+        Args:
+            file_content: Raw content of the YAML policy file
+        Returns:
+            List of line numbers (1-indexed) for each statement
+        """
+        class LineTrackingLoader(yaml.SafeLoader):
+            """Custom YAML loader that tracks line numbers for mappings."""
+            pass
+        def construct_mapping_with_line(loader: yaml.SafeLoader, node: yaml.MappingNode) -> dict:
+            """Construct a mapping while preserving line number info."""
+            mapping = loader.construct_mapping(node)
+            # Store line number as a special key (1-indexed)
+            mapping["__line__"] = node.start_mark.line + 1
+            return mapping
+        # Register custom constructor for mappings
+        LineTrackingLoader.add_constructor(
+            yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG,
+            construct_mapping_with_line,
+        )
+        try:
+            data = yaml.load(file_content, Loader=LineTrackingLoader)  # noqa: S506
+        except yaml.YAMLError:
+            return []
+        if not data or not isinstance(data, dict):
+            return []
+        # Extract statement line numbers
+        statement_line_numbers = []
+        statements = data.get("Statement", [])
+        if isinstance(statements, list):
+            for stmt in statements:
+                if isinstance(stmt, dict) and "__line__" in stmt:
+                    statement_line_numbers.append(stmt["__line__"])
+        return statement_line_numbers
+    @staticmethod
+    def parse_statement_field_lines(file_content: str) -> PolicyLineMap:
+        """Parse JSON to find exact line numbers for each field in each statement.
+        This provides field-level line mapping for precise PR comment placement.
+        For example, an issue about Action: "*" will point to the Action line,
+        not just the statement's opening brace.
+        Args:
+            file_content: Raw content of the JSON policy file
+        Returns:
+            PolicyLineMap with field-level line numbers for all statements
+        """
+        lines = file_content.split("\n")
+        policy_map = PolicyLineMap()
+        in_statement_array = False
+        brace_depth = 0
+        current_stmt: StatementLineMap | None = None
+        # Field name pattern (case-insensitive for robustness)
+        field_pattern = re.compile(
+            r'^\s*"(Sid|Effect|Action|NotAction|Resource|NotResource|Condition|Principal|NotPrincipal)"\s*:',
+            re.IGNORECASE,
+        )
+        for line_num, line in enumerate(lines, start=1):
+            # Look for "Statement" array
+            if '"Statement"' in line or "'Statement'" in line:
+                in_statement_array = True
+                continue
+            if not in_statement_array:
+                continue
+            # Track braces
+            for char in line:
+                if char == "{":
+                    if brace_depth == 0:
+                        # Start of a new statement
+                        current_stmt = StatementLineMap(statement_start=line_num)
+                    brace_depth += 1
+                elif char == "}":
+                    brace_depth -= 1
+                    if brace_depth == 0 and current_stmt is not None:
+                        # End of statement - save it
+                        policy_map.statements.append(current_stmt)
+                        current_stmt = None
+                elif char == "]" and brace_depth == 0:
+                    # End of Statement array
+                    in_statement_array = False
+                    break
+            # Parse field names at brace_depth == 1 (direct children of statement)
+            if in_statement_array and brace_depth == 1 and current_stmt is not None:
+                match = field_pattern.match(line)
+                if match:
+                    field_name = match.group(1).lower()
+                    # Map to dataclass attribute
+                    if field_name == "sid":
+                        current_stmt.sid = line_num
+                    elif field_name == "effect":
+                        current_stmt.effect = line_num
+                    elif field_name == "action":
+                        current_stmt.action = line_num
+                    elif field_name == "notaction":
+                        current_stmt.not_action = line_num
+                    elif field_name == "resource":
+                        current_stmt.resource = line_num
+                    elif field_name == "notresource":
+                        current_stmt.not_resource = line_num
+                    elif field_name == "condition":
+                        current_stmt.condition = line_num
+                    elif field_name == "principal":
+                        current_stmt.principal = line_num
+                    elif field_name == "notprincipal":
+                        current_stmt.not_principal = line_num
+        return policy_map
     def _check_file_size(self, path: Path) -> bool:
         """Check if file size is within limits.
@@ -197,14 +506,14 @@ class PolicyLoader:
             with open(path, encoding="utf-8") as f:
                 file_content = f.read()
-            # Parse line numbers for JSON files
+            # Parse line numbers based on file type
             statement_line_numbers = []
             if path.suffix.lower() == ".json":
                 statement_line_numbers = self._find_statement_line_numbers(file_content)
                 data = json.loads(file_content)
             else:  # .yaml or .yml
+                statement_line_numbers = self._find_yaml_statement_line_numbers(file_content)
                 data = yaml.safe_load(file_content)
-                # TODO: Add YAML line number tracking if needed
             # Validate and parse the policy
             policy = IAMPolicy.model_validate(data)

iam-policy-validator 1.13.1__py3-none-any.whl → 1.14.1__py3-none-any.whl

iam-policy-validator 1.13.1__py3-none-any.whl → 1.14.1__py3-none-any.whl