iam-policy-validator: iam_policy_validator-1.13.1-py3-none-any.whl → iam_policy_validator-1.14.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. {iam_policy_validator-1.13.1.dist-info → iam_policy_validator-1.14.1.dist-info}/METADATA +1 -1
  2. {iam_policy_validator-1.13.1.dist-info → iam_policy_validator-1.14.1.dist-info}/RECORD +45 -39
  3. iam_validator/__version__.py +1 -1
  4. iam_validator/checks/action_condition_enforcement.py +6 -0
  5. iam_validator/checks/action_resource_matching.py +12 -12
  6. iam_validator/checks/action_validation.py +1 -0
  7. iam_validator/checks/condition_key_validation.py +2 -0
  8. iam_validator/checks/condition_type_mismatch.py +3 -0
  9. iam_validator/checks/full_wildcard.py +1 -0
  10. iam_validator/checks/mfa_condition_check.py +2 -0
  11. iam_validator/checks/policy_structure.py +9 -0
  12. iam_validator/checks/policy_type_validation.py +11 -0
  13. iam_validator/checks/principal_validation.py +5 -0
  14. iam_validator/checks/resource_validation.py +4 -0
  15. iam_validator/checks/sensitive_action.py +1 -0
  16. iam_validator/checks/service_wildcard.py +6 -3
  17. iam_validator/checks/set_operator_validation.py +3 -0
  18. iam_validator/checks/sid_uniqueness.py +2 -0
  19. iam_validator/checks/trust_policy_validation.py +3 -0
  20. iam_validator/checks/utils/__init__.py +16 -0
  21. iam_validator/checks/utils/action_parser.py +149 -0
  22. iam_validator/checks/wildcard_action.py +1 -0
  23. iam_validator/checks/wildcard_resource.py +231 -4
  24. iam_validator/commands/analyze.py +19 -1
  25. iam_validator/commands/completion.py +6 -2
  26. iam_validator/commands/validate.py +231 -12
  27. iam_validator/core/aws_service/fetcher.py +21 -9
  28. iam_validator/core/codeowners.py +245 -0
  29. iam_validator/core/config/check_documentation.py +390 -0
  30. iam_validator/core/config/config_loader.py +199 -0
  31. iam_validator/core/config/defaults.py +25 -0
  32. iam_validator/core/constants.py +1 -0
  33. iam_validator/core/diff_parser.py +8 -4
  34. iam_validator/core/finding_fingerprint.py +131 -0
  35. iam_validator/core/formatters/sarif.py +370 -128
  36. iam_validator/core/ignore_processor.py +309 -0
  37. iam_validator/core/ignored_findings.py +400 -0
  38. iam_validator/core/models.py +54 -4
  39. iam_validator/core/policy_loader.py +313 -4
  40. iam_validator/core/pr_commenter.py +223 -22
  41. iam_validator/core/report.py +22 -6
  42. iam_validator/integrations/github_integration.py +881 -123
  43. {iam_policy_validator-1.13.1.dist-info → iam_policy_validator-1.14.1.dist-info}/WHEEL +0 -0
  44. {iam_policy_validator-1.13.1.dist-info → iam_policy_validator-1.14.1.dist-info}/entry_points.txt +0 -0
  45. {iam_policy_validator-1.13.1.dist-info → iam_policy_validator-1.14.1.dist-info}/licenses/LICENSE +0 -0
@@ -168,6 +168,17 @@ class ValidationIssue(BaseModel):
168
168
  check_id: str | None = (
169
169
  None # Check that triggered this issue (e.g., "policy_size", "sensitive_action")
170
170
  )
171
+ # Field that caused the issue (for precise line detection in PR comments)
172
+ # Values: "action", "resource", "condition", "principal", "effect", "sid"
173
+ field_name: str | None = None
174
+
175
+ # Enhanced finding quality fields (Phase 3)
176
+ # Explains why this issue is a security risk or compliance concern
177
+ risk_explanation: str | None = None
178
+ # Link to relevant AWS documentation or org-specific runbook
179
+ documentation_url: str | None = None
180
+ # Step-by-step remediation guidance
181
+ remediation_steps: list[str] | None = None
171
182
 
172
183
  # Severity level constants (ClassVar to avoid Pydantic treating them as fields)
173
184
  VALID_SEVERITIES: ClassVar[frozenset[str]] = frozenset(
@@ -205,11 +216,12 @@ class ValidationIssue(BaseModel):
205
216
  """Check if this issue uses IAM validity severity levels (error/warning/info)."""
206
217
  return self.severity in {"error", "warning", "info"}
207
218
 
208
- def to_pr_comment(self, include_identifier: bool = True) -> str:
219
+ def to_pr_comment(self, include_identifier: bool = True, file_path: str = "") -> str:
209
220
  """Format issue as a PR comment.
210
221
 
211
222
  Args:
212
223
  include_identifier: Whether to include bot identifier (for cleanup)
224
+ file_path: Relative path to the policy file (for finding ID)
213
225
 
214
226
  Returns:
215
227
  Formatted comment string
@@ -235,6 +247,21 @@ class ValidationIssue(BaseModel):
235
247
  parts.append(f"{constants.BOT_IDENTIFIER}\n")
236
248
  # Add issue type identifier to allow multiple issues at same line
237
249
  parts.append(f"<!-- issue-type: {self.issue_type} -->\n")
250
+ # Add finding ID for ignore tracking
251
+ if file_path:
252
+ from iam_validator.core.finding_fingerprint import compute_finding_hash
253
+
254
+ finding_hash = compute_finding_hash(
255
+ file_path=file_path,
256
+ check_id=self.check_id,
257
+ issue_type=self.issue_type,
258
+ statement_sid=self.statement_sid,
259
+ statement_index=self.statement_index,
260
+ action=self.action,
261
+ resource=self.resource,
262
+ condition_key=self.condition_key,
263
+ )
264
+ parts.append(f"<!-- finding-id: {finding_hash} -->\n")
238
265
 
239
266
  # Build statement context for better navigation
240
267
  statement_context = f"Statement[{self.statement_index}]"
@@ -248,9 +275,19 @@ class ValidationIssue(BaseModel):
248
275
  # Show message immediately (not collapsed)
249
276
  parts.append(self.message)
250
277
 
278
+ # Add risk explanation if present (shown prominently)
279
+ if self.risk_explanation:
280
+ parts.append("")
281
+ parts.append(f"> **Why this matters:** {self.risk_explanation}")
282
+
251
283
  # Put additional details in collapsible section if there are any
252
284
  has_details = bool(
253
- self.action or self.resource or self.condition_key or self.suggestion or self.example
285
+ self.action
286
+ or self.resource
287
+ or self.condition_key
288
+ or self.suggestion
289
+ or self.example
290
+ or self.remediation_steps
254
291
  )
255
292
 
256
293
  if has_details:
@@ -271,6 +308,13 @@ class ValidationIssue(BaseModel):
271
308
  parts.append(f" - Condition Key: `{self.condition_key}`")
272
309
  parts.append("")
273
310
 
311
+ # Add remediation steps if present
312
+ if self.remediation_steps:
313
+ parts.append("**🔧 How to Fix:**")
314
+ for i, step in enumerate(self.remediation_steps, 1):
315
+ parts.append(f" {i}. {step}")
316
+ parts.append("")
317
+
274
318
  # Add suggestion if present
275
319
  if self.suggestion:
276
320
  parts.append("**💡 Suggested Fix:**")
@@ -288,11 +332,17 @@ class ValidationIssue(BaseModel):
288
332
  parts.append("")
289
333
  parts.append("</details>")
290
334
 
291
- # Add check ID at the bottom if available
335
+ # Add check ID and documentation link at the bottom
336
+ footer_parts = []
292
337
  if self.check_id:
338
+ footer_parts.append(f"*Check: `{self.check_id}`*")
339
+ if self.documentation_url:
340
+ footer_parts.append(f"[📖 Documentation]({self.documentation_url})")
341
+
342
+ if footer_parts:
293
343
  parts.append("")
294
344
  parts.append("---")
295
- parts.append(f"*Check: `{self.check_id}`*")
345
+ parts.append(" | ".join(footer_parts))
296
346
 
297
347
  return "\n".join(parts)
298
348
 
@@ -27,18 +27,123 @@ Example usage:
27
27
 
28
28
  import json
29
29
  import logging
30
+ import re
30
31
  from collections.abc import Generator
32
+ from dataclasses import dataclass, field
31
33
  from pathlib import Path
32
- from typing import overload
34
+ from typing import Any, overload
33
35
 
34
36
  import yaml
35
37
  from pydantic import ValidationError
36
38
 
37
39
  from iam_validator.core.models import IAMPolicy
38
40
 
41
+
42
+ @dataclass
43
+ class StatementLineMap:
44
+ """Line numbers for each field in a statement.
45
+
46
+ Used for precise line-level PR comments on specific fields
47
+ (e.g., pointing to the exact Action line, not just the statement start).
48
+ """
49
+
50
+ statement_start: int # Opening brace line
51
+ sid: int | None = None
52
+ effect: int | None = None
53
+ action: int | None = None
54
+ not_action: int | None = None
55
+ resource: int | None = None
56
+ not_resource: int | None = None
57
+ condition: int | None = None
58
+ principal: int | None = None
59
+ not_principal: int | None = None
60
+
61
+ def get_line_for_field(self, field_name: str) -> int:
62
+ """Get line number for a specific field, fallback to statement start.
63
+
64
+ Args:
65
+ field_name: Field name (case-insensitive): action, resource, condition, etc.
66
+
67
+ Returns:
68
+ Line number for the field, or statement_start if not found
69
+ """
70
+ field_map = {
71
+ "sid": self.sid,
72
+ "effect": self.effect,
73
+ "action": self.action,
74
+ "notaction": self.not_action,
75
+ "resource": self.resource,
76
+ "notresource": self.not_resource,
77
+ "condition": self.condition,
78
+ "principal": self.principal,
79
+ "notprincipal": self.not_principal,
80
+ }
81
+ line = field_map.get(field_name.lower().replace("_", ""))
82
+ return line if line is not None else self.statement_start
83
+
84
+
85
+ @dataclass
86
+ class PolicyLineMap:
87
+ """Line mappings for all statements in a policy file.
88
+
89
+ Provides field-level line number lookup for PR comment placement.
90
+ """
91
+
92
+ statements: list[StatementLineMap] = field(default_factory=list)
93
+
94
+ def get_statement_map(self, index: int) -> StatementLineMap | None:
95
+ """Get line map for a specific statement by index.
96
+
97
+ Args:
98
+ index: Statement index (0-based)
99
+
100
+ Returns:
101
+ StatementLineMap or None if index out of range
102
+ """
103
+ if 0 <= index < len(self.statements):
104
+ return self.statements[index]
105
+ return None
106
+
107
+ def get_line_for_field(self, statement_index: int, field_name: str) -> int | None:
108
+ """Get line number for a field in a specific statement.
109
+
110
+ Args:
111
+ statement_index: Statement index (0-based)
112
+ field_name: Field name (action, resource, condition, etc.)
113
+
114
+ Returns:
115
+ Line number or None if statement not found
116
+ """
117
+ stmt_map = self.get_statement_map(statement_index)
118
+ if stmt_map:
119
+ return stmt_map.get_line_for_field(field_name)
120
+ return None
121
+
122
+
39
123
  logger = logging.getLogger(__name__)
40
124
 
41
125
 
126
+ class PolicyValidationLimits:
127
+ """Validation limits for policy loading.
128
+
129
+ These limits protect against DoS attacks via maliciously crafted policies
130
+ and ensure reasonable resource usage.
131
+ """
132
+
133
+ # Maximum file size in bytes (default: 10MB - AWS limit is 6KB for managed policies)
134
+ MAX_FILE_SIZE_BYTES: int = 10 * 1024 * 1024
135
+ # Maximum JSON/YAML nesting depth
136
+ MAX_DEPTH: int = 50
137
+ # Maximum number of statements per policy (AWS limit is ~20-30 depending on size)
138
+ MAX_STATEMENTS: int = 100
139
+ # Maximum number of actions per statement
140
+ MAX_ACTIONS_PER_STATEMENT: int = 500
141
+ # Maximum number of resources per statement
142
+ MAX_RESOURCES_PER_STATEMENT: int = 500
143
+ # Maximum string length for any field
144
+ MAX_STRING_LENGTH: int = 10000
145
+
146
+
42
147
  class PolicyLoader:
43
148
  """Loads and parses IAM policy documents from files.
44
149
 
@@ -49,17 +154,90 @@ class PolicyLoader:
49
154
  # Directories to skip when scanning recursively (cache, build artifacts, etc.)
50
155
  SKIP_DIRECTORIES = {".cache", ".git", "node_modules", "__pycache__", ".venv", "venv"}
51
156
 
52
- def __init__(self, max_file_size_mb: int = 100) -> None:
157
+ def __init__(
158
+ self,
159
+ max_file_size_mb: int = 100,
160
+ enforce_limits: bool = True,
161
+ ) -> None:
53
162
  """Initialize the policy loader.
54
163
 
55
164
  Args:
56
165
  max_file_size_mb: Maximum file size in MB to load (default: 100MB)
166
+ enforce_limits: Whether to enforce validation limits (default: True)
57
167
  """
58
168
  self.loaded_policies: list[tuple[str, IAMPolicy]] = []
59
169
  self.max_file_size_bytes = max_file_size_mb * 1024 * 1024
170
+ self.enforce_limits = enforce_limits
60
171
  # Track parsing/validation errors for reporting
61
172
  self.parsing_errors: list[tuple[str, str]] = [] # (file_path, error_message)
62
173
 
174
+ @staticmethod
175
+ def check_json_depth(
176
+ obj: Any, max_depth: int = PolicyValidationLimits.MAX_DEPTH, current_depth: int = 0
177
+ ) -> bool:
178
+ """Check if JSON object exceeds maximum nesting depth.
179
+
180
+ Args:
181
+ obj: JSON object to check
182
+ max_depth: Maximum allowed depth
183
+ current_depth: Current recursion depth
184
+
185
+ Returns:
186
+ True if within limits, raises ValueError if exceeded
187
+ """
188
+ if current_depth > max_depth:
189
+ raise ValueError(f"JSON nesting depth exceeds maximum of {max_depth}")
190
+
191
+ if isinstance(obj, dict):
192
+ for value in obj.values():
193
+ PolicyLoader.check_json_depth(value, max_depth, current_depth + 1)
194
+ elif isinstance(obj, list):
195
+ for item in obj:
196
+ PolicyLoader.check_json_depth(item, max_depth, current_depth + 1)
197
+
198
+ return True
199
+
200
+ @staticmethod
201
+ def validate_policy_limits(data: dict[str, Any]) -> list[str]:
202
+ """Validate policy data against size limits.
203
+
204
+ Args:
205
+ data: Parsed policy dictionary
206
+
207
+ Returns:
208
+ List of validation warnings (empty if all limits passed)
209
+ """
210
+ warnings: list[str] = []
211
+ limits = PolicyValidationLimits
212
+
213
+ # Check statement count
214
+ statements = data.get("Statement", [])
215
+ if isinstance(statements, list) and len(statements) > limits.MAX_STATEMENTS:
216
+ warnings.append(
217
+ f"Policy has {len(statements)} statements, exceeds recommended max of {limits.MAX_STATEMENTS}"
218
+ )
219
+
220
+ # Check each statement
221
+ for i, stmt in enumerate(statements if isinstance(statements, list) else []):
222
+ if not isinstance(stmt, dict):
223
+ continue
224
+
225
+ # Check actions
226
+ actions = stmt.get("Action", [])
227
+ if isinstance(actions, list) and len(actions) > limits.MAX_ACTIONS_PER_STATEMENT:
228
+ warnings.append(
229
+ f"Statement {i} has {len(actions)} actions, exceeds recommended max of {limits.MAX_ACTIONS_PER_STATEMENT}"
230
+ )
231
+
232
+ # Check resources
233
+ resources = stmt.get("Resource", [])
234
+ if isinstance(resources, list) and len(resources) > limits.MAX_RESOURCES_PER_STATEMENT:
235
+ warnings.append(
236
+ f"Statement {i} has {len(resources)} resources, exceeds recommended max of {limits.MAX_RESOURCES_PER_STATEMENT}"
237
+ )
238
+
239
+ return warnings
240
+
63
241
  @staticmethod
64
242
  def _find_statement_line_numbers(file_content: str) -> list[int]:
65
243
  """Find line numbers for each statement in a JSON policy file.
@@ -128,6 +306,137 @@ class PolicyLoader:
128
306
 
129
307
  return statement_lines
130
308
 
309
+ @staticmethod
310
+ def _find_yaml_statement_line_numbers(file_content: str) -> list[int]:
311
+ """Find line numbers for each statement in a YAML policy file.
312
+
313
+ Uses PyYAML's line tracking to find where each statement starts.
314
+
315
+ Args:
316
+ file_content: Raw content of the YAML policy file
317
+
318
+ Returns:
319
+ List of line numbers (1-indexed) for each statement
320
+ """
321
+
322
+ class LineTrackingLoader(yaml.SafeLoader):
323
+ """Custom YAML loader that tracks line numbers for mappings."""
324
+
325
+ pass
326
+
327
+ def construct_mapping_with_line(loader: yaml.SafeLoader, node: yaml.MappingNode) -> dict:
328
+ """Construct a mapping while preserving line number info."""
329
+ mapping = loader.construct_mapping(node)
330
+ # Store line number as a special key (1-indexed)
331
+ mapping["__line__"] = node.start_mark.line + 1
332
+ return mapping
333
+
334
+ # Register custom constructor for mappings
335
+ LineTrackingLoader.add_constructor(
336
+ yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG,
337
+ construct_mapping_with_line,
338
+ )
339
+
340
+ try:
341
+ data = yaml.load(file_content, Loader=LineTrackingLoader) # noqa: S506
342
+ except yaml.YAMLError:
343
+ return []
344
+
345
+ if not data or not isinstance(data, dict):
346
+ return []
347
+
348
+ # Extract statement line numbers
349
+ statement_line_numbers = []
350
+ statements = data.get("Statement", [])
351
+
352
+ if isinstance(statements, list):
353
+ for stmt in statements:
354
+ if isinstance(stmt, dict) and "__line__" in stmt:
355
+ statement_line_numbers.append(stmt["__line__"])
356
+
357
+ return statement_line_numbers
358
+
359
+ @staticmethod
360
+ def parse_statement_field_lines(file_content: str) -> PolicyLineMap:
361
+ """Parse JSON to find exact line numbers for each field in each statement.
362
+
363
+ This provides field-level line mapping for precise PR comment placement.
364
+ For example, an issue about Action: "*" will point to the Action line,
365
+ not just the statement's opening brace.
366
+
367
+ Args:
368
+ file_content: Raw content of the JSON policy file
369
+
370
+ Returns:
371
+ PolicyLineMap with field-level line numbers for all statements
372
+ """
373
+ lines = file_content.split("\n")
374
+ policy_map = PolicyLineMap()
375
+
376
+ in_statement_array = False
377
+ brace_depth = 0
378
+ current_stmt: StatementLineMap | None = None
379
+
380
+ # Field name pattern (case-insensitive for robustness)
381
+ field_pattern = re.compile(
382
+ r'^\s*"(Sid|Effect|Action|NotAction|Resource|NotResource|Condition|Principal|NotPrincipal)"\s*:',
383
+ re.IGNORECASE,
384
+ )
385
+
386
+ for line_num, line in enumerate(lines, start=1):
387
+ # Look for "Statement" array
388
+ if '"Statement"' in line or "'Statement'" in line:
389
+ in_statement_array = True
390
+ continue
391
+
392
+ if not in_statement_array:
393
+ continue
394
+
395
+ # Track braces
396
+ for char in line:
397
+ if char == "{":
398
+ if brace_depth == 0:
399
+ # Start of a new statement
400
+ current_stmt = StatementLineMap(statement_start=line_num)
401
+ brace_depth += 1
402
+ elif char == "}":
403
+ brace_depth -= 1
404
+ if brace_depth == 0 and current_stmt is not None:
405
+ # End of statement - save it
406
+ policy_map.statements.append(current_stmt)
407
+ current_stmt = None
408
+ elif char == "]" and brace_depth == 0:
409
+ # End of Statement array
410
+ in_statement_array = False
411
+ break
412
+
413
+ # Parse field names at brace_depth == 1 (direct children of statement)
414
+ if in_statement_array and brace_depth == 1 and current_stmt is not None:
415
+ match = field_pattern.match(line)
416
+ if match:
417
+ field_name = match.group(1).lower()
418
+ # Map to dataclass attribute
419
+ if field_name == "sid":
420
+ current_stmt.sid = line_num
421
+ elif field_name == "effect":
422
+ current_stmt.effect = line_num
423
+ elif field_name == "action":
424
+ current_stmt.action = line_num
425
+ elif field_name == "notaction":
426
+ current_stmt.not_action = line_num
427
+ elif field_name == "resource":
428
+ current_stmt.resource = line_num
429
+ elif field_name == "notresource":
430
+ current_stmt.not_resource = line_num
431
+ elif field_name == "condition":
432
+ current_stmt.condition = line_num
433
+ elif field_name == "principal":
434
+ current_stmt.principal = line_num
435
+ elif field_name == "notprincipal":
436
+ current_stmt.not_principal = line_num
437
+
438
+ return policy_map
439
+
131
440
  def _check_file_size(self, path: Path) -> bool:
132
441
  """Check if file size is within limits.
133
442
 
@@ -197,14 +506,14 @@ class PolicyLoader:
197
506
  with open(path, encoding="utf-8") as f:
198
507
  file_content = f.read()
199
508
 
200
- # Parse line numbers for JSON files
509
+ # Parse line numbers based on file type
201
510
  statement_line_numbers = []
202
511
  if path.suffix.lower() == ".json":
203
512
  statement_line_numbers = self._find_statement_line_numbers(file_content)
204
513
  data = json.loads(file_content)
205
514
  else: # .yaml or .yml
515
+ statement_line_numbers = self._find_yaml_statement_line_numbers(file_content)
206
516
  data = yaml.safe_load(file_content)
207
- # TODO: Add YAML line number tracking if needed
208
517
 
209
518
  # Validate and parse the policy
210
519
  policy = IAMPolicy.model_validate(data)