iam-policy-validator 1.14.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106) hide show
  1. iam_policy_validator-1.14.0.dist-info/METADATA +782 -0
  2. iam_policy_validator-1.14.0.dist-info/RECORD +106 -0
  3. iam_policy_validator-1.14.0.dist-info/WHEEL +4 -0
  4. iam_policy_validator-1.14.0.dist-info/entry_points.txt +2 -0
  5. iam_policy_validator-1.14.0.dist-info/licenses/LICENSE +21 -0
  6. iam_validator/__init__.py +27 -0
  7. iam_validator/__main__.py +11 -0
  8. iam_validator/__version__.py +9 -0
  9. iam_validator/checks/__init__.py +45 -0
  10. iam_validator/checks/action_condition_enforcement.py +1442 -0
  11. iam_validator/checks/action_resource_matching.py +472 -0
  12. iam_validator/checks/action_validation.py +67 -0
  13. iam_validator/checks/condition_key_validation.py +88 -0
  14. iam_validator/checks/condition_type_mismatch.py +257 -0
  15. iam_validator/checks/full_wildcard.py +62 -0
  16. iam_validator/checks/mfa_condition_check.py +105 -0
  17. iam_validator/checks/policy_size.py +114 -0
  18. iam_validator/checks/policy_structure.py +556 -0
  19. iam_validator/checks/policy_type_validation.py +331 -0
  20. iam_validator/checks/principal_validation.py +708 -0
  21. iam_validator/checks/resource_validation.py +135 -0
  22. iam_validator/checks/sensitive_action.py +438 -0
  23. iam_validator/checks/service_wildcard.py +98 -0
  24. iam_validator/checks/set_operator_validation.py +153 -0
  25. iam_validator/checks/sid_uniqueness.py +146 -0
  26. iam_validator/checks/trust_policy_validation.py +509 -0
  27. iam_validator/checks/utils/__init__.py +17 -0
  28. iam_validator/checks/utils/action_parser.py +149 -0
  29. iam_validator/checks/utils/policy_level_checks.py +190 -0
  30. iam_validator/checks/utils/sensitive_action_matcher.py +293 -0
  31. iam_validator/checks/utils/wildcard_expansion.py +86 -0
  32. iam_validator/checks/wildcard_action.py +58 -0
  33. iam_validator/checks/wildcard_resource.py +374 -0
  34. iam_validator/commands/__init__.py +31 -0
  35. iam_validator/commands/analyze.py +549 -0
  36. iam_validator/commands/base.py +48 -0
  37. iam_validator/commands/cache.py +393 -0
  38. iam_validator/commands/completion.py +471 -0
  39. iam_validator/commands/download_services.py +255 -0
  40. iam_validator/commands/post_to_pr.py +86 -0
  41. iam_validator/commands/query.py +485 -0
  42. iam_validator/commands/validate.py +830 -0
  43. iam_validator/core/__init__.py +13 -0
  44. iam_validator/core/access_analyzer.py +671 -0
  45. iam_validator/core/access_analyzer_report.py +640 -0
  46. iam_validator/core/aws_fetcher.py +29 -0
  47. iam_validator/core/aws_service/__init__.py +21 -0
  48. iam_validator/core/aws_service/cache.py +108 -0
  49. iam_validator/core/aws_service/client.py +205 -0
  50. iam_validator/core/aws_service/fetcher.py +641 -0
  51. iam_validator/core/aws_service/parsers.py +149 -0
  52. iam_validator/core/aws_service/patterns.py +51 -0
  53. iam_validator/core/aws_service/storage.py +291 -0
  54. iam_validator/core/aws_service/validators.py +380 -0
  55. iam_validator/core/check_registry.py +679 -0
  56. iam_validator/core/cli.py +134 -0
  57. iam_validator/core/codeowners.py +245 -0
  58. iam_validator/core/condition_validators.py +626 -0
  59. iam_validator/core/config/__init__.py +81 -0
  60. iam_validator/core/config/aws_api.py +35 -0
  61. iam_validator/core/config/aws_global_conditions.py +160 -0
  62. iam_validator/core/config/category_suggestions.py +181 -0
  63. iam_validator/core/config/check_documentation.py +390 -0
  64. iam_validator/core/config/condition_requirements.py +258 -0
  65. iam_validator/core/config/config_loader.py +670 -0
  66. iam_validator/core/config/defaults.py +739 -0
  67. iam_validator/core/config/principal_requirements.py +421 -0
  68. iam_validator/core/config/sensitive_actions.py +672 -0
  69. iam_validator/core/config/service_principals.py +132 -0
  70. iam_validator/core/config/wildcards.py +127 -0
  71. iam_validator/core/constants.py +149 -0
  72. iam_validator/core/diff_parser.py +325 -0
  73. iam_validator/core/finding_fingerprint.py +131 -0
  74. iam_validator/core/formatters/__init__.py +27 -0
  75. iam_validator/core/formatters/base.py +147 -0
  76. iam_validator/core/formatters/console.py +68 -0
  77. iam_validator/core/formatters/csv.py +171 -0
  78. iam_validator/core/formatters/enhanced.py +481 -0
  79. iam_validator/core/formatters/html.py +672 -0
  80. iam_validator/core/formatters/json.py +33 -0
  81. iam_validator/core/formatters/markdown.py +64 -0
  82. iam_validator/core/formatters/sarif.py +251 -0
  83. iam_validator/core/ignore_patterns.py +297 -0
  84. iam_validator/core/ignore_processor.py +309 -0
  85. iam_validator/core/ignored_findings.py +400 -0
  86. iam_validator/core/label_manager.py +197 -0
  87. iam_validator/core/models.py +404 -0
  88. iam_validator/core/policy_checks.py +220 -0
  89. iam_validator/core/policy_loader.py +785 -0
  90. iam_validator/core/pr_commenter.py +780 -0
  91. iam_validator/core/report.py +942 -0
  92. iam_validator/integrations/__init__.py +28 -0
  93. iam_validator/integrations/github_integration.py +1821 -0
  94. iam_validator/integrations/ms_teams.py +442 -0
  95. iam_validator/sdk/__init__.py +220 -0
  96. iam_validator/sdk/arn_matching.py +382 -0
  97. iam_validator/sdk/context.py +222 -0
  98. iam_validator/sdk/exceptions.py +48 -0
  99. iam_validator/sdk/helpers.py +177 -0
  100. iam_validator/sdk/policy_utils.py +451 -0
  101. iam_validator/sdk/query_utils.py +454 -0
  102. iam_validator/sdk/shortcuts.py +283 -0
  103. iam_validator/utils/__init__.py +35 -0
  104. iam_validator/utils/cache.py +105 -0
  105. iam_validator/utils/regex.py +205 -0
  106. iam_validator/utils/terminal.py +22 -0
@@ -0,0 +1,785 @@
1
+ """IAM Policy Loader Module.
2
+
3
+ This module provides functionality to load and parse IAM policy documents
4
+ from various file formats (JSON, YAML) and directories.
5
+
6
+ The loader supports both eager loading (load all at once) and streaming
7
+ (process one file at a time) to optimize memory usage.
8
+
9
+ Example usage:
10
+ loader = PolicyLoader()
11
+
12
+ # Eager loading (loads all files into memory)
13
+ policy = loader.load_from_file("policy.json")
14
+ policies = loader.load_from_directory("./policies/", recursive=True)
15
+ policies = loader.load_from_path("./policies/", recursive=False)
16
+
17
+ # Streaming (memory-efficient, processes one file at a time)
18
+ for file_path, policy in loader.stream_from_path("./policies/"):
19
+ # Process each policy immediately
20
+ validate_and_report(file_path, policy)
21
+
22
+ # Batch processing (configurable batch size)
23
+ for batch in loader.batch_from_paths(["./policies/"], batch_size=10):
24
+ # Process batch of up to 10 policies
25
+ validate_batch(batch)
26
+ """
27
+
28
+ import json
29
+ import logging
30
+ import re
31
+ from collections.abc import Generator
32
+ from dataclasses import dataclass, field
33
+ from pathlib import Path
34
+ from typing import Any, overload
35
+
36
+ import yaml
37
+ from pydantic import ValidationError
38
+
39
+ from iam_validator.core.models import IAMPolicy
40
+
41
+
42
+ @dataclass
43
+ class StatementLineMap:
44
+ """Line numbers for each field in a statement.
45
+
46
+ Used for precise line-level PR comments on specific fields
47
+ (e.g., pointing to the exact Action line, not just the statement start).
48
+ """
49
+
50
+ statement_start: int # Opening brace line
51
+ sid: int | None = None
52
+ effect: int | None = None
53
+ action: int | None = None
54
+ not_action: int | None = None
55
+ resource: int | None = None
56
+ not_resource: int | None = None
57
+ condition: int | None = None
58
+ principal: int | None = None
59
+ not_principal: int | None = None
60
+
61
+ def get_line_for_field(self, field_name: str) -> int:
62
+ """Get line number for a specific field, fallback to statement start.
63
+
64
+ Args:
65
+ field_name: Field name (case-insensitive): action, resource, condition, etc.
66
+
67
+ Returns:
68
+ Line number for the field, or statement_start if not found
69
+ """
70
+ field_map = {
71
+ "sid": self.sid,
72
+ "effect": self.effect,
73
+ "action": self.action,
74
+ "notaction": self.not_action,
75
+ "resource": self.resource,
76
+ "notresource": self.not_resource,
77
+ "condition": self.condition,
78
+ "principal": self.principal,
79
+ "notprincipal": self.not_principal,
80
+ }
81
+ line = field_map.get(field_name.lower().replace("_", ""))
82
+ return line if line is not None else self.statement_start
83
+
84
+
85
@dataclass
class PolicyLineMap:
    """Line maps of every statement found in one policy file.

    Supports field-level line lookups used when placing PR comments.
    """

    statements: list[StatementLineMap] = field(default_factory=list)

    def get_statement_map(self, index: int) -> StatementLineMap | None:
        """Return the line map of the statement at ``index``.

        Args:
            index: Zero-based statement index

        Returns:
            The matching StatementLineMap, or None when ``index`` is negative
            or beyond the last statement
        """
        in_range = 0 <= index < len(self.statements)
        return self.statements[index] if in_range else None

    def get_line_for_field(self, statement_index: int, field_name: str) -> int | None:
        """Return the line of ``field_name`` within one statement.

        Args:
            statement_index: Zero-based statement index
            field_name: Field name (action, resource, condition, etc.)

        Returns:
            The line number reported by the statement's map, or None when no
            statement exists at ``statement_index``
        """
        statement_map = self.get_statement_map(statement_index)
        if not statement_map:
            return None
        return statement_map.get_line_for_field(field_name)
121
+
122
+
123
+ logger = logging.getLogger(__name__)
124
+
125
+
126
class PolicyValidationLimits:
    """Upper bounds applied when loading policies.

    The bounds guard against denial-of-service through maliciously crafted
    policy documents and keep resource usage reasonable.
    """

    # Largest accepted file, in bytes (10MB; the original author notes the AWS
    # limit for managed policies is about 6KB).
    MAX_FILE_SIZE_BYTES: int = 10 * 1024**2
    # Deepest accepted JSON/YAML nesting.
    MAX_DEPTH: int = 50
    # Most statements per policy (the original author notes the AWS limit is
    # roughly 20-30 depending on size).
    MAX_STATEMENTS: int = 100
    # Most actions in a single statement.
    MAX_ACTIONS_PER_STATEMENT: int = 500
    # Most resources in a single statement.
    MAX_RESOURCES_PER_STATEMENT: int = 500
    # Longest accepted string for any field.
    MAX_STRING_LENGTH: int = 10_000
145
+
146
+
147
+ class PolicyLoader:
148
+ """Loads and parses IAM policy documents from files.
149
+
150
+ Supports both eager loading and streaming for memory efficiency.
151
+ """
152
+
153
+ SUPPORTED_EXTENSIONS = {".json", ".yaml", ".yml"}
154
+ # Directories to skip when scanning recursively (cache, build artifacts, etc.)
155
+ SKIP_DIRECTORIES = {".cache", ".git", "node_modules", "__pycache__", ".venv", "venv"}
156
+
157
+ def __init__(
158
+ self,
159
+ max_file_size_mb: int = 100,
160
+ enforce_limits: bool = True,
161
+ ) -> None:
162
+ """Initialize the policy loader.
163
+
164
+ Args:
165
+ max_file_size_mb: Maximum file size in MB to load (default: 100MB)
166
+ enforce_limits: Whether to enforce validation limits (default: True)
167
+ """
168
+ self.loaded_policies: list[tuple[str, IAMPolicy]] = []
169
+ self.max_file_size_bytes = max_file_size_mb * 1024 * 1024
170
+ self.enforce_limits = enforce_limits
171
+ # Track parsing/validation errors for reporting
172
+ self.parsing_errors: list[tuple[str, str]] = [] # (file_path, error_message)
173
+
174
+ @staticmethod
175
+ def check_json_depth(
176
+ obj: Any, max_depth: int = PolicyValidationLimits.MAX_DEPTH, current_depth: int = 0
177
+ ) -> bool:
178
+ """Check if JSON object exceeds maximum nesting depth.
179
+
180
+ Args:
181
+ obj: JSON object to check
182
+ max_depth: Maximum allowed depth
183
+ current_depth: Current recursion depth
184
+
185
+ Returns:
186
+ True if within limits, raises ValueError if exceeded
187
+ """
188
+ if current_depth > max_depth:
189
+ raise ValueError(f"JSON nesting depth exceeds maximum of {max_depth}")
190
+
191
+ if isinstance(obj, dict):
192
+ for value in obj.values():
193
+ PolicyLoader.check_json_depth(value, max_depth, current_depth + 1)
194
+ elif isinstance(obj, list):
195
+ for item in obj:
196
+ PolicyLoader.check_json_depth(item, max_depth, current_depth + 1)
197
+
198
+ return True
199
+
200
+ @staticmethod
201
+ def validate_policy_limits(data: dict[str, Any]) -> list[str]:
202
+ """Validate policy data against size limits.
203
+
204
+ Args:
205
+ data: Parsed policy dictionary
206
+
207
+ Returns:
208
+ List of validation warnings (empty if all limits passed)
209
+ """
210
+ warnings: list[str] = []
211
+ limits = PolicyValidationLimits
212
+
213
+ # Check statement count
214
+ statements = data.get("Statement", [])
215
+ if isinstance(statements, list) and len(statements) > limits.MAX_STATEMENTS:
216
+ warnings.append(
217
+ f"Policy has {len(statements)} statements, exceeds recommended max of {limits.MAX_STATEMENTS}"
218
+ )
219
+
220
+ # Check each statement
221
+ for i, stmt in enumerate(statements if isinstance(statements, list) else []):
222
+ if not isinstance(stmt, dict):
223
+ continue
224
+
225
+ # Check actions
226
+ actions = stmt.get("Action", [])
227
+ if isinstance(actions, list) and len(actions) > limits.MAX_ACTIONS_PER_STATEMENT:
228
+ warnings.append(
229
+ f"Statement {i} has {len(actions)} actions, exceeds recommended max of {limits.MAX_ACTIONS_PER_STATEMENT}"
230
+ )
231
+
232
+ # Check resources
233
+ resources = stmt.get("Resource", [])
234
+ if isinstance(resources, list) and len(resources) > limits.MAX_RESOURCES_PER_STATEMENT:
235
+ warnings.append(
236
+ f"Statement {i} has {len(resources)} resources, exceeds recommended max of {limits.MAX_RESOURCES_PER_STATEMENT}"
237
+ )
238
+
239
+ return warnings
240
+
241
+ @staticmethod
242
+ def _find_statement_line_numbers(file_content: str) -> list[int]:
243
+ """Find line numbers for each statement in a JSON policy file.
244
+
245
+ Args:
246
+ file_content: Raw content of the policy file
247
+
248
+ Returns:
249
+ List of line numbers (1-indexed) for each statement's Sid or opening brace
250
+ """
251
+ lines = file_content.split("\n")
252
+ statement_lines = []
253
+ in_statement_array = False
254
+ brace_depth = 0
255
+ statement_start_line = None
256
+ current_statement_first_field = None
257
+
258
+ for line_num, line in enumerate(lines, start=1):
259
+ # Look for "Statement" array
260
+ if '"Statement"' in line or "'Statement'" in line:
261
+ in_statement_array = True
262
+ continue
263
+
264
+ if not in_statement_array:
265
+ continue
266
+
267
+ # Track opening braces for statement objects
268
+ for char in line:
269
+ if char == "{":
270
+ if brace_depth == 0 and statement_start_line is None:
271
+ # Found the start of a statement object
272
+ statement_start_line = line_num
273
+ current_statement_first_field = None
274
+ brace_depth += 1
275
+ elif char == "}":
276
+ brace_depth -= 1
277
+ if brace_depth == 0 and statement_start_line is not None:
278
+ # Completed a statement object
279
+ # Use first field line if found, otherwise use opening brace
280
+ statement_lines.append(
281
+ current_statement_first_field or statement_start_line
282
+ )
283
+ statement_start_line = None
284
+ current_statement_first_field = None
285
+ elif char == "]" and brace_depth == 0:
286
+ # End of Statement array
287
+ in_statement_array = False
288
+ break
289
+
290
+ # Track first field in statement (usually Sid, Effect, or Action)
291
+ if (
292
+ in_statement_array
293
+ and brace_depth == 1
294
+ and current_statement_first_field is None
295
+ and statement_start_line is not None
296
+ ):
297
+ stripped = line.strip()
298
+ # Look for first JSON field (e.g., "Sid":, "Effect":, "Action":)
299
+ if (
300
+ stripped
301
+ and stripped[0] == '"'
302
+ and ":" in stripped
303
+ and not stripped.startswith('"{')
304
+ ):
305
+ current_statement_first_field = line_num
306
+
307
+ return statement_lines
308
+
309
+ @staticmethod
310
+ def _find_yaml_statement_line_numbers(file_content: str) -> list[int]:
311
+ """Find line numbers for each statement in a YAML policy file.
312
+
313
+ Uses PyYAML's line tracking to find where each statement starts.
314
+
315
+ Args:
316
+ file_content: Raw content of the YAML policy file
317
+
318
+ Returns:
319
+ List of line numbers (1-indexed) for each statement
320
+ """
321
+
322
+ class LineTrackingLoader(yaml.SafeLoader):
323
+ """Custom YAML loader that tracks line numbers for mappings."""
324
+
325
+ pass
326
+
327
+ def construct_mapping_with_line(loader: yaml.SafeLoader, node: yaml.MappingNode) -> dict:
328
+ """Construct a mapping while preserving line number info."""
329
+ mapping = loader.construct_mapping(node)
330
+ # Store line number as a special key (1-indexed)
331
+ mapping["__line__"] = node.start_mark.line + 1
332
+ return mapping
333
+
334
+ # Register custom constructor for mappings
335
+ LineTrackingLoader.add_constructor(
336
+ yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG,
337
+ construct_mapping_with_line,
338
+ )
339
+
340
+ try:
341
+ data = yaml.load(file_content, Loader=LineTrackingLoader) # noqa: S506
342
+ except yaml.YAMLError:
343
+ return []
344
+
345
+ if not data or not isinstance(data, dict):
346
+ return []
347
+
348
+ # Extract statement line numbers
349
+ statement_line_numbers = []
350
+ statements = data.get("Statement", [])
351
+
352
+ if isinstance(statements, list):
353
+ for stmt in statements:
354
+ if isinstance(stmt, dict) and "__line__" in stmt:
355
+ statement_line_numbers.append(stmt["__line__"])
356
+
357
+ return statement_line_numbers
358
+
359
+ @staticmethod
360
+ def parse_statement_field_lines(file_content: str) -> PolicyLineMap:
361
+ """Parse JSON to find exact line numbers for each field in each statement.
362
+
363
+ This provides field-level line mapping for precise PR comment placement.
364
+ For example, an issue about Action: "*" will point to the Action line,
365
+ not just the statement's opening brace.
366
+
367
+ Args:
368
+ file_content: Raw content of the JSON policy file
369
+
370
+ Returns:
371
+ PolicyLineMap with field-level line numbers for all statements
372
+ """
373
+ lines = file_content.split("\n")
374
+ policy_map = PolicyLineMap()
375
+
376
+ in_statement_array = False
377
+ brace_depth = 0
378
+ current_stmt: StatementLineMap | None = None
379
+
380
+ # Field name pattern (case-insensitive for robustness)
381
+ field_pattern = re.compile(
382
+ r'^\s*"(Sid|Effect|Action|NotAction|Resource|NotResource|Condition|Principal|NotPrincipal)"\s*:',
383
+ re.IGNORECASE,
384
+ )
385
+
386
+ for line_num, line in enumerate(lines, start=1):
387
+ # Look for "Statement" array
388
+ if '"Statement"' in line or "'Statement'" in line:
389
+ in_statement_array = True
390
+ continue
391
+
392
+ if not in_statement_array:
393
+ continue
394
+
395
+ # Track braces
396
+ for char in line:
397
+ if char == "{":
398
+ if brace_depth == 0:
399
+ # Start of a new statement
400
+ current_stmt = StatementLineMap(statement_start=line_num)
401
+ brace_depth += 1
402
+ elif char == "}":
403
+ brace_depth -= 1
404
+ if brace_depth == 0 and current_stmt is not None:
405
+ # End of statement - save it
406
+ policy_map.statements.append(current_stmt)
407
+ current_stmt = None
408
+ elif char == "]" and brace_depth == 0:
409
+ # End of Statement array
410
+ in_statement_array = False
411
+ break
412
+
413
+ # Parse field names at brace_depth == 1 (direct children of statement)
414
+ if in_statement_array and brace_depth == 1 and current_stmt is not None:
415
+ match = field_pattern.match(line)
416
+ if match:
417
+ field_name = match.group(1).lower()
418
+ # Map to dataclass attribute
419
+ if field_name == "sid":
420
+ current_stmt.sid = line_num
421
+ elif field_name == "effect":
422
+ current_stmt.effect = line_num
423
+ elif field_name == "action":
424
+ current_stmt.action = line_num
425
+ elif field_name == "notaction":
426
+ current_stmt.not_action = line_num
427
+ elif field_name == "resource":
428
+ current_stmt.resource = line_num
429
+ elif field_name == "notresource":
430
+ current_stmt.not_resource = line_num
431
+ elif field_name == "condition":
432
+ current_stmt.condition = line_num
433
+ elif field_name == "principal":
434
+ current_stmt.principal = line_num
435
+ elif field_name == "notprincipal":
436
+ current_stmt.not_principal = line_num
437
+
438
+ return policy_map
439
+
440
+ def _check_file_size(self, path: Path) -> bool:
441
+ """Check if file size is within limits.
442
+
443
+ Args:
444
+ path: Path to the file
445
+
446
+ Returns:
447
+ True if file size is acceptable, False otherwise
448
+ """
449
+ try:
450
+ file_size = path.stat().st_size
451
+ if file_size > self.max_file_size_bytes:
452
+ logger.warning(
453
+ f"File {path} exceeds maximum size "
454
+ f"({file_size / 1024 / 1024:.2f}MB > "
455
+ f"{self.max_file_size_bytes / 1024 / 1024:.2f}MB). Skipping."
456
+ )
457
+ return False
458
+ return True
459
+ except OSError as e:
460
+ logger.error("Failed to check file size for %s: %s", path, e)
461
+ return False
462
+
463
+ @overload
464
+ def load_from_file(self, file_path: str, return_raw_dict: bool = False) -> IAMPolicy | None: ...
465
+
466
+ @overload
467
+ def load_from_file(
468
+ self, file_path: str, return_raw_dict: bool = True
469
+ ) -> tuple[IAMPolicy, dict] | None: ...
470
+
471
+ def load_from_file(
472
+ self, file_path: str, return_raw_dict: bool = False
473
+ ) -> IAMPolicy | tuple[IAMPolicy, dict] | None:
474
+ """Load a single IAM policy from a file.
475
+
476
+ Args:
477
+ file_path: Path to the policy file
478
+ return_raw_dict: If True, return tuple of (policy, raw_dict) for validation
479
+
480
+ Returns:
481
+ Parsed IAMPolicy, or tuple of (IAMPolicy, raw_dict) if return_raw_dict=True,
482
+ or None if loading fails
483
+ """
484
+ path = Path(file_path)
485
+
486
+ if not path.exists():
487
+ logger.error("File not found: %s", file_path)
488
+ return None
489
+
490
+ if not path.is_file():
491
+ logger.error("Not a file: %s", file_path)
492
+ return None
493
+
494
+ if path.suffix.lower() not in self.SUPPORTED_EXTENSIONS:
495
+ logger.warning(
496
+ f"Unsupported file extension: {path.suffix}. "
497
+ f"Supported: {', '.join(self.SUPPORTED_EXTENSIONS)}"
498
+ )
499
+ return None
500
+
501
+ # Check file size before loading
502
+ if not self._check_file_size(path):
503
+ return None
504
+
505
+ try:
506
+ with open(path, encoding="utf-8") as f:
507
+ file_content = f.read()
508
+
509
+ # Parse line numbers based on file type
510
+ statement_line_numbers = []
511
+ if path.suffix.lower() == ".json":
512
+ statement_line_numbers = self._find_statement_line_numbers(file_content)
513
+ data = json.loads(file_content)
514
+ else: # .yaml or .yml
515
+ statement_line_numbers = self._find_yaml_statement_line_numbers(file_content)
516
+ data = yaml.safe_load(file_content)
517
+
518
+ # Validate and parse the policy
519
+ policy = IAMPolicy.model_validate(data)
520
+
521
+ # Attach line numbers to statements
522
+ if statement_line_numbers:
523
+ for idx, statement in enumerate(policy.statement or []):
524
+ if idx < len(statement_line_numbers):
525
+ statement.line_number = statement_line_numbers[idx]
526
+
527
+ logger.info("Successfully loaded policy from %s", file_path)
528
+ return (policy, data) if return_raw_dict else policy
529
+
530
+ except json.JSONDecodeError as e:
531
+ error_msg = f"Invalid JSON: {e}"
532
+ logger.error("Invalid JSON in %s: %s", file_path, e)
533
+ self.parsing_errors.append((file_path, error_msg))
534
+ return None
535
+ except yaml.YAMLError as e:
536
+ error_msg = f"Invalid YAML: {e}"
537
+ logger.error("Invalid YAML in %s: %s", file_path, e)
538
+ self.parsing_errors.append((file_path, error_msg))
539
+ return None
540
+ except ValidationError as e:
541
+ # Handle Pydantic validation errors with helpful messages
542
+ error_messages = []
543
+ for error in e.errors():
544
+ loc = ".".join(str(x) for x in error["loc"])
545
+ error_type = error["type"]
546
+
547
+ # Provide user-friendly messages for common errors
548
+ if error_type == "extra_forbidden":
549
+ # Extract the field name that has a typo
550
+ field_name = error["loc"][-1] if error["loc"] else "unknown"
551
+ error_messages.append(
552
+ f"Unknown field '{field_name}' at {loc}. "
553
+ f"This might be a typo. Did you mean 'Condition', 'Action', or 'Resource'?"
554
+ )
555
+ else:
556
+ error_messages.append(f"{loc}: {error['msg']}")
557
+
558
+ error_summary = "\n ".join(error_messages)
559
+ logger.error(
560
+ "Policy validation failed for %s:\n %s",
561
+ file_path,
562
+ error_summary,
563
+ )
564
+ # Track parsing error for GitHub reporting
565
+ self.parsing_errors.append((file_path, error_summary))
566
+ return None
567
+ except Exception as e:
568
+ logger.error("Failed to load policy from %s: %s", file_path, e)
569
+ return None
570
+
571
+ def load_from_directory(
572
+ self, directory_path: str, recursive: bool = True
573
+ ) -> list[tuple[str, IAMPolicy]]:
574
+ """Load all IAM policies from a directory.
575
+
576
+ Args:
577
+ directory_path: Path to the directory
578
+ recursive: Whether to search subdirectories
579
+
580
+ Returns:
581
+ List of tuples (file_path, policy)
582
+ """
583
+ path = Path(directory_path)
584
+
585
+ if not path.exists():
586
+ logger.error("Directory not found: %s", directory_path)
587
+ return []
588
+
589
+ if not path.is_dir():
590
+ logger.error("Not a directory: %s", directory_path)
591
+ return []
592
+
593
+ policies: list[tuple[str, IAMPolicy]] = []
594
+ pattern = "**/*" if recursive else "*"
595
+
596
+ for file_path in path.glob(pattern):
597
+ # Skip directories that shouldn't be scanned
598
+ if any(skip_dir in file_path.parts for skip_dir in self.SKIP_DIRECTORIES):
599
+ continue
600
+
601
+ if file_path.is_file() and file_path.suffix.lower() in self.SUPPORTED_EXTENSIONS:
602
+ policy = self.load_from_file(str(file_path))
603
+ if policy:
604
+ policies.append((str(file_path), policy))
605
+
606
+ logger.info("Loaded %d policies from %s", len(policies), directory_path)
607
+ return policies
608
+
609
+ def load_from_path(self, path: str, recursive: bool = True) -> list[tuple[str, IAMPolicy]]:
610
+ """Load IAM policies from a file or directory.
611
+
612
+ Args:
613
+ path: Path to file or directory
614
+ recursive: Whether to search subdirectories (only applies to directories)
615
+
616
+ Returns:
617
+ List of tuples (file_path, policy)
618
+ """
619
+ path_obj = Path(path)
620
+
621
+ if path_obj.is_file():
622
+ policy = self.load_from_file(path)
623
+ return [(path, policy)] if policy else []
624
+ elif path_obj.is_dir():
625
+ return self.load_from_directory(path, recursive)
626
+ else:
627
+ logger.error("Path not found: %s", path)
628
+ return []
629
+
630
+ def load_from_paths(
631
+ self, paths: list[str], recursive: bool = True
632
+ ) -> list[tuple[str, IAMPolicy]]:
633
+ """Load IAM policies from multiple files or directories.
634
+
635
+ Args:
636
+ paths: List of paths to files or directories
637
+ recursive: Whether to search subdirectories (only applies to directories)
638
+
639
+ Returns:
640
+ List of tuples (file_path, policy) from all paths combined
641
+ """
642
+ all_policies: list[tuple[str, IAMPolicy]] = []
643
+
644
+ for path in paths:
645
+ policies = self.load_from_path(path.strip(), recursive)
646
+ all_policies.extend(policies)
647
+
648
+ logger.info("Loaded %d total policies from %d path(s)", len(all_policies), len(paths))
649
+ return all_policies
650
+
651
+ def _get_policy_files(self, path: str, recursive: bool = True) -> Generator[Path, None, None]:
652
+ """Get all policy files from a path (file or directory).
653
+
654
+ This is a generator that yields file paths without loading them,
655
+ enabling memory-efficient iteration.
656
+
657
+ Args:
658
+ path: Path to file or directory
659
+ recursive: Whether to search subdirectories
660
+
661
+ Yields:
662
+ Path objects for policy files
663
+ """
664
+ path_obj = Path(path)
665
+
666
+ if path_obj.is_file():
667
+ if path_obj.suffix.lower() in self.SUPPORTED_EXTENSIONS:
668
+ yield path_obj
669
+ elif path_obj.is_dir():
670
+ pattern = "**/*" if recursive else "*"
671
+ for file_path in path_obj.glob(pattern):
672
+ # Skip directories that shouldn't be scanned
673
+ if any(skip_dir in file_path.parts for skip_dir in self.SKIP_DIRECTORIES):
674
+ continue
675
+
676
+ if file_path.is_file() and file_path.suffix.lower() in self.SUPPORTED_EXTENSIONS:
677
+ yield file_path
678
+ else:
679
+ logger.error("Path not found: %s", path)
680
+
681
+ def stream_from_path(
682
+ self, path: str, recursive: bool = True
683
+ ) -> Generator[tuple[str, IAMPolicy], None, None]:
684
+ """Stream IAM policies from a file or directory one at a time.
685
+
686
+ This is a memory-efficient alternative to load_from_path that yields
687
+ policies one at a time instead of loading all into memory.
688
+
689
+ Args:
690
+ path: Path to file or directory
691
+ recursive: Whether to search subdirectories
692
+
693
+ Yields:
694
+ Tuples of (file_path, policy) for each successfully loaded policy
695
+ """
696
+ for file_path in self._get_policy_files(path, recursive):
697
+ policy = self.load_from_file(str(file_path))
698
+ if policy:
699
+ yield (str(file_path), policy)
700
+
701
+ def stream_from_paths(
702
+ self, paths: list[str], recursive: bool = True
703
+ ) -> Generator[tuple[str, IAMPolicy], None, None]:
704
+ """Stream IAM policies from multiple paths one at a time.
705
+
706
+ This is a memory-efficient alternative to load_from_paths that yields
707
+ policies one at a time instead of loading all into memory.
708
+
709
+ Args:
710
+ paths: List of paths to files or directories
711
+ recursive: Whether to search subdirectories
712
+
713
+ Yields:
714
+ Tuples of (file_path, policy) for each successfully loaded policy
715
+ """
716
+ for path in paths:
717
+ yield from self.stream_from_path(path.strip(), recursive)
718
+
719
+ def batch_from_paths(
720
+ self, paths: list[str], batch_size: int = 10, recursive: bool = True
721
+ ) -> Generator[list[tuple[str, IAMPolicy]], None, None]:
722
+ """Load policies in batches for balanced memory usage and performance.
723
+
724
+ Args:
725
+ paths: List of paths to files or directories
726
+ batch_size: Number of policies per batch (default: 10)
727
+ recursive: Whether to search subdirectories
728
+
729
+ Yields:
730
+ Lists of (file_path, policy) tuples, up to batch_size per list
731
+ """
732
+ batch: list[tuple[str, IAMPolicy]] = []
733
+
734
+ for file_path, policy in self.stream_from_paths(paths, recursive):
735
+ batch.append((file_path, policy))
736
+
737
+ if len(batch) >= batch_size:
738
+ yield batch
739
+ batch = []
740
+
741
+ # Yield remaining policies
742
+ if batch:
743
+ yield batch
744
+
745
+ @staticmethod
746
+ def parse_policy_string(policy_json: str) -> IAMPolicy | None:
747
+ """Parse an IAM policy from a JSON string.
748
+
749
+ Args:
750
+ policy_json: JSON string containing the policy
751
+
752
+ Returns:
753
+ Parsed IAMPolicy or None if parsing fails
754
+ """
755
+ try:
756
+ data = json.loads(policy_json)
757
+ policy = IAMPolicy.model_validate(data)
758
+ logger.info("Successfully parsed policy from string")
759
+ return policy
760
+ except json.JSONDecodeError as e:
761
+ logger.error("Invalid JSON: %s", e)
762
+ return None
763
+ except ValidationError as e:
764
+ # Handle Pydantic validation errors with helpful messages
765
+ error_messages = []
766
+ for error in e.errors():
767
+ loc = ".".join(str(x) for x in error["loc"])
768
+ error_type = error["type"]
769
+
770
+ # Provide user-friendly messages for common errors
771
+ if error_type == "extra_forbidden":
772
+ # Extract the field name that has a typo
773
+ field_name = error["loc"][-1] if error["loc"] else "unknown"
774
+ error_messages.append(
775
+ f"Unknown field '{field_name}' at {loc}. "
776
+ f"This might be a typo. Did you mean 'Condition', 'Action', or 'Resource'?"
777
+ )
778
+ else:
779
+ error_messages.append(f"{loc}: {error['msg']}")
780
+
781
+ logger.error("Policy validation failed:\n %s", "\n ".join(error_messages))
782
+ return None
783
+ except Exception as e:
784
+ logger.error("Failed to parse policy string: %s", e)
785
+ return None