iam-policy-validator 1.7.2__py3-none-any.whl → 1.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. {iam_policy_validator-1.7.2.dist-info → iam_policy_validator-1.8.0.dist-info}/METADATA +22 -6
  2. {iam_policy_validator-1.7.2.dist-info → iam_policy_validator-1.8.0.dist-info}/RECORD +38 -35
  3. iam_validator/__version__.py +1 -1
  4. iam_validator/checks/__init__.py +5 -3
  5. iam_validator/checks/action_condition_enforcement.py +61 -23
  6. iam_validator/checks/action_resource_matching.py +6 -2
  7. iam_validator/checks/action_validation.py +1 -1
  8. iam_validator/checks/condition_key_validation.py +1 -1
  9. iam_validator/checks/condition_type_mismatch.py +6 -6
  10. iam_validator/checks/policy_structure.py +577 -0
  11. iam_validator/checks/policy_type_validation.py +48 -32
  12. iam_validator/checks/principal_validation.py +65 -133
  13. iam_validator/checks/resource_validation.py +8 -8
  14. iam_validator/checks/sensitive_action.py +7 -3
  15. iam_validator/checks/service_wildcard.py +2 -2
  16. iam_validator/checks/set_operator_validation.py +11 -11
  17. iam_validator/checks/sid_uniqueness.py +8 -4
  18. iam_validator/checks/trust_policy_validation.py +512 -0
  19. iam_validator/checks/utils/sensitive_action_matcher.py +26 -26
  20. iam_validator/checks/utils/wildcard_expansion.py +1 -1
  21. iam_validator/checks/wildcard_action.py +3 -1
  22. iam_validator/checks/wildcard_resource.py +3 -1
  23. iam_validator/commands/validate.py +6 -12
  24. iam_validator/core/__init__.py +1 -2
  25. iam_validator/core/access_analyzer.py +1 -1
  26. iam_validator/core/access_analyzer_report.py +2 -2
  27. iam_validator/core/aws_fetcher.py +45 -43
  28. iam_validator/core/check_registry.py +83 -79
  29. iam_validator/core/config/condition_requirements.py +69 -17
  30. iam_validator/core/config/defaults.py +58 -52
  31. iam_validator/core/config/service_principals.py +40 -3
  32. iam_validator/core/ignore_patterns.py +297 -0
  33. iam_validator/core/models.py +15 -5
  34. iam_validator/core/policy_checks.py +31 -472
  35. iam_validator/core/policy_loader.py +27 -4
  36. {iam_policy_validator-1.7.2.dist-info → iam_policy_validator-1.8.0.dist-info}/WHEEL +0 -0
  37. {iam_policy_validator-1.7.2.dist-info → iam_policy_validator-1.8.0.dist-info}/entry_points.txt +0 -0
  38. {iam_policy_validator-1.7.2.dist-info → iam_policy_validator-1.8.0.dist-info}/licenses/LICENSE +0 -0
@@ -9,19 +9,19 @@ This module provides comprehensive validation of IAM policies including:
9
9
 
10
10
  import asyncio
11
11
  import logging
12
- import re
13
12
  from pathlib import Path
14
13
 
15
14
  from iam_validator.core import constants
16
15
  from iam_validator.core.aws_fetcher import AWSServiceFetcher
17
- from iam_validator.core.check_registry import CheckRegistry
16
+ from iam_validator.core.check_registry import CheckRegistry, create_default_registry
17
+ from iam_validator.core.config.config_loader import ConfigLoader
18
18
  from iam_validator.core.models import (
19
19
  IAMPolicy,
20
20
  PolicyType,
21
21
  PolicyValidationResult,
22
- Statement,
23
22
  ValidationIssue,
24
23
  )
24
+ from iam_validator.core.policy_loader import PolicyLoader
25
25
 
26
26
  logger = logging.getLogger(__name__)
27
27
 
@@ -46,482 +46,23 @@ def _should_fail_on_issue(
46
46
  return issue.severity in fail_on_severities
47
47
 
48
48
 
49
- class PolicyValidator:
50
- """Validates IAM policies for correctness and security."""
51
-
52
- def __init__(self, fetcher: AWSServiceFetcher):
53
- """Initialize the validator.
54
-
55
- Args:
56
- fetcher: AWS service fetcher instance
57
- """
58
- self.fetcher = fetcher
59
- self._file_cache: dict[str, list[str]] = {}
60
-
61
- def _find_field_line(
62
- self, policy_file: str, statement_line: int, search_term: str
63
- ) -> int | None:
64
- """Find the specific line number for a field within a statement.
65
-
66
- Args:
67
- policy_file: Path to the policy file
68
- statement_line: Line number where the statement starts (Sid/first field line)
69
- search_term: The term to search for (e.g., action name, resource ARN)
70
-
71
- Returns:
72
- Line number where the field is found, or None
73
- """
74
- try:
75
- # Cache file contents
76
- if policy_file not in self._file_cache:
77
- with open(policy_file, encoding="utf-8") as f:
78
- self._file_cache[policy_file] = f.readlines()
79
-
80
- lines = self._file_cache[policy_file]
81
-
82
- # Need to go back to find the opening brace of the statement
83
- # Look backwards from statement_line to find the opening {
84
- statement_start = statement_line
85
- for i in range(statement_line - 1, max(0, statement_line - 10), -1):
86
- if "{" in lines[i]:
87
- statement_start = i + 1 # Convert to 1-indexed
88
- break
89
-
90
- # Now search from the statement opening brace
91
- brace_depth = 0
92
- in_statement = False
93
-
94
- for i, line in enumerate(lines[statement_start - 1 :], start=statement_start):
95
- # Track braces to stay within statement bounds
96
- for char in line:
97
- if char == "{":
98
- brace_depth += 1
99
- in_statement = True
100
- elif char == "}":
101
- brace_depth -= 1
102
-
103
- # Search for the term in this line
104
- if in_statement and search_term in line:
105
- return i
106
-
107
- # Exit if we've left the statement
108
- if in_statement and brace_depth == 0:
109
- break
110
-
111
- return None
112
-
113
- except Exception as e:
114
- logger.debug(f"Could not find field line in {policy_file}: {e}")
115
- return None
116
-
117
- async def validate_policy(
118
- self, policy: IAMPolicy, policy_file: str, policy_type: PolicyType = "IDENTITY_POLICY"
119
- ) -> PolicyValidationResult:
120
- """Validate a complete IAM policy.
121
-
122
- Args:
123
- policy: IAM policy to validate
124
- policy_file: Path to the policy file
125
- policy_type: Type of policy (IDENTITY_POLICY, RESOURCE_POLICY, SERVICE_CONTROL_POLICY)
126
-
127
- Returns:
128
- PolicyValidationResult with all findings
129
- """
130
- result = PolicyValidationResult(
131
- policy_file=policy_file, is_valid=True, policy_type=policy_type
132
- )
133
-
134
- # Apply automatic policy-type validation (not configurable - always runs)
135
- from iam_validator.checks import policy_type_validation
136
-
137
- policy_type_issues = await policy_type_validation.execute_policy(
138
- policy, policy_file, policy_type=policy_type
139
- )
140
- result.issues.extend(policy_type_issues)
141
-
142
- for idx, statement in enumerate(policy.statement):
143
- # Get line number for this statement
144
- statement_line = statement.line_number
145
-
146
- # Validate actions
147
- # Optimization: Batch actions by service and cache line lookups
148
- actions = statement.get_actions()
149
- non_wildcard_actions = [a for a in actions if a != "*"]
150
-
151
- # Group actions by service prefix for batch validation
152
- from collections import defaultdict
153
-
154
- actions_by_service = defaultdict(list)
155
- for action in non_wildcard_actions:
156
- if ":" in action:
157
- service_prefix = action.split(":")[0]
158
- actions_by_service[service_prefix].append(action)
159
- else:
160
- # Invalid action format, validate individually
161
- actions_by_service["_invalid"].append(action)
162
-
163
- # Pre-fetch all required services in parallel
164
- if actions_by_service:
165
- service_prefixes = [s for s in actions_by_service.keys() if s != "_invalid"]
166
- # Batch fetch services to warm up cache
167
- fetch_results = await asyncio.gather(
168
- *[self.fetcher.fetch_service_by_name(s) for s in service_prefixes],
169
- return_exceptions=True, # Don't fail if a service doesn't exist
170
- )
171
-
172
- # Log any service fetch failures for debugging
173
- # Note: Individual action validation will still work and report proper errors
174
- for i, fetch_result in enumerate(fetch_results):
175
- if isinstance(fetch_result, Exception):
176
- service_name = service_prefixes[i]
177
- logger.debug(
178
- f"Pre-fetch failed for service '{service_name}': {fetch_result}. "
179
- "Will validate actions individually."
180
- )
181
-
182
- # Cache action line lookups to avoid repeated file searches
183
- action_line_cache = {}
184
-
185
- for action in non_wildcard_actions:
186
- # Look up line number once per action (cached)
187
- if action not in action_line_cache:
188
- action_line = None
189
- if statement_line:
190
- # Search for the full action string in quotes to avoid partial matches
191
- # Try full action first (e.g., "s3:GetObject")
192
- action_line = self._find_field_line(
193
- policy_file, statement_line, f'"{action}"'
194
- )
195
- # If not found, try just the action part after colon
196
- if not action_line and ":" in action:
197
- action_name = action.split(":")[-1]
198
- action_line = self._find_field_line(
199
- policy_file, statement_line, f'"{action_name}"'
200
- )
201
- action_line_cache[action] = action_line or statement_line
202
-
203
- await self._validate_action(
204
- action,
205
- idx,
206
- statement.sid,
207
- action_line_cache[action],
208
- result,
209
- )
210
-
211
- # Validate condition keys if present
212
- # Optimization: Cache condition line lookups and batch validations
213
- if statement.condition:
214
- # Pre-filter non-wildcard actions once
215
- non_wildcard_actions = [a for a in actions if a != "*"]
216
-
217
- # Cache condition key line numbers to avoid repeated file searches
218
- condition_line_cache = {}
219
-
220
- for operator, conditions in statement.condition.items():
221
- for condition_key in conditions.keys():
222
- # Look up line number once per condition key
223
- if condition_key not in condition_line_cache:
224
- condition_line = None
225
- if statement_line:
226
- condition_line = self._find_field_line(
227
- policy_file, statement_line, condition_key
228
- )
229
- condition_line_cache[condition_key] = condition_line or statement_line
230
-
231
- # Validate condition key against all non-wildcard actions
232
- for action in non_wildcard_actions:
233
- await self._validate_condition_key(
234
- action,
235
- condition_key,
236
- idx,
237
- statement.sid,
238
- condition_line_cache[condition_key],
239
- result,
240
- )
241
-
242
- # Validate resources
243
- resources = statement.get_resources()
244
- for resource in resources:
245
- if resource != "*": # Skip wildcard resources
246
- # Try to find specific resource line
247
- resource_line = None
248
- if statement_line:
249
- resource_line = self._find_field_line(policy_file, statement_line, resource)
250
- self._validate_resource(
251
- resource,
252
- idx,
253
- statement.sid,
254
- resource_line or statement_line,
255
- result,
256
- )
257
-
258
- # Security best practice checks
259
- self._check_security_best_practices(statement, idx, statement_line, result, policy_file)
260
-
261
- # Update final validation status
262
- # Default to failing only on "error" severity for legacy validator
263
- result.is_valid = len([i for i in result.issues if _should_fail_on_issue(i)]) == 0
264
-
265
- return result
266
-
267
- async def _validate_action(
268
- self,
269
- action: str,
270
- statement_idx: int,
271
- statement_sid: str | None,
272
- line_number: int | None,
273
- result: PolicyValidationResult,
274
- ) -> None:
275
- """Validate a single action."""
276
- result.actions_checked += 1
277
-
278
- # Handle wildcard patterns like "s3:Get*"
279
- if "*" in action and action != "*":
280
- # Validate the service prefix exists
281
- try:
282
- service_prefix = action.split(":")[0]
283
- await self.fetcher.fetch_service_by_name(service_prefix)
284
- # For now, accept wildcard actions if service exists
285
- logger.debug(f"Wildcard action validated: {action}")
286
- return
287
- except Exception:
288
- result.issues.append(
289
- ValidationIssue(
290
- severity="warning",
291
- statement_sid=statement_sid,
292
- statement_index=statement_idx,
293
- issue_type="wildcard_action",
294
- message=f"Wildcard action '{action}' uses unverified service",
295
- action=action,
296
- suggestion="Consider being more specific with action permissions",
297
- line_number=line_number,
298
- )
299
- )
300
- return
301
-
302
- is_valid, error_msg, is_wildcard = await self.fetcher.validate_action(action)
303
-
304
- if not is_valid:
305
- result.issues.append(
306
- ValidationIssue(
307
- severity="error",
308
- statement_sid=statement_sid,
309
- statement_index=statement_idx,
310
- issue_type="invalid_action",
311
- message=error_msg or f"Invalid action: {action}",
312
- action=action,
313
- line_number=line_number,
314
- )
315
- )
316
-
317
- async def _validate_condition_key(
318
- self,
319
- action: str,
320
- condition_key: str,
321
- statement_idx: int,
322
- statement_sid: str | None,
323
- line_number: int | None,
324
- result: PolicyValidationResult,
325
- ) -> None:
326
- """Validate a condition key against an action."""
327
- result.condition_keys_checked += 1
328
-
329
- is_valid, error_msg = await self.fetcher.validate_condition_key(action, condition_key)
330
-
331
- if not is_valid:
332
- result.issues.append(
333
- ValidationIssue(
334
- severity="warning",
335
- statement_sid=statement_sid,
336
- statement_index=statement_idx,
337
- issue_type="invalid_condition_key",
338
- message=error_msg or f"Invalid condition key: {condition_key}",
339
- action=action,
340
- condition_key=condition_key,
341
- line_number=line_number,
342
- )
343
- )
344
-
345
- def _validate_resource(
346
- self,
347
- resource: str,
348
- statement_idx: int,
349
- statement_sid: str | None,
350
- line_number: int | None,
351
- result: PolicyValidationResult,
352
- ) -> None:
353
- """Validate resource ARN format."""
354
- result.resources_checked += 1
355
-
356
- # Basic ARN format: arn:partition:service:region:account-id:resource-type/resource-id
357
- arn_pattern = r"^arn:(aws|aws-cn|aws-us-gov|aws-eusc|aws-iso|aws-iso-b|aws-iso-e|aws-iso-f):[a-z0-9\-]+:[a-z0-9\-]*:[0-9]*:.+$"
358
-
359
- if not re.match(arn_pattern, resource, re.IGNORECASE):
360
- result.issues.append(
361
- ValidationIssue(
362
- severity="error",
363
- statement_sid=statement_sid,
364
- statement_index=statement_idx,
365
- issue_type="invalid_resource",
366
- message=f"Invalid ARN format: {resource}",
367
- resource=resource,
368
- suggestion="ARN should follow format: arn:partition:service:region:account-id:resource",
369
- line_number=line_number,
370
- )
371
- )
372
-
373
- def _check_security_best_practices(
374
- self,
375
- statement: Statement,
376
- statement_idx: int,
377
- line_number: int | None,
378
- result: PolicyValidationResult,
379
- policy_file: str,
380
- ) -> None:
381
- """Check for security best practices."""
382
-
383
- # Check for overly permissive wildcards
384
- actions = statement.get_actions()
385
- resources = statement.get_resources()
386
-
387
- if statement.effect == "Allow":
388
- # Check for "*" in actions
389
- if "*" in actions:
390
- # Try to find "Action" field line
391
- action_field_line = None
392
- if line_number:
393
- action_field_line = self._find_field_line(policy_file, line_number, '"Action"')
394
- result.issues.append(
395
- ValidationIssue(
396
- severity="warning",
397
- statement_sid=statement.sid,
398
- statement_index=statement_idx,
399
- issue_type="overly_permissive",
400
- message="Statement allows all actions (*)",
401
- suggestion="Consider limiting to specific actions needed",
402
- line_number=action_field_line or line_number,
403
- )
404
- )
405
-
406
- # Check for "*" in resources
407
- if "*" in resources:
408
- # Try to find "Resource" field line
409
- resource_field_line = None
410
- if line_number:
411
- resource_field_line = self._find_field_line(
412
- policy_file, line_number, '"Resource"'
413
- )
414
- result.issues.append(
415
- ValidationIssue(
416
- severity="warning",
417
- statement_sid=statement.sid,
418
- statement_index=statement_idx,
419
- issue_type="overly_permissive",
420
- message="Statement applies to all resources (*)",
421
- suggestion="Consider limiting to specific resources",
422
- line_number=resource_field_line or line_number,
423
- )
424
- )
425
-
426
- # Check for both wildcards
427
- if "*" in actions and "*" in resources:
428
- result.issues.append(
429
- ValidationIssue(
430
- severity="error",
431
- statement_sid=statement.sid,
432
- statement_index=statement_idx,
433
- issue_type="security_risk",
434
- message="Statement allows all actions on all resources - CRITICAL SECURITY RISK",
435
- suggestion="This grants full administrative access. Restrict to specific actions and resources.",
436
- line_number=line_number,
437
- )
438
- )
439
-
440
- # Check for missing conditions on sensitive actions
441
- sensitive_actions = [
442
- "iam:PassRole",
443
- "iam:CreateUser",
444
- "iam:CreateRole",
445
- "iam:PutUserPolicy",
446
- "iam:PutRolePolicy",
447
- "s3:DeleteBucket",
448
- "s3:PutBucketPolicy",
449
- "ec2:TerminateInstances",
450
- ]
451
-
452
- for action in actions:
453
- if action in sensitive_actions and not statement.condition:
454
- # Try to find specific action line
455
- action_line = None
456
- if line_number:
457
- action_name = action.split(":")[-1] if ":" in action else action
458
- action_line = self._find_field_line(policy_file, line_number, action_name)
459
- result.issues.append(
460
- ValidationIssue(
461
- severity="warning",
462
- statement_sid=statement.sid,
463
- statement_index=statement_idx,
464
- issue_type="missing_condition",
465
- message=f"Sensitive action '{action}' has no conditions",
466
- action=action,
467
- suggestion="Consider adding conditions to restrict when this action can be performed",
468
- line_number=action_line or line_number,
469
- )
470
- )
471
-
472
-
473
49
  async def validate_policies(
474
- policies: list[tuple[str, IAMPolicy]],
50
+ policies: list[tuple[str, IAMPolicy]] | list[tuple[str, IAMPolicy, dict]],
475
51
  config_path: str | None = None,
476
- use_registry: bool = True,
477
52
  custom_checks_dir: str | None = None,
478
53
  policy_type: PolicyType = "IDENTITY_POLICY",
479
54
  ) -> list[PolicyValidationResult]:
480
55
  """Validate multiple policies concurrently.
481
56
 
482
57
  Args:
483
- policies: List of (file_path, policy) tuples
58
+ policies: List of (file_path, policy) or (file_path, policy, raw_dict) tuples
484
59
  config_path: Optional path to configuration file
485
- use_registry: If True, use CheckRegistry system; if False, use legacy validator
486
60
  custom_checks_dir: Optional path to directory containing custom checks for auto-discovery
487
61
  policy_type: Type of policy (IDENTITY_POLICY, RESOURCE_POLICY, SERVICE_CONTROL_POLICY)
488
62
 
489
63
  Returns:
490
64
  List of validation results
491
65
  """
492
- if not use_registry:
493
- # Legacy path - use old PolicyValidator
494
- # Load config for cache settings even in legacy mode
495
- from iam_validator.core.config.config_loader import ConfigLoader
496
-
497
- config = ConfigLoader.load_config(explicit_path=config_path, allow_missing=True)
498
- cache_enabled = config.get_setting("cache_enabled", True)
499
- cache_ttl_hours = config.get_setting("cache_ttl_hours", constants.DEFAULT_CACHE_TTL_HOURS)
500
- cache_directory = config.get_setting("cache_directory", None)
501
- aws_services_dir = config.get_setting("aws_services_dir", None)
502
- cache_ttl_seconds = cache_ttl_hours * constants.SECONDS_PER_HOUR
503
-
504
- async with AWSServiceFetcher(
505
- enable_cache=cache_enabled,
506
- cache_ttl=cache_ttl_seconds,
507
- cache_dir=cache_directory,
508
- aws_services_dir=aws_services_dir,
509
- ) as fetcher:
510
- validator = PolicyValidator(fetcher)
511
-
512
- tasks = [
513
- validator.validate_policy(policy, file_path, policy_type)
514
- for file_path, policy in policies
515
- ]
516
-
517
- results = await asyncio.gather(*tasks)
518
-
519
- return list(results)
520
-
521
- # New path - use CheckRegistry system
522
- from iam_validator.core.check_registry import create_default_registry
523
- from iam_validator.core.config.config_loader import ConfigLoader
524
-
525
66
  # Load configuration
526
67
  config = ConfigLoader.load_config(explicit_path=config_path, allow_missing=True)
527
68
 
@@ -581,9 +122,15 @@ async def validate_policies(
581
122
  ) as fetcher:
582
123
  tasks = [
583
124
  _validate_policy_with_registry(
584
- policy, file_path, registry, fetcher, fail_on_severities, policy_type
125
+ item[1], # policy
126
+ item[0], # file_path
127
+ registry,
128
+ fetcher,
129
+ fail_on_severities,
130
+ policy_type,
131
+ item[2] if len(item) == 3 else None, # raw_dict (optional)
585
132
  )
586
- for file_path, policy in policies
133
+ for item in policies
587
134
  ]
588
135
 
589
136
  results = await asyncio.gather(*tasks)
@@ -598,6 +145,7 @@ async def _validate_policy_with_registry(
598
145
  fetcher: AWSServiceFetcher,
599
146
  fail_on_severities: list[str] | None = None,
600
147
  policy_type: PolicyType = "IDENTITY_POLICY",
148
+ raw_policy_dict: dict | None = None,
601
149
  ) -> PolicyValidationResult:
602
150
  """Validate a single policy using the CheckRegistry system.
603
151
 
@@ -608,34 +156,45 @@ async def _validate_policy_with_registry(
608
156
  fetcher: AWS service fetcher instance
609
157
  fail_on_severities: List of severity levels that should cause validation to fail
610
158
  policy_type: Type of policy (IDENTITY_POLICY, RESOURCE_POLICY, SERVICE_CONTROL_POLICY)
159
+ raw_policy_dict: Raw policy dictionary for structural validation (optional, will be loaded if not provided)
611
160
 
612
161
  Returns:
613
162
  PolicyValidationResult with all findings
614
163
  """
615
164
  result = PolicyValidationResult(policy_file=policy_file, is_valid=True, policy_type=policy_type)
616
165
 
166
+ # Load raw dict if not provided (for structural validation)
167
+ if raw_policy_dict is None:
168
+ loader = PolicyLoader()
169
+ loaded_result = loader.load_from_file(policy_file, return_raw_dict=True)
170
+ if loaded_result and isinstance(loaded_result, tuple):
171
+ raw_policy_dict = loaded_result[1]
172
+
617
173
  # Apply automatic policy-type validation (not configurable - always runs)
618
- from iam_validator.checks import policy_type_validation
174
+ # Note: Import here to avoid circular import (policy_checks -> checks -> sdk -> policy_checks)
175
+ from iam_validator.checks import ( # pylint: disable=import-outside-toplevel
176
+ policy_type_validation,
177
+ )
619
178
 
620
179
  policy_type_issues = await policy_type_validation.execute_policy(
621
180
  policy, policy_file, policy_type=policy_type
622
181
  )
623
- result.issues.extend(policy_type_issues)
182
+ result.issues.extend(policy_type_issues) # pylint: disable=no-member
624
183
 
625
184
  # Run policy-level checks first (checks that need to see the entire policy)
626
185
  # These checks examine relationships between statements, not individual statements
627
186
  policy_level_issues = await registry.execute_policy_checks(
628
- policy, policy_file, fetcher, policy_type
187
+ policy, policy_file, fetcher, policy_type, raw_policy_dict=raw_policy_dict
629
188
  )
630
- result.issues.extend(policy_level_issues)
189
+ result.issues.extend(policy_level_issues) # pylint: disable=no-member
631
190
 
632
191
  # Execute all statement-level checks for each statement
633
- for idx, statement in enumerate(policy.statement):
192
+ for idx, statement in enumerate(policy.statement or []):
634
193
  # Execute all registered checks in parallel (with ignore_patterns filtering)
635
194
  issues = await registry.execute_checks_parallel(statement, idx, fetcher, policy_file)
636
195
 
637
196
  # Add issues to result
638
- result.issues.extend(issues)
197
+ result.issues.extend(issues) # pylint: disable=no-member
639
198
 
640
199
  # Update counters (approximate based on what was checked)
641
200
  actions = statement.get_actions()
@@ -29,6 +29,7 @@ import json
29
29
  import logging
30
30
  from collections.abc import Generator
31
31
  from pathlib import Path
32
+ from typing import overload
32
33
 
33
34
  import yaml
34
35
  from pydantic import ValidationError
@@ -45,6 +46,8 @@ class PolicyLoader:
45
46
  """
46
47
 
47
48
  SUPPORTED_EXTENSIONS = {".json", ".yaml", ".yml"}
49
+ # Directories to skip when scanning recursively (cache, build artifacts, etc.)
50
+ SKIP_DIRECTORIES = {".cache", ".git", "node_modules", "__pycache__", ".venv", "venv"}
48
51
 
49
52
  def __init__(self, max_file_size_mb: int = 100) -> None:
50
53
  """Initialize the policy loader.
@@ -148,14 +151,26 @@ class PolicyLoader:
148
151
  logger.error("Failed to check file size for %s: %s", path, e)
149
152
  return False
150
153
 
151
- def load_from_file(self, file_path: str) -> IAMPolicy | None:
154
+ @overload
155
+ def load_from_file(self, file_path: str, return_raw_dict: bool = False) -> IAMPolicy | None: ...
156
+
157
+ @overload
158
+ def load_from_file(
159
+ self, file_path: str, return_raw_dict: bool = True
160
+ ) -> tuple[IAMPolicy, dict] | None: ...
161
+
162
+ def load_from_file(
163
+ self, file_path: str, return_raw_dict: bool = False
164
+ ) -> IAMPolicy | tuple[IAMPolicy, dict] | None:
152
165
  """Load a single IAM policy from a file.
153
166
 
154
167
  Args:
155
168
  file_path: Path to the policy file
169
+ return_raw_dict: If True, return tuple of (policy, raw_dict) for validation
156
170
 
157
171
  Returns:
158
- Parsed IAMPolicy or None if loading fails
172
+ Parsed IAMPolicy, or tuple of (IAMPolicy, raw_dict) if return_raw_dict=True,
173
+ or None if loading fails
159
174
  """
160
175
  path = Path(file_path)
161
176
 
@@ -196,12 +211,12 @@ class PolicyLoader:
196
211
 
197
212
  # Attach line numbers to statements
198
213
  if statement_line_numbers:
199
- for idx, statement in enumerate(policy.statement):
214
+ for idx, statement in enumerate(policy.statement or []):
200
215
  if idx < len(statement_line_numbers):
201
216
  statement.line_number = statement_line_numbers[idx]
202
217
 
203
218
  logger.info("Successfully loaded policy from %s", file_path)
204
- return policy
219
+ return (policy, data) if return_raw_dict else policy
205
220
 
206
221
  except json.JSONDecodeError as e:
207
222
  error_msg = f"Invalid JSON: {e}"
@@ -270,6 +285,10 @@ class PolicyLoader:
270
285
  pattern = "**/*" if recursive else "*"
271
286
 
272
287
  for file_path in path.glob(pattern):
288
+ # Skip directories that shouldn't be scanned
289
+ if any(skip_dir in file_path.parts for skip_dir in self.SKIP_DIRECTORIES):
290
+ continue
291
+
273
292
  if file_path.is_file() and file_path.suffix.lower() in self.SUPPORTED_EXTENSIONS:
274
293
  policy = self.load_from_file(str(file_path))
275
294
  if policy:
@@ -341,6 +360,10 @@ class PolicyLoader:
341
360
  elif path_obj.is_dir():
342
361
  pattern = "**/*" if recursive else "*"
343
362
  for file_path in path_obj.glob(pattern):
363
+ # Skip directories that shouldn't be scanned
364
+ if any(skip_dir in file_path.parts for skip_dir in self.SKIP_DIRECTORIES):
365
+ continue
366
+
344
367
  if file_path.is_file() and file_path.suffix.lower() in self.SUPPORTED_EXTENSIONS:
345
368
  yield file_path
346
369
  else: