iam-policy-validator 1.13.1__py3-none-any.whl → 1.14.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. {iam_policy_validator-1.13.1.dist-info → iam_policy_validator-1.14.1.dist-info}/METADATA +1 -1
  2. {iam_policy_validator-1.13.1.dist-info → iam_policy_validator-1.14.1.dist-info}/RECORD +45 -39
  3. iam_validator/__version__.py +1 -1
  4. iam_validator/checks/action_condition_enforcement.py +6 -0
  5. iam_validator/checks/action_resource_matching.py +12 -12
  6. iam_validator/checks/action_validation.py +1 -0
  7. iam_validator/checks/condition_key_validation.py +2 -0
  8. iam_validator/checks/condition_type_mismatch.py +3 -0
  9. iam_validator/checks/full_wildcard.py +1 -0
  10. iam_validator/checks/mfa_condition_check.py +2 -0
  11. iam_validator/checks/policy_structure.py +9 -0
  12. iam_validator/checks/policy_type_validation.py +11 -0
  13. iam_validator/checks/principal_validation.py +5 -0
  14. iam_validator/checks/resource_validation.py +4 -0
  15. iam_validator/checks/sensitive_action.py +1 -0
  16. iam_validator/checks/service_wildcard.py +6 -3
  17. iam_validator/checks/set_operator_validation.py +3 -0
  18. iam_validator/checks/sid_uniqueness.py +2 -0
  19. iam_validator/checks/trust_policy_validation.py +3 -0
  20. iam_validator/checks/utils/__init__.py +16 -0
  21. iam_validator/checks/utils/action_parser.py +149 -0
  22. iam_validator/checks/wildcard_action.py +1 -0
  23. iam_validator/checks/wildcard_resource.py +231 -4
  24. iam_validator/commands/analyze.py +19 -1
  25. iam_validator/commands/completion.py +6 -2
  26. iam_validator/commands/validate.py +231 -12
  27. iam_validator/core/aws_service/fetcher.py +21 -9
  28. iam_validator/core/codeowners.py +245 -0
  29. iam_validator/core/config/check_documentation.py +390 -0
  30. iam_validator/core/config/config_loader.py +199 -0
  31. iam_validator/core/config/defaults.py +25 -0
  32. iam_validator/core/constants.py +1 -0
  33. iam_validator/core/diff_parser.py +8 -4
  34. iam_validator/core/finding_fingerprint.py +131 -0
  35. iam_validator/core/formatters/sarif.py +370 -128
  36. iam_validator/core/ignore_processor.py +309 -0
  37. iam_validator/core/ignored_findings.py +400 -0
  38. iam_validator/core/models.py +54 -4
  39. iam_validator/core/policy_loader.py +313 -4
  40. iam_validator/core/pr_commenter.py +223 -22
  41. iam_validator/core/report.py +22 -6
  42. iam_validator/integrations/github_integration.py +881 -123
  43. {iam_policy_validator-1.13.1.dist-info → iam_policy_validator-1.14.1.dist-info}/WHEEL +0 -0
  44. {iam_policy_validator-1.13.1.dist-info → iam_policy_validator-1.14.1.dist-info}/entry_points.txt +0 -0
  45. {iam_policy_validator-1.13.1.dist-info → iam_policy_validator-1.14.1.dist-info}/licenses/LICENSE +0 -0
@@ -64,6 +64,10 @@ Examples:
64
64
 
65
65
  # Only GitHub Actions job summary
66
66
  iam-validator validate --path ./policies/ --github-summary
67
+
68
+ # CI mode: show enhanced output in logs, save JSON to file
69
+ iam-validator validate --path ./policies/ --ci --github-review
70
+ iam-validator validate --path ./policies/ --ci --ci-output results.json
67
71
  """
68
72
 
69
73
  def add_arguments(self, parser: argparse.ArgumentParser) -> None:
@@ -198,6 +202,33 @@ Examples:
198
202
  help="Show Issue Severity Breakdown section in enhanced format output",
199
203
  )
200
204
 
205
+ parser.add_argument(
206
+ "--allow-owner-ignore",
207
+ action="store_true",
208
+ default=True,
209
+ help="Allow CODEOWNERS to ignore findings by replying 'ignore' to review comments (default: enabled)",
210
+ )
211
+
212
+ parser.add_argument(
213
+ "--no-owner-ignore",
214
+ action="store_true",
215
+ help="Disable CODEOWNERS ignore feature",
216
+ )
217
+
218
+ parser.add_argument(
219
+ "--ci",
220
+ action="store_true",
221
+ help="CI mode: print enhanced console output for visibility in job logs, "
222
+ "and write JSON report to file (use --ci-output to specify filename, "
223
+ "defaults to 'validation-report.json').",
224
+ )
225
+
226
+ parser.add_argument(
227
+ "--ci-output",
228
+ default="validation-report.json",
229
+ help="Output file for JSON report in CI mode (default: validation-report.json)",
230
+ )
231
+
201
232
  async def execute(self, args: argparse.Namespace) -> int:
202
233
  """Execute the validate command."""
203
234
  # Check if streaming mode is enabled
@@ -266,8 +297,15 @@ Examples:
266
297
  generator = ReportGenerator()
267
298
  report = generator.generate_report(results, parsing_errors=loader.parsing_errors)
268
299
 
269
- # Output results
270
- if args.format is None:
300
+ # Handle --ci flag: show enhanced output in console, write JSON to file
301
+ ci_mode = getattr(args, "ci", False)
302
+ if ci_mode:
303
+ # CI mode: enhanced output to console, JSON to file
304
+ self._print_ci_console_output(report, generator)
305
+ ci_output_file = getattr(args, "ci_output", "validation-report.json")
306
+ generator.save_json_report(report, ci_output_file)
307
+ logging.info(f"Saved JSON report to {ci_output_file}")
308
+ elif args.format is None:
271
309
  # Default: use classic console output (direct Rich printing)
272
310
  generator.print_console_report(report)
273
311
  elif args.format == "json":
@@ -302,16 +340,26 @@ Examples:
302
340
  from iam_validator.core.config.config_loader import ConfigLoader
303
341
  from iam_validator.core.pr_commenter import PRCommenter
304
342
 
305
- # Load config to get fail_on_severity and severity_labels settings
343
+ # Load config to get fail_on_severity, severity_labels, and ignore settings
306
344
  config = ConfigLoader.load_config(config_path)
307
345
  fail_on_severities = config.get_setting("fail_on_severity", ["error", "critical"])
308
346
  severity_labels = config.get_setting("severity_labels", {})
309
347
 
348
+ # Get ignore settings from config, but CLI flag can override
349
+ ignore_settings = config.get_setting("ignore_settings", {})
350
+ enable_ignore = ignore_settings.get("enabled", True)
351
+ # CLI --no-owner-ignore takes precedence
352
+ if getattr(args, "no_owner_ignore", False):
353
+ enable_ignore = False
354
+ allowed_users = ignore_settings.get("allowed_users", [])
355
+
310
356
  async with GitHubIntegration() as github:
311
357
  commenter = PRCommenter(
312
358
  github,
313
359
  fail_on_severities=fail_on_severities,
314
360
  severity_labels=severity_labels,
361
+ enable_codeowners_ignore=enable_ignore,
362
+ allowed_ignore_users=allowed_users,
315
363
  )
316
364
  success = await commenter.post_findings_to_pr(
317
365
  report,
@@ -348,10 +396,8 @@ Examples:
348
396
 
349
397
  all_results = []
350
398
  total_processed = 0
351
-
352
- # Clean up old review comments at the start (before posting any new ones)
353
- if getattr(args, "github_review", False):
354
- await self._cleanup_old_comments(args)
399
+ # Track all validated files across the streaming session for final cleanup
400
+ all_validated_files: set[str] = set()
355
401
 
356
402
  logging.info(f"Starting streaming validation from {len(args.paths)} path(s)")
357
403
 
@@ -374,6 +420,11 @@ Examples:
374
420
  result = results[0]
375
421
  all_results.append(result)
376
422
 
423
+ # Track validated file (convert to relative path for cleanup)
424
+ relative_path = self._make_relative_path(file_path)
425
+ if relative_path:
426
+ all_validated_files.add(relative_path)
427
+
377
428
  # Print immediate feedback for this file
378
429
  if args.format == "console":
379
430
  if result.is_valid:
@@ -383,6 +434,8 @@ Examples:
383
434
  # Note: validation_success tracks overall status
384
435
 
385
436
  # Post to GitHub immediately for this file (progressive PR comments)
437
+ # skip_cleanup=True because we process files one at a time and don't want
438
+ # to delete comments from files processed earlier. Cleanup runs at the end.
386
439
  if getattr(args, "github_review", False):
387
440
  await self._post_file_review(result, args)
388
441
 
@@ -392,11 +445,23 @@ Examples:
392
445
 
393
446
  logging.info(f"\nCompleted validation of {total_processed} policies")
394
447
 
448
+ # Run final cleanup after all files are processed
449
+ # This uses the full report to know all current findings and deletes stale comments
450
+ if getattr(args, "github_review", False):
451
+ await self._run_final_review_cleanup(args, all_results, all_validated_files)
452
+
395
453
  # Generate final summary report
396
454
  report = generator.generate_report(all_results)
397
455
 
398
- # Output final results
399
- if args.format == "console":
456
+ # Handle --ci flag: show enhanced output in console, write JSON to file
457
+ ci_mode = getattr(args, "ci", False)
458
+ if ci_mode:
459
+ # CI mode: enhanced output to console, JSON to file
460
+ self._print_ci_console_output(report, generator)
461
+ ci_output_file = getattr(args, "ci_output", "validation-report.json")
462
+ generator.save_json_report(report, ci_output_file)
463
+ logging.info(f"Saved JSON report to {ci_output_file}")
464
+ elif args.format == "console":
400
465
  # Classic console output (direct Rich printing from report.py)
401
466
  generator.print_console_report(report)
402
467
  elif args.format == "json":
@@ -431,16 +496,26 @@ Examples:
431
496
  from iam_validator.core.config.config_loader import ConfigLoader
432
497
  from iam_validator.core.pr_commenter import PRCommenter
433
498
 
434
- # Load config to get fail_on_severity and severity_labels settings
499
+ # Load config to get fail_on_severity, severity_labels, and ignore settings
435
500
  config = ConfigLoader.load_config(config_path)
436
501
  fail_on_severities = config.get_setting("fail_on_severity", ["error", "critical"])
437
502
  severity_labels = config.get_setting("severity_labels", {})
438
503
 
504
+ # Get ignore settings from config, but CLI flag can override
505
+ ignore_settings = config.get_setting("ignore_settings", {})
506
+ enable_ignore = ignore_settings.get("enabled", True)
507
+ # CLI --no-owner-ignore takes precedence
508
+ if getattr(args, "no_owner_ignore", False):
509
+ enable_ignore = False
510
+ allowed_users = ignore_settings.get("allowed_users", [])
511
+
439
512
  async with GitHubIntegration() as github:
440
513
  commenter = PRCommenter(
441
514
  github,
442
515
  fail_on_severities=fail_on_severities,
443
516
  severity_labels=severity_labels,
517
+ enable_codeowners_ignore=enable_ignore,
518
+ allowed_ignore_users=allowed_users,
444
519
  )
445
520
  success = await commenter.post_findings_to_pr(
446
521
  report,
@@ -487,24 +562,34 @@ Examples:
487
562
  if not github.is_configured():
488
563
  return
489
564
 
490
- # Load config to get fail_on_severity setting
565
+ # Load config to get fail_on_severity and ignore settings
491
566
  config_path = getattr(args, "config", None)
492
567
  config = ConfigLoader.load_config(config_path)
493
568
  fail_on_severities = config.get_setting("fail_on_severity", ["error", "critical"])
494
569
 
570
+ # Get ignore settings from config, but CLI flag can override
571
+ ignore_settings = config.get_setting("ignore_settings", {})
572
+ enable_ignore = ignore_settings.get("enabled", True)
573
+ # CLI --no-owner-ignore takes precedence
574
+ if getattr(args, "no_owner_ignore", False):
575
+ enable_ignore = False
576
+ allowed_users = ignore_settings.get("allowed_users", [])
577
+
495
578
  # In streaming mode, don't cleanup comments (we want to keep earlier files)
496
579
  # Cleanup will happen once at the end
497
580
  commenter = PRCommenter(
498
581
  github,
499
582
  cleanup_old_comments=False,
500
583
  fail_on_severities=fail_on_severities,
584
+ enable_codeowners_ignore=enable_ignore,
585
+ allowed_ignore_users=allowed_users,
501
586
  )
502
587
 
503
588
  # Create a mini-report for just this file
504
589
  generator = ReportGenerator()
505
590
  mini_report = generator.generate_report([result])
506
591
 
507
- # Post line-specific comments
592
+ # Post line-specific comments (skip cleanup - runs at end of streaming)
508
593
  await commenter.post_findings_to_pr(
509
594
  mini_report,
510
595
  create_review=True,
@@ -513,6 +598,109 @@ Examples:
513
598
  except Exception as e:
514
599
  logging.warning(f"Failed to post review for {result.policy_file}: {e}")
515
600
 
601
+ def _make_relative_path(self, file_path: str) -> str | None:
602
+ """Convert absolute path to relative path for GitHub.
603
+
604
+ Args:
605
+ file_path: Absolute or relative path to file
606
+
607
+ Returns:
608
+ Relative path from repository root, or None if cannot be determined
609
+ """
610
+ from pathlib import Path
611
+
612
+ # If already relative, use as-is
613
+ if not os.path.isabs(file_path):
614
+ return file_path
615
+
616
+ # Try to get workspace path from environment
617
+ workspace = os.getenv("GITHUB_WORKSPACE")
618
+ if workspace:
619
+ try:
620
+ abs_file_path = Path(file_path).resolve()
621
+ workspace_path = Path(workspace).resolve()
622
+
623
+ if abs_file_path.is_relative_to(workspace_path):
624
+ relative = abs_file_path.relative_to(workspace_path)
625
+ return str(relative).replace("\\", "/")
626
+ except (ValueError, OSError) as exc:
627
+ logging.debug(f"Could not make path relative to GitHub workspace: {exc}")
628
+
629
+ # Fallback: try current working directory
630
+ try:
631
+ cwd = Path.cwd()
632
+ abs_file_path = Path(file_path).resolve()
633
+ if abs_file_path.is_relative_to(cwd):
634
+ relative = abs_file_path.relative_to(cwd)
635
+ return str(relative).replace("\\", "/")
636
+ except (ValueError, OSError) as exc:
637
+ logging.debug(f"Could not make path relative to cwd: {exc}")
638
+
639
+ return None
640
+
641
+ async def _run_final_review_cleanup(
642
+ self,
643
+ args: argparse.Namespace,
644
+ all_results: list,
645
+ all_validated_files: set[str],
646
+ ) -> None:
647
+ """Run final cleanup after all files are processed in streaming mode.
648
+
649
+ This deletes stale comments for findings that are no longer present,
650
+ using the complete set of validated files and current findings.
651
+
652
+ Args:
653
+ args: Command-line arguments
654
+ all_results: All validation results from the streaming session
655
+ all_validated_files: Set of all validated file paths (relative)
656
+ """
657
+ try:
658
+ from iam_validator.core.config.config_loader import ConfigLoader
659
+ from iam_validator.core.pr_commenter import PRCommenter
660
+
661
+ async with GitHubIntegration() as github:
662
+ if not github.is_configured():
663
+ return
664
+
665
+ # Load config
666
+ config_path = getattr(args, "config", None)
667
+ config = ConfigLoader.load_config(config_path)
668
+ fail_on_severities = config.get_setting("fail_on_severity", ["error", "critical"])
669
+
670
+ # Get ignore settings
671
+ ignore_settings = config.get_setting("ignore_settings", {})
672
+ enable_ignore = ignore_settings.get("enabled", True)
673
+ if getattr(args, "no_owner_ignore", False):
674
+ enable_ignore = False
675
+ allowed_users = ignore_settings.get("allowed_users", [])
676
+
677
+ # Create commenter WITH cleanup enabled for the final pass
678
+ commenter = PRCommenter(
679
+ github,
680
+ cleanup_old_comments=True, # Enable cleanup for final pass
681
+ fail_on_severities=fail_on_severities,
682
+ enable_codeowners_ignore=enable_ignore,
683
+ allowed_ignore_users=allowed_users,
684
+ )
685
+
686
+ # Create a full report with all results
687
+ generator = ReportGenerator()
688
+ full_report = generator.generate_report(all_results)
689
+
690
+ # Post with create_review=True to run the full update/create/delete logic
691
+ # but pass all_validated_files so cleanup knows the full scope
692
+ logging.info("Running final comment cleanup...")
693
+ await commenter.post_findings_to_pr(
694
+ full_report,
695
+ create_review=True,
696
+ add_summary_comment=False,
697
+ manage_labels=False, # Labels are managed separately
698
+ process_ignores=False, # Already processed per-file
699
+ )
700
+
701
+ except Exception as e:
702
+ logging.warning(f"Failed to run final review cleanup: {e}")
703
+
516
704
  def _write_github_actions_summary(self, report: ValidationReport) -> None:
517
705
  """Write a high-level summary to GitHub Actions job summary.
518
706
 
@@ -609,3 +797,34 @@ Examples:
609
797
 
610
798
  except Exception as e:
611
799
  logging.warning(f"Failed to write GitHub Actions summary: {e}")
800
+
801
def _print_ci_console_output(
    self, report: ValidationReport, generator: ReportGenerator
) -> None:
    """Print human-readable validation results to stdout for CI job logs.

    The JSON report itself is written to a separate file (see --ci-output);
    this method only provides visibility in the job logs, degrading to a
    terse summary if the enhanced formatter raises.

    Args:
        report: Validation report to print.
        generator: ReportGenerator used to format the report.
    """
    try:
        # Enhanced format includes the summary and severity breakdown.
        rendered = generator.format_report(
            report,
            "enhanced",
            show_summary=True,
            show_severity_breakdown=True,
        )
        print(rendered)

    except Exception as e:
        # A formatting failure must not hide the results entirely.
        logging.warning(f"Failed to generate enhanced output: {e}")
        print("\nValidation Summary:")
        print(f" Total policies: {report.total_policies}")
        print(f" Valid: {report.valid_policies}")
        print(f" Invalid: {report.invalid_policies}")
        print(f" Total issues: {report.total_issues}\n")
@@ -100,6 +100,9 @@ class AWSServiceFetcher:
100
100
  "wafv2",
101
101
  ]
102
102
 
103
+ # Default concurrency limits
104
+ DEFAULT_MAX_CONCURRENT_REQUESTS = 10
105
+
103
106
  def __init__(
104
107
  self,
105
108
  timeout: float = constants.DEFAULT_HTTP_TIMEOUT_SECONDS,
@@ -112,6 +115,7 @@ class AWSServiceFetcher:
112
115
  prefetch_common: bool = True,
113
116
  cache_dir: Path | str | None = None,
114
117
  aws_services_dir: Path | str | None = None,
118
+ max_concurrent_requests: int = DEFAULT_MAX_CONCURRENT_REQUESTS,
115
119
  ):
116
120
  """Initialize AWS service fetcher.
117
121
 
@@ -130,10 +134,13 @@ class AWSServiceFetcher:
130
134
  instead of making API calls. Directory should contain:
131
135
  - _services.json: List of all services
132
136
  - {service}.json: Individual service files (e.g., s3.json)
137
+ max_concurrent_requests: Maximum number of concurrent HTTP requests (default: 10)
133
138
  """
134
139
  self.prefetch_common = prefetch_common
135
140
  self.aws_services_dir = Path(aws_services_dir) if aws_services_dir else None
136
141
  self._prefetched_services: set[str] = set()
142
+ # Semaphore for limiting concurrent requests
143
+ self._request_semaphore = asyncio.Semaphore(max_concurrent_requests)
137
144
 
138
145
  # Initialize storage component
139
146
  self._storage = ServiceFileStorage(
@@ -343,7 +350,10 @@ class AWSServiceFetcher:
343
350
  raise ValueError(f"Service `{service_name}` not found")
344
351
 
345
352
  async def fetch_multiple_services(self, service_names: list[str]) -> dict[str, ServiceDetail]:
346
- """Fetch multiple services concurrently with optimized batching.
353
+ """Fetch multiple services concurrently with controlled parallelism.
354
+
355
+ Uses a semaphore to limit concurrent requests and prevent overwhelming
356
+ the AWS service reference API.
347
357
 
348
358
  Args:
349
359
  service_names: List of service names to fetch
@@ -361,14 +371,16 @@ class AWSServiceFetcher:
361
371
  """
362
372
 
363
373
  async def fetch_single(name: str) -> tuple[str, ServiceDetail]:
364
- try:
365
- detail = await self.fetch_service_by_name(name)
366
- return name, detail
367
- except Exception as e: # pylint: disable=broad-exception-caught
368
- logger.error(f"Failed to fetch service {name}: {e}")
369
- raise
370
-
371
- # Fetch all services concurrently
374
+ # Use semaphore to limit concurrent requests
375
+ async with self._request_semaphore:
376
+ try:
377
+ detail = await self.fetch_service_by_name(name)
378
+ return name, detail
379
+ except Exception as e: # pylint: disable=broad-exception-caught
380
+ logger.error(f"Failed to fetch service {name}: {e}")
381
+ raise
382
+
383
+ # Fetch all services concurrently (semaphore controls parallelism)
372
384
  tasks = [fetch_single(name) for name in service_names]
373
385
  results = await asyncio.gather(*tasks, return_exceptions=True)
374
386
 
@@ -0,0 +1,245 @@
1
+ """CODEOWNERS file parser for GitHub repositories.
2
+
3
+ This module provides functionality to parse GitHub CODEOWNERS files and
4
+ determine which users/teams own specific files. Used to authorize users
5
+ who can ignore validation findings.
6
+
7
+ Reference: https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import re
13
+ from dataclasses import dataclass, field
14
+ from functools import lru_cache
15
+ from pathlib import PurePosixPath
16
+ from typing import ClassVar
17
+
18
+
19
@dataclass(slots=True)
class CodeOwnerRule:
    """One pattern-to-owners mapping parsed from a CODEOWNERS line.

    Attributes:
        pattern: Glob-style file pattern (GitHub CODEOWNERS format).
        owners: Owners assigned by this rule (@user and/or @org/team).
        compiled_pattern: Regex compiled from ``pattern`` for fast matching.
    """

    pattern: str
    owners: list[str]
    compiled_pattern: re.Pattern[str] | None = field(default=None, repr=False)

    def __post_init__(self) -> None:
        """Compile the pattern eagerly so matching never pays translation cost."""
        self.compiled_pattern = _compile_codeowners_pattern(self.pattern)
36
+
37
+
38
+ @lru_cache(maxsize=256)
39
+ def _compile_codeowners_pattern(pattern: str) -> re.Pattern[str]:
40
+ """Compile a CODEOWNERS pattern to regex with caching.
41
+
42
+ CODEOWNERS patterns follow these rules:
43
+ - Patterns starting with / are relative to repo root
44
+ - Patterns without / match any path containing that component
45
+ - * matches anything except /
46
+ - ** matches anything including /
47
+ - Trailing / matches directories
48
+
49
+ Args:
50
+ pattern: CODEOWNERS glob pattern
51
+
52
+ Returns:
53
+ Compiled regex pattern
54
+ """
55
+ # Normalize the pattern
56
+ original_pattern = pattern
57
+ pattern = pattern.strip()
58
+
59
+ # Handle leading slash (anchored to root)
60
+ anchored = pattern.startswith("/")
61
+ if anchored:
62
+ pattern = pattern[1:]
63
+
64
+ # Handle trailing slash (directory match)
65
+ is_dir = pattern.endswith("/")
66
+ if is_dir:
67
+ pattern = pattern[:-1]
68
+
69
+ # Escape special regex characters except * and ?
70
+ pattern = re.escape(pattern)
71
+
72
+ # Convert glob patterns to regex
73
+ # ** matches any number of directories
74
+ pattern = pattern.replace(r"\*\*", "<<<DOUBLE_STAR>>>")
75
+ # * matches anything except /
76
+ pattern = pattern.replace(r"\*", "[^/]*")
77
+ # ** -> match anything
78
+ pattern = pattern.replace("<<<DOUBLE_STAR>>>", ".*")
79
+ # ? matches single character except /
80
+ pattern = pattern.replace(r"\?", "[^/]")
81
+
82
+ # Build final regex
83
+ if anchored:
84
+ # Anchored patterns match from repo root
85
+ regex = f"^{pattern}"
86
+ elif "/" in original_pattern.lstrip("/"):
87
+ # Patterns with / in them are implicitly anchored
88
+ regex = f"^{pattern}"
89
+ else:
90
+ # Patterns without / can match anywhere in path
91
+ regex = f"(^|/){pattern}"
92
+
93
+ if is_dir:
94
+ # Directory patterns match the directory and anything under it
95
+ regex += "(/|$)"
96
+ else:
97
+ # File patterns match exactly or as prefix for directories
98
+ regex += "($|/)"
99
+
100
+ return re.compile(regex)
101
+
102
+
103
class CodeOwnersParser:
    """Parser for the GitHub CODEOWNERS file format.

    Parses CODEOWNERS content and provides file-to-owner mapping.
    Uses last-matching-pattern semantics as per GitHub's behavior.

    Example:
        >>> content = '''
        ... # Default owners
        ... * @default-team
        ... # IAM policies owned by security
        ... /policies/**/*.json @security-team @security-lead
        ... '''
        >>> parser = CodeOwnersParser(content)
        >>> parser.get_owners_for_file("policies/admin/admin.json")
        ['@security-team', '@security-lead']
    """

    # Locations GitHub searches for a CODEOWNERS file.
    CODEOWNERS_PATHS: ClassVar[list[str]] = [
        "CODEOWNERS",
        ".github/CODEOWNERS",
        "docs/CODEOWNERS",
    ]

    def __init__(self, content: str) -> None:
        """Initialize parser with CODEOWNERS content.

        Args:
            content: Raw content of CODEOWNERS file
        """
        self.rules: list[CodeOwnerRule] = []
        self._parse(content)

    def _parse(self, content: str) -> None:
        """Parse CODEOWNERS file content into ordered rules.

        Args:
            content: Raw CODEOWNERS file content
        """
        for line in content.splitlines():
            line = line.strip()

            # Skip empty lines and comments
            if not line or line.startswith("#"):
                continue

            # First token is the pattern; any remaining tokens are owners.
            parts = line.split()
            if len(parts) >= 2:
                self.rules.append(CodeOwnerRule(pattern=parts[0], owners=parts[1:]))
            elif len(parts) == 1:
                # Pattern with no owners (unsets ownership for later matches)
                self.rules.append(CodeOwnerRule(pattern=parts[0], owners=[]))

    def get_owners_for_file(self, file_path: str) -> list[str]:
        """Get owners for a specific file path.

        Uses last-matching-pattern semantics as per GitHub's behavior:
        if multiple patterns match, the last one in the file wins.

        Args:
            file_path: Path to the file (relative to repo root)

        Returns:
            List of owners for the file, or empty list if no match
        """
        # Normalize: drop a leading "./" or "/" prefix. Do NOT use
        # lstrip("./") here — it strips *characters*, which would mangle
        # dotfile paths such as ".github/CODEOWNERS" into "github/...".
        file_path = file_path.removeprefix("./").lstrip("/")

        # Scan every rule in order; the last matching rule wins.
        owners: list[str] = []
        for rule in self.rules:
            if rule.compiled_pattern and rule.compiled_pattern.search(file_path):
                owners = rule.owners

        return owners

    def is_owner(self, username: str, file_path: str) -> bool:
        """Check if a user is an owner of a file.

        Note: This only checks direct username matches. For team membership,
        use GitHubIntegration.is_user_codeowner() which resolves teams.

        Args:
            username: GitHub username (with or without @)
            file_path: Path to the file

        Returns:
            True if user is directly listed as owner
        """
        # Normalize username for case-insensitive comparison.
        username = username.lstrip("@").lower()

        for owner in self.get_owners_for_file(file_path):
            owner = owner.lstrip("@").lower()
            # Direct username match only; team entries contain a "/".
            if "/" not in owner and owner == username:
                return True

        return False

    def get_teams_for_file(self, file_path: str) -> list[tuple[str, str]]:
        """Get team owners for a file as (org, team_slug) tuples.

        Args:
            file_path: Path to the file

        Returns:
            List of (org, team_slug) tuples
        """
        teams: list[tuple[str, str]] = []

        for owner in self.get_owners_for_file(file_path):
            owner = owner.lstrip("@")
            # Team entries have the form "org/team-slug"; split at the
            # first "/" so slugs containing "/" remain intact.
            if "/" in owner:
                org, team_slug = owner.split("/", 1)
                teams.append((org, team_slug))

        return teams
227
+
228
+
229
def normalize_path(path: str) -> str:
    """Normalize a file path for CODEOWNERS matching.

    Args:
        path: File path (may be absolute or relative)

    Returns:
        Normalized relative path
    """
    # Posix-style rendering (also collapses "." components and repeated slashes).
    normalized = str(PurePosixPath(path))
    # Drop a leading "./" if one survived, then any leading "/".
    normalized = normalized.removeprefix("./")
    return normalized.lstrip("/")