iam-policy-validator 1.13.1__py3-none-any.whl → 1.14.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {iam_policy_validator-1.13.1.dist-info → iam_policy_validator-1.14.1.dist-info}/METADATA +1 -1
- {iam_policy_validator-1.13.1.dist-info → iam_policy_validator-1.14.1.dist-info}/RECORD +45 -39
- iam_validator/__version__.py +1 -1
- iam_validator/checks/action_condition_enforcement.py +6 -0
- iam_validator/checks/action_resource_matching.py +12 -12
- iam_validator/checks/action_validation.py +1 -0
- iam_validator/checks/condition_key_validation.py +2 -0
- iam_validator/checks/condition_type_mismatch.py +3 -0
- iam_validator/checks/full_wildcard.py +1 -0
- iam_validator/checks/mfa_condition_check.py +2 -0
- iam_validator/checks/policy_structure.py +9 -0
- iam_validator/checks/policy_type_validation.py +11 -0
- iam_validator/checks/principal_validation.py +5 -0
- iam_validator/checks/resource_validation.py +4 -0
- iam_validator/checks/sensitive_action.py +1 -0
- iam_validator/checks/service_wildcard.py +6 -3
- iam_validator/checks/set_operator_validation.py +3 -0
- iam_validator/checks/sid_uniqueness.py +2 -0
- iam_validator/checks/trust_policy_validation.py +3 -0
- iam_validator/checks/utils/__init__.py +16 -0
- iam_validator/checks/utils/action_parser.py +149 -0
- iam_validator/checks/wildcard_action.py +1 -0
- iam_validator/checks/wildcard_resource.py +231 -4
- iam_validator/commands/analyze.py +19 -1
- iam_validator/commands/completion.py +6 -2
- iam_validator/commands/validate.py +231 -12
- iam_validator/core/aws_service/fetcher.py +21 -9
- iam_validator/core/codeowners.py +245 -0
- iam_validator/core/config/check_documentation.py +390 -0
- iam_validator/core/config/config_loader.py +199 -0
- iam_validator/core/config/defaults.py +25 -0
- iam_validator/core/constants.py +1 -0
- iam_validator/core/diff_parser.py +8 -4
- iam_validator/core/finding_fingerprint.py +131 -0
- iam_validator/core/formatters/sarif.py +370 -128
- iam_validator/core/ignore_processor.py +309 -0
- iam_validator/core/ignored_findings.py +400 -0
- iam_validator/core/models.py +54 -4
- iam_validator/core/policy_loader.py +313 -4
- iam_validator/core/pr_commenter.py +223 -22
- iam_validator/core/report.py +22 -6
- iam_validator/integrations/github_integration.py +881 -123
- {iam_policy_validator-1.13.1.dist-info → iam_policy_validator-1.14.1.dist-info}/WHEEL +0 -0
- {iam_policy_validator-1.13.1.dist-info → iam_policy_validator-1.14.1.dist-info}/entry_points.txt +0 -0
- {iam_policy_validator-1.13.1.dist-info → iam_policy_validator-1.14.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -64,6 +64,10 @@ Examples:
|
|
|
64
64
|
|
|
65
65
|
# Only GitHub Actions job summary
|
|
66
66
|
iam-validator validate --path ./policies/ --github-summary
|
|
67
|
+
|
|
68
|
+
# CI mode: show enhanced output in logs, save JSON to file
|
|
69
|
+
iam-validator validate --path ./policies/ --ci --github-review
|
|
70
|
+
iam-validator validate --path ./policies/ --ci --ci-output results.json
|
|
67
71
|
"""
|
|
68
72
|
|
|
69
73
|
def add_arguments(self, parser: argparse.ArgumentParser) -> None:
|
|
@@ -198,6 +202,33 @@ Examples:
|
|
|
198
202
|
help="Show Issue Severity Breakdown section in enhanced format output",
|
|
199
203
|
)
|
|
200
204
|
|
|
205
|
+
parser.add_argument(
|
|
206
|
+
"--allow-owner-ignore",
|
|
207
|
+
action="store_true",
|
|
208
|
+
default=True,
|
|
209
|
+
help="Allow CODEOWNERS to ignore findings by replying 'ignore' to review comments (default: enabled)",
|
|
210
|
+
)
|
|
211
|
+
|
|
212
|
+
parser.add_argument(
|
|
213
|
+
"--no-owner-ignore",
|
|
214
|
+
action="store_true",
|
|
215
|
+
help="Disable CODEOWNERS ignore feature",
|
|
216
|
+
)
|
|
217
|
+
|
|
218
|
+
parser.add_argument(
|
|
219
|
+
"--ci",
|
|
220
|
+
action="store_true",
|
|
221
|
+
help="CI mode: print enhanced console output for visibility in job logs, "
|
|
222
|
+
"and write JSON report to file (use --ci-output to specify filename, "
|
|
223
|
+
"defaults to 'validation-report.json').",
|
|
224
|
+
)
|
|
225
|
+
|
|
226
|
+
parser.add_argument(
|
|
227
|
+
"--ci-output",
|
|
228
|
+
default="validation-report.json",
|
|
229
|
+
help="Output file for JSON report in CI mode (default: validation-report.json)",
|
|
230
|
+
)
|
|
231
|
+
|
|
201
232
|
async def execute(self, args: argparse.Namespace) -> int:
|
|
202
233
|
"""Execute the validate command."""
|
|
203
234
|
# Check if streaming mode is enabled
|
|
@@ -266,8 +297,15 @@ Examples:
|
|
|
266
297
|
generator = ReportGenerator()
|
|
267
298
|
report = generator.generate_report(results, parsing_errors=loader.parsing_errors)
|
|
268
299
|
|
|
269
|
-
#
|
|
270
|
-
|
|
300
|
+
# Handle --ci flag: show enhanced output in console, write JSON to file
|
|
301
|
+
ci_mode = getattr(args, "ci", False)
|
|
302
|
+
if ci_mode:
|
|
303
|
+
# CI mode: enhanced output to console, JSON to file
|
|
304
|
+
self._print_ci_console_output(report, generator)
|
|
305
|
+
ci_output_file = getattr(args, "ci_output", "validation-report.json")
|
|
306
|
+
generator.save_json_report(report, ci_output_file)
|
|
307
|
+
logging.info(f"Saved JSON report to {ci_output_file}")
|
|
308
|
+
elif args.format is None:
|
|
271
309
|
# Default: use classic console output (direct Rich printing)
|
|
272
310
|
generator.print_console_report(report)
|
|
273
311
|
elif args.format == "json":
|
|
@@ -302,16 +340,26 @@ Examples:
|
|
|
302
340
|
from iam_validator.core.config.config_loader import ConfigLoader
|
|
303
341
|
from iam_validator.core.pr_commenter import PRCommenter
|
|
304
342
|
|
|
305
|
-
# Load config to get fail_on_severity and
|
|
343
|
+
# Load config to get fail_on_severity, severity_labels, and ignore settings
|
|
306
344
|
config = ConfigLoader.load_config(config_path)
|
|
307
345
|
fail_on_severities = config.get_setting("fail_on_severity", ["error", "critical"])
|
|
308
346
|
severity_labels = config.get_setting("severity_labels", {})
|
|
309
347
|
|
|
348
|
+
# Get ignore settings from config, but CLI flag can override
|
|
349
|
+
ignore_settings = config.get_setting("ignore_settings", {})
|
|
350
|
+
enable_ignore = ignore_settings.get("enabled", True)
|
|
351
|
+
# CLI --no-owner-ignore takes precedence
|
|
352
|
+
if getattr(args, "no_owner_ignore", False):
|
|
353
|
+
enable_ignore = False
|
|
354
|
+
allowed_users = ignore_settings.get("allowed_users", [])
|
|
355
|
+
|
|
310
356
|
async with GitHubIntegration() as github:
|
|
311
357
|
commenter = PRCommenter(
|
|
312
358
|
github,
|
|
313
359
|
fail_on_severities=fail_on_severities,
|
|
314
360
|
severity_labels=severity_labels,
|
|
361
|
+
enable_codeowners_ignore=enable_ignore,
|
|
362
|
+
allowed_ignore_users=allowed_users,
|
|
315
363
|
)
|
|
316
364
|
success = await commenter.post_findings_to_pr(
|
|
317
365
|
report,
|
|
@@ -348,10 +396,8 @@ Examples:
|
|
|
348
396
|
|
|
349
397
|
all_results = []
|
|
350
398
|
total_processed = 0
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
if getattr(args, "github_review", False):
|
|
354
|
-
await self._cleanup_old_comments(args)
|
|
399
|
+
# Track all validated files across the streaming session for final cleanup
|
|
400
|
+
all_validated_files: set[str] = set()
|
|
355
401
|
|
|
356
402
|
logging.info(f"Starting streaming validation from {len(args.paths)} path(s)")
|
|
357
403
|
|
|
@@ -374,6 +420,11 @@ Examples:
|
|
|
374
420
|
result = results[0]
|
|
375
421
|
all_results.append(result)
|
|
376
422
|
|
|
423
|
+
# Track validated file (convert to relative path for cleanup)
|
|
424
|
+
relative_path = self._make_relative_path(file_path)
|
|
425
|
+
if relative_path:
|
|
426
|
+
all_validated_files.add(relative_path)
|
|
427
|
+
|
|
377
428
|
# Print immediate feedback for this file
|
|
378
429
|
if args.format == "console":
|
|
379
430
|
if result.is_valid:
|
|
@@ -383,6 +434,8 @@ Examples:
|
|
|
383
434
|
# Note: validation_success tracks overall status
|
|
384
435
|
|
|
385
436
|
# Post to GitHub immediately for this file (progressive PR comments)
|
|
437
|
+
# skip_cleanup=True because we process files one at a time and don't want
|
|
438
|
+
# to delete comments from files processed earlier. Cleanup runs at the end.
|
|
386
439
|
if getattr(args, "github_review", False):
|
|
387
440
|
await self._post_file_review(result, args)
|
|
388
441
|
|
|
@@ -392,11 +445,23 @@ Examples:
|
|
|
392
445
|
|
|
393
446
|
logging.info(f"\nCompleted validation of {total_processed} policies")
|
|
394
447
|
|
|
448
|
+
# Run final cleanup after all files are processed
|
|
449
|
+
# This uses the full report to know all current findings and deletes stale comments
|
|
450
|
+
if getattr(args, "github_review", False):
|
|
451
|
+
await self._run_final_review_cleanup(args, all_results, all_validated_files)
|
|
452
|
+
|
|
395
453
|
# Generate final summary report
|
|
396
454
|
report = generator.generate_report(all_results)
|
|
397
455
|
|
|
398
|
-
#
|
|
399
|
-
|
|
456
|
+
# Handle --ci flag: show enhanced output in console, write JSON to file
|
|
457
|
+
ci_mode = getattr(args, "ci", False)
|
|
458
|
+
if ci_mode:
|
|
459
|
+
# CI mode: enhanced output to console, JSON to file
|
|
460
|
+
self._print_ci_console_output(report, generator)
|
|
461
|
+
ci_output_file = getattr(args, "ci_output", "validation-report.json")
|
|
462
|
+
generator.save_json_report(report, ci_output_file)
|
|
463
|
+
logging.info(f"Saved JSON report to {ci_output_file}")
|
|
464
|
+
elif args.format == "console":
|
|
400
465
|
# Classic console output (direct Rich printing from report.py)
|
|
401
466
|
generator.print_console_report(report)
|
|
402
467
|
elif args.format == "json":
|
|
@@ -431,16 +496,26 @@ Examples:
|
|
|
431
496
|
from iam_validator.core.config.config_loader import ConfigLoader
|
|
432
497
|
from iam_validator.core.pr_commenter import PRCommenter
|
|
433
498
|
|
|
434
|
-
# Load config to get fail_on_severity and
|
|
499
|
+
# Load config to get fail_on_severity, severity_labels, and ignore settings
|
|
435
500
|
config = ConfigLoader.load_config(config_path)
|
|
436
501
|
fail_on_severities = config.get_setting("fail_on_severity", ["error", "critical"])
|
|
437
502
|
severity_labels = config.get_setting("severity_labels", {})
|
|
438
503
|
|
|
504
|
+
# Get ignore settings from config, but CLI flag can override
|
|
505
|
+
ignore_settings = config.get_setting("ignore_settings", {})
|
|
506
|
+
enable_ignore = ignore_settings.get("enabled", True)
|
|
507
|
+
# CLI --no-owner-ignore takes precedence
|
|
508
|
+
if getattr(args, "no_owner_ignore", False):
|
|
509
|
+
enable_ignore = False
|
|
510
|
+
allowed_users = ignore_settings.get("allowed_users", [])
|
|
511
|
+
|
|
439
512
|
async with GitHubIntegration() as github:
|
|
440
513
|
commenter = PRCommenter(
|
|
441
514
|
github,
|
|
442
515
|
fail_on_severities=fail_on_severities,
|
|
443
516
|
severity_labels=severity_labels,
|
|
517
|
+
enable_codeowners_ignore=enable_ignore,
|
|
518
|
+
allowed_ignore_users=allowed_users,
|
|
444
519
|
)
|
|
445
520
|
success = await commenter.post_findings_to_pr(
|
|
446
521
|
report,
|
|
@@ -487,24 +562,34 @@ Examples:
|
|
|
487
562
|
if not github.is_configured():
|
|
488
563
|
return
|
|
489
564
|
|
|
490
|
-
# Load config to get fail_on_severity
|
|
565
|
+
# Load config to get fail_on_severity and ignore settings
|
|
491
566
|
config_path = getattr(args, "config", None)
|
|
492
567
|
config = ConfigLoader.load_config(config_path)
|
|
493
568
|
fail_on_severities = config.get_setting("fail_on_severity", ["error", "critical"])
|
|
494
569
|
|
|
570
|
+
# Get ignore settings from config, but CLI flag can override
|
|
571
|
+
ignore_settings = config.get_setting("ignore_settings", {})
|
|
572
|
+
enable_ignore = ignore_settings.get("enabled", True)
|
|
573
|
+
# CLI --no-owner-ignore takes precedence
|
|
574
|
+
if getattr(args, "no_owner_ignore", False):
|
|
575
|
+
enable_ignore = False
|
|
576
|
+
allowed_users = ignore_settings.get("allowed_users", [])
|
|
577
|
+
|
|
495
578
|
# In streaming mode, don't cleanup comments (we want to keep earlier files)
|
|
496
579
|
# Cleanup will happen once at the end
|
|
497
580
|
commenter = PRCommenter(
|
|
498
581
|
github,
|
|
499
582
|
cleanup_old_comments=False,
|
|
500
583
|
fail_on_severities=fail_on_severities,
|
|
584
|
+
enable_codeowners_ignore=enable_ignore,
|
|
585
|
+
allowed_ignore_users=allowed_users,
|
|
501
586
|
)
|
|
502
587
|
|
|
503
588
|
# Create a mini-report for just this file
|
|
504
589
|
generator = ReportGenerator()
|
|
505
590
|
mini_report = generator.generate_report([result])
|
|
506
591
|
|
|
507
|
-
# Post line-specific comments
|
|
592
|
+
# Post line-specific comments (skip cleanup - runs at end of streaming)
|
|
508
593
|
await commenter.post_findings_to_pr(
|
|
509
594
|
mini_report,
|
|
510
595
|
create_review=True,
|
|
@@ -513,6 +598,109 @@ Examples:
|
|
|
513
598
|
except Exception as e:
|
|
514
599
|
logging.warning(f"Failed to post review for {result.policy_file}: {e}")
|
|
515
600
|
|
|
601
|
+
def _make_relative_path(self, file_path: str) -> str | None:
    """Convert an absolute path to a repository-relative, forward-slash path.

    A path that is already relative is returned unchanged. An absolute path is
    tried against ``$GITHUB_WORKSPACE`` first (when set) and then against the
    current working directory; the first root that contains it wins.

    Args:
        file_path: Absolute or relative path to a file.

    Returns:
        The path relative to the matching root, using ``/`` separators, or
        ``None`` when the path lies under neither root.
    """
    from pathlib import Path

    # If already relative, use as-is
    if not os.path.isabs(file_path):
        return file_path

    # Candidate roots in priority order. The root is produced lazily so that a
    # failing Path.cwd() (e.g. deleted working directory) is caught below.
    candidate_roots = []
    workspace = os.getenv("GITHUB_WORKSPACE")
    if workspace:
        candidate_roots.append(("GitHub workspace", lambda: Path(workspace)))
    candidate_roots.append(("cwd", Path.cwd))

    for label, get_root in candidate_roots:
        try:
            # Resolve BOTH sides: comparing a symlink-resolved file path with an
            # unresolved root would miss whenever the root is reached via a symlink.
            root = get_root().resolve()
            target = Path(file_path).resolve()
            if target.is_relative_to(root):
                return str(target.relative_to(root)).replace("\\", "/")
        except (ValueError, OSError) as exc:
            logging.debug(f"Could not make path relative to {label}: {exc}")

    return None
|
|
640
|
+
|
|
641
|
+
async def _run_final_review_cleanup(
    self,
    args: argparse.Namespace,
    all_results: list,
    all_validated_files: set[str],
) -> None:
    """Do one cleanup-enabled review pass once streaming validation has finished.

    Builds a single report from every streamed result and posts it through a
    PRCommenter created with ``cleanup_old_comments=True``. Any failure is
    logged as a warning and swallowed, so cleanup is best-effort.

    Args:
        args: Command-line arguments (reads ``config`` and ``no_owner_ignore``).
        all_results: All validation results from the streaming session.
        all_validated_files: Set of all validated file paths (relative).
    """
    # NOTE(review): all_validated_files is accepted but never forwarded to the
    # commenter below - confirm whether post_findings_to_pr should receive it.
    try:
        from iam_validator.core.config.config_loader import ConfigLoader
        from iam_validator.core.pr_commenter import PRCommenter

        async with GitHubIntegration() as github:
            # Nothing to clean up when no GitHub context is available.
            if not github.is_configured():
                return

            config = ConfigLoader.load_config(getattr(args, "config", None))
            severities = config.get_setting("fail_on_severity", ["error", "critical"])

            # Config supplies the ignore settings; --no-owner-ignore overrides them.
            ignore_cfg = config.get_setting("ignore_settings", {})
            owner_ignore = ignore_cfg.get("enabled", True)
            if getattr(args, "no_owner_ignore", False):
                owner_ignore = False
            permitted_users = ignore_cfg.get("allowed_users", [])

            # Unlike the per-file commenters, this one has cleanup switched on.
            commenter = PRCommenter(
                github,
                cleanup_old_comments=True,
                fail_on_severities=severities,
                enable_codeowners_ignore=owner_ignore,
                allowed_ignore_users=permitted_users,
            )

            # One report covering every result from the session.
            full_report = ReportGenerator().generate_report(all_results)

            logging.info("Running final comment cleanup...")
            await commenter.post_findings_to_pr(
                full_report,
                create_review=True,
                add_summary_comment=False,
                manage_labels=False,  # Labels are managed separately
                process_ignores=False,  # Already processed per-file
            )

    except Exception as e:
        logging.warning(f"Failed to run final review cleanup: {e}")
|
|
703
|
+
|
|
516
704
|
def _write_github_actions_summary(self, report: ValidationReport) -> None:
|
|
517
705
|
"""Write a high-level summary to GitHub Actions job summary.
|
|
518
706
|
|
|
@@ -609,3 +797,34 @@ Examples:
|
|
|
609
797
|
|
|
610
798
|
except Exception as e:
|
|
611
799
|
logging.warning(f"Failed to write GitHub Actions summary: {e}")
|
|
800
|
+
|
|
801
|
+
def _print_ci_console_output(
    self, report: ValidationReport, generator: ReportGenerator
) -> None:
    """Write a human-readable validation report to stdout for CI job logs.

    Asks the generator for the "enhanced" format with the summary and the
    severity breakdown enabled. If that fails for any reason, a warning is
    logged and a minimal plain-text summary is printed instead.

    Args:
        report: Validation report to print.
        generator: ReportGenerator instance used to render the report.
    """
    enhanced_options = {
        "show_summary": True,
        "show_severity_breakdown": True,
    }
    try:
        # Rendering and printing share one try so either failure triggers the fallback.
        print(generator.format_report(report, "enhanced", **enhanced_options))
    except Exception as e:
        # Fallback to basic summary if enhanced format fails
        logging.warning(f"Failed to generate enhanced output: {e}")
        print("\nValidation Summary:")
        print(f" Total policies: {report.total_policies}")
        print(f" Valid: {report.valid_policies}")
        print(f" Invalid: {report.invalid_policies}")
        print(f" Total issues: {report.total_issues}\n")
|
|
@@ -100,6 +100,9 @@ class AWSServiceFetcher:
|
|
|
100
100
|
"wafv2",
|
|
101
101
|
]
|
|
102
102
|
|
|
103
|
+
# Default concurrency limits
|
|
104
|
+
DEFAULT_MAX_CONCURRENT_REQUESTS = 10
|
|
105
|
+
|
|
103
106
|
def __init__(
|
|
104
107
|
self,
|
|
105
108
|
timeout: float = constants.DEFAULT_HTTP_TIMEOUT_SECONDS,
|
|
@@ -112,6 +115,7 @@ class AWSServiceFetcher:
|
|
|
112
115
|
prefetch_common: bool = True,
|
|
113
116
|
cache_dir: Path | str | None = None,
|
|
114
117
|
aws_services_dir: Path | str | None = None,
|
|
118
|
+
max_concurrent_requests: int = DEFAULT_MAX_CONCURRENT_REQUESTS,
|
|
115
119
|
):
|
|
116
120
|
"""Initialize AWS service fetcher.
|
|
117
121
|
|
|
@@ -130,10 +134,13 @@ class AWSServiceFetcher:
|
|
|
130
134
|
instead of making API calls. Directory should contain:
|
|
131
135
|
- _services.json: List of all services
|
|
132
136
|
- {service}.json: Individual service files (e.g., s3.json)
|
|
137
|
+
max_concurrent_requests: Maximum number of concurrent HTTP requests (default: 10)
|
|
133
138
|
"""
|
|
134
139
|
self.prefetch_common = prefetch_common
|
|
135
140
|
self.aws_services_dir = Path(aws_services_dir) if aws_services_dir else None
|
|
136
141
|
self._prefetched_services: set[str] = set()
|
|
142
|
+
# Semaphore for limiting concurrent requests
|
|
143
|
+
self._request_semaphore = asyncio.Semaphore(max_concurrent_requests)
|
|
137
144
|
|
|
138
145
|
# Initialize storage component
|
|
139
146
|
self._storage = ServiceFileStorage(
|
|
@@ -343,7 +350,10 @@ class AWSServiceFetcher:
|
|
|
343
350
|
raise ValueError(f"Service `{service_name}` not found")
|
|
344
351
|
|
|
345
352
|
async def fetch_multiple_services(self, service_names: list[str]) -> dict[str, ServiceDetail]:
|
|
346
|
-
"""Fetch multiple services concurrently with
|
|
353
|
+
"""Fetch multiple services concurrently with controlled parallelism.
|
|
354
|
+
|
|
355
|
+
Uses a semaphore to limit concurrent requests and prevent overwhelming
|
|
356
|
+
the AWS service reference API.
|
|
347
357
|
|
|
348
358
|
Args:
|
|
349
359
|
service_names: List of service names to fetch
|
|
@@ -361,14 +371,16 @@ class AWSServiceFetcher:
|
|
|
361
371
|
"""
|
|
362
372
|
|
|
363
373
|
async def fetch_single(name: str) -> tuple[str, ServiceDetail]:
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
374
|
+
# Use semaphore to limit concurrent requests
|
|
375
|
+
async with self._request_semaphore:
|
|
376
|
+
try:
|
|
377
|
+
detail = await self.fetch_service_by_name(name)
|
|
378
|
+
return name, detail
|
|
379
|
+
except Exception as e: # pylint: disable=broad-exception-caught
|
|
380
|
+
logger.error(f"Failed to fetch service {name}: {e}")
|
|
381
|
+
raise
|
|
382
|
+
|
|
383
|
+
# Fetch all services concurrently (semaphore controls parallelism)
|
|
372
384
|
tasks = [fetch_single(name) for name in service_names]
|
|
373
385
|
results = await asyncio.gather(*tasks, return_exceptions=True)
|
|
374
386
|
|
|
@@ -0,0 +1,245 @@
|
|
|
1
|
+
"""CODEOWNERS file parser for GitHub repositories.
|
|
2
|
+
|
|
3
|
+
This module provides functionality to parse GitHub CODEOWNERS files and
|
|
4
|
+
determine which users/teams own specific files. Used to authorize users
|
|
5
|
+
who can ignore validation findings.
|
|
6
|
+
|
|
7
|
+
Reference: https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import re
|
|
13
|
+
from dataclasses import dataclass, field
|
|
14
|
+
from functools import lru_cache
|
|
15
|
+
from pathlib import PurePosixPath
|
|
16
|
+
from typing import ClassVar
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass(slots=True)
class CodeOwnerRule:
    """One pattern-to-owners line of a CODEOWNERS file.

    The regex for ``pattern`` is built once, right after construction, so that
    matching many file paths against the rule does not recompile it.
    """

    # Glob-style file pattern in GitHub CODEOWNERS format.
    pattern: str
    # Owner entries exactly as written, e.g. "@user" or "@org/team".
    owners: list[str]
    # Regex derived from `pattern`; always overwritten in __post_init__.
    compiled_pattern: re.Pattern[str] | None = field(default=None, repr=False)

    def __post_init__(self) -> None:
        """Compile ``pattern`` and store the result on the instance."""
        self.compiled_pattern = _compile_codeowners_pattern(self.pattern)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@lru_cache(maxsize=256)
def _compile_codeowners_pattern(pattern: str) -> re.Pattern[str]:
    """Compile a CODEOWNERS glob pattern into a regex (cached per pattern).

    Rules implemented (gitignore-style, as used by GitHub CODEOWNERS):
    - A leading ``/`` anchors the pattern to the repository root.
    - A ``/`` in the middle of the pattern also anchors it to the root; a
      trailing ``/`` alone does not (``apps/`` matches an ``apps`` directory at
      any depth).
    - ``*`` matches anything except ``/``; ``?`` matches one non-``/`` character.
    - ``**/`` at the start or between slashes matches zero or more directories,
      so ``a/**/b`` matches ``a/b`` as well as ``a/x/y/b``.
    - Any other ``**`` matches anything, including ``/``.
    - A pattern that names a file or directory also matches everything below
      it, except a pattern ending in ``/*``, which matches direct children only
      (``docs/*`` matches ``docs/a.md`` but not ``docs/sub/a.md``).

    The result is meant to be used with ``.search()`` on a repo-relative path
    that has no leading slash.

    Args:
        pattern: CODEOWNERS glob pattern.

    Returns:
        Compiled regex pattern.
    """
    glob = pattern.strip()

    # Handle leading slash (anchored to root)
    anchored = glob.startswith("/")
    if anchored:
        glob = glob[1:]

    # A trailing slash only marks "directory"; it must not count as a path
    # separator when deciding whether the pattern is root-relative.
    if glob.endswith("/"):
        glob = glob[:-1]

    # A separator left in the middle makes the pattern implicitly anchored.
    if "/" in glob:
        anchored = True

    # "dir/*" covers the immediate children of dir only, not nested files.
    direct_children_only = glob.endswith("/*")

    def translate(match: re.Match[str]) -> str:
        """Map one glob token (wildcard or literal run) to its regex form."""
        token = match.group(0)
        if token == "**/":
            return "(?:.*/)?"  # zero or more whole directories
        if token == "**":
            return ".*"
        if token == "*":
            return "[^/]*"
        if token == "?":
            return "[^/]"
        return re.escape(token)

    # Tokenise left to right: a "**/" that begins a path segment is tried first
    # so it wins over the generic "**" and "*" alternatives.
    body = re.sub(r"(?:(?<=/)|^)\*\*/|\*\*|\*|\?|[^*?]+", translate, glob)

    prefix = "^" if anchored else "(?:^|/)"
    # Match the named entry itself or anything beneath it, unless limited to
    # direct children.
    suffix = "$" if direct_children_only else "(?:/|$)"

    return re.compile(prefix + body + suffix)
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
class CodeOwnersParser:
    """Parser for GitHub CODEOWNERS file format.

    Parses CODEOWNERS content into an ordered list of rules and answers
    file-to-owner queries using last-matching-pattern semantics.

    Example:
        >>> content = '''
        ... # Default owners
        ... * @default-team
        ... # IAM policies owned by security
        ... /policies/**/*.json @security-team @security-lead
        ... '''
        >>> parser = CodeOwnersParser(content)
        >>> parser.get_owners_for_file("policies/admin/admin.json")
        ['@security-team', '@security-lead']
    """

    # Repo-relative locations where a CODEOWNERS file may live.
    CODEOWNERS_PATHS: ClassVar[list[str]] = [
        "CODEOWNERS",
        ".github/CODEOWNERS",
        "docs/CODEOWNERS",
    ]

    def __init__(self, content: str) -> None:
        """Initialize parser with CODEOWNERS content.

        Args:
            content: Raw content of CODEOWNERS file
        """
        self.rules: list[CodeOwnerRule] = []
        self._parse(content)

    def _parse(self, content: str) -> None:
        """Parse CODEOWNERS file content into ``self.rules`` (file order kept).

        Blank lines and ``#`` comment lines are skipped. Within a rule line,
        a token starting with ``#`` begins an inline comment, so it and
        everything after it are not treated as owners. A pattern with no
        owners is kept as a rule with an empty owner list (unsets ownership).

        Args:
            content: Raw CODEOWNERS file content
        """
        for raw_line in content.splitlines():
            line = raw_line.strip()

            # Skip empty lines and comments
            if not line or line.startswith("#"):
                continue

            pattern, *rest = line.split()
            owners: list[str] = []
            for token in rest:
                # Owners are @user, @org/team or an email - never "#...".
                if token.startswith("#"):
                    break
                owners.append(token)

            self.rules.append(CodeOwnerRule(pattern=pattern, owners=owners))

    def get_owners_for_file(self, file_path: str) -> list[str]:
        """Get owners for a specific file path.

        Uses last-matching-pattern semantics: if multiple patterns match,
        the last one in the file wins.

        Args:
            file_path: Path to the file (relative to repo root)

        Returns:
            List of owners for the file (a fresh list), or an empty list if no
            rule matches or the winning rule has no owners.
        """
        # Remove leading "./" prefixes and leading slashes only. A plain
        # lstrip("./") would strip a character set and eat the dot of
        # dot-directories such as ".github".
        normalized = file_path
        while normalized.startswith("./"):
            normalized = normalized[2:]
        normalized = normalized.lstrip("/")

        # Walk backwards so the first hit is the last matching rule.
        for rule in reversed(self.rules):
            if rule.compiled_pattern and rule.compiled_pattern.search(normalized):
                return list(rule.owners)

        return []

    def is_owner(self, username: str, file_path: str) -> bool:
        """Check if a user is an owner of a file.

        Note: This only checks direct username matches. For team membership,
        use GitHubIntegration.is_user_codeowner() which resolves teams.

        Args:
            username: GitHub username (with or without @)
            file_path: Path to the file

        Returns:
            True if user is directly listed as owner
        """
        # Compare case-insensitively and without the leading "@".
        wanted = username.lstrip("@").lower()

        # Entries containing "/" are teams and are deliberately not matched here.
        return any(
            "/" not in candidate and candidate == wanted
            for candidate in (owner.lstrip("@").lower() for owner in self.get_owners_for_file(file_path))
        )

    def get_teams_for_file(self, file_path: str) -> list[tuple[str, str]]:
        """Get team owners for a file as (org, team_slug) tuples.

        Args:
            file_path: Path to the file

        Returns:
            List of (org, team_slug) tuples, split on the first "/" of each
            team-style owner entry.
        """
        teams: list[tuple[str, str]] = []

        for owner in self.get_owners_for_file(file_path):
            org, slash, team_slug = owner.lstrip("@").partition("/")
            if slash:
                teams.append((org, team_slug))

        return teams
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def normalize_path(path: str) -> str:
    """Normalize a file path for CODEOWNERS matching.

    Backslash separators are converted to ``/``, redundant ``.`` segments and
    repeated slashes are collapsed, and any leading ``/`` is removed so the
    result is relative.

    Args:
        path: File path (may be absolute or relative)

    Returns:
        Normalized relative path
    """
    # PurePosixPath treats "\" as an ordinary character, so Windows-style
    # separators have to be converted explicitly before it is applied.
    posix = str(PurePosixPath(path.replace("\\", "/")))
    # PurePosixPath has already dropped any leading "./"; only the root
    # slash(es) remain to be removed.
    return posix.lstrip("/")
|