iam-policy-validator 1.10.3__py3-none-any.whl → 1.11.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -28,6 +28,13 @@ from typing import Any, Final
28
28
  IAM_PASS_ROLE_REQUIREMENT: Final[dict[str, Any]] = {
29
29
  "actions": ["iam:PassRole"],
30
30
  "severity": "high",
31
+ "suggestion_text": (
32
+ "This action allows passing IAM roles to AWS services, which can lead to privilege escalation. "
33
+ "Always restrict which services can receive roles:\n"
34
+ "• Use `iam:PassedToService` to limit specific AWS services (e.g., lambda.amazonaws.com, ecs-tasks.amazonaws.com)\n"
35
+ "• Consider adding `iam:AssociatedResourceArn` to restrict which resources can use the role\n"
36
+ "• Require MFA for sensitive role passing (`aws:MultiFactorAuthPresent` = `true`)"
37
+ ),
31
38
  "required_conditions": [
32
39
  {
33
40
  "condition_key": "iam:PassedToService",
@@ -50,66 +57,96 @@ IAM_PASS_ROLE_REQUIREMENT: Final[dict[str, Any]] = {
50
57
  ],
51
58
  }
52
59
 
53
- # S3 Write Operations - Require organization ID
54
- S3_WRITE_ORG_ID: Final[dict[str, Any]] = {
55
- "actions": ["s3:PutObject"],
60
+ # S3 Organization Boundary - Prevent data exfiltration for both reads and writes
61
+ # Enforces that S3 operations only access resources within organizational boundaries
62
+ S3_ORG_BOUNDARY: Final[dict[str, Any]] = {
63
+ "actions": ["s3:GetObject", "s3:GetObjectVersion", "s3:PutObject"],
56
64
  "severity": "medium",
65
+ "suggestion_text": (
66
+ "These S3 actions can read or write data. Prevent data exfiltration by ensuring operations only access organization-owned buckets:\n"
67
+ "• Use organization ID (`aws:ResourceOrgID` = `${aws:PrincipalOrgID}`)\n"
68
+ "• OR use organization paths (`aws:ResourceOrgPaths` = `${aws:PrincipalOrgPaths}`)\n"
69
+ "• OR restrict by network boundary (IP/VPC/VPCe) + same account (`aws:ResourceAccount` = `${aws:PrincipalAccount}`)"
70
+ ),
57
71
  "required_conditions": {
58
72
  "any_of": [
59
- # Option 1: Use organization-level control with ResourceOrgID
73
+ # Option 1: Restrict to organization resources (strongest)
60
74
  {
61
- "all_of": [
62
- {
63
- "condition_key": "aws:ResourceOrgID",
64
- "description": "Restrict S3 write actions to resources within your AWS Organization",
65
- "expected_value": "${aws:PrincipalOrgID}",
66
- "example": (
67
- "{\n"
68
- ' "Condition": {\n'
69
- ' "StringEquals": {\n'
70
- ' "aws:ResourceOrgID": "${aws:PrincipalOrgID}",\n'
71
- ' "aws:ResourceAccount": "${aws:PrincipalAccount}"\n'
72
- " }\n"
73
- " }\n"
74
- "}"
75
- ),
76
- },
77
- {
78
- "condition_key": "aws:ResourceAccount",
79
- "description": "Ensure the S3 resource belongs to the same AWS account as the principal",
80
- "expected_value": "${aws:PrincipalAccount}",
81
- },
82
- ]
75
+ "condition_key": "aws:ResourceOrgID",
76
+ "description": "Restrict S3 operations to resources within your AWS Organization",
77
+ "expected_value": "${aws:PrincipalOrgID}",
78
+ "example": (
79
+ "{\n"
80
+ ' "Condition": {\n'
81
+ ' "StringEquals": {\n'
82
+ ' "aws:ResourceOrgID": "${aws:PrincipalOrgID}"\n'
83
+ " }\n"
84
+ " }\n"
85
+ "}"
86
+ ),
87
+ },
88
+ # Option 2: Restrict to organization paths
89
+ {
90
+ "condition_key": "aws:ResourceOrgPaths",
91
+ "description": "Restrict S3 operations to resources within your AWS Organization path",
92
+ "expected_value": "${aws:PrincipalOrgPaths}",
93
+ "example": (
94
+ "{\n"
95
+ ' "Condition": {\n'
96
+ ' "StringEquals": {\n'
97
+ ' "aws:ResourceOrgPaths": "${aws:PrincipalOrgPaths}"\n'
98
+ " }\n"
99
+ " }\n"
100
+ "}"
101
+ ),
83
102
  },
84
- # Option 2: Use organization path-based control
103
+ # Option 3: Network boundary - Source IP + same account
85
104
  {
86
- "all_of": [
87
- {
88
- "condition_key": "aws:ResourceOrgPaths",
89
- "description": "Restrict S3 write actions to resources within your AWS Organization path",
90
- "expected_value": "${aws:PrincipalOrgPaths}",
91
- "example": (
92
- "{\n"
93
- ' "Condition": {\n'
94
- ' "StringEquals": {\n'
95
- ' "aws:ResourceOrgPaths": "${aws:PrincipalOrgPaths}",\n'
96
- ' "aws:ResourceAccount": "${aws:PrincipalAccount}"\n'
97
- " }\n"
98
- " }\n"
99
- "}"
100
- ),
101
- },
102
- {
103
- "condition_key": "aws:ResourceAccount",
104
- "description": "Ensure the S3 resource belongs to the same AWS account as the principal",
105
- "expected_value": "${aws:PrincipalAccount}",
106
- },
107
- ]
105
+ "condition_key": "aws:SourceIp",
106
+ "description": "Restrict S3 operations by source IP address and same account",
107
+ "example": (
108
+ "{\n"
109
+ ' "Condition": {\n'
110
+ ' "IpAddress": {"aws:SourceIp": "10.0.0.0/8"},\n'
111
+ ' "StringEquals": {"aws:ResourceAccount": "${aws:PrincipalAccount}"}\n'
112
+ " }\n"
113
+ "}"
114
+ ),
115
+ },
116
+ # Option 4: Network boundary - Source VPC + same account
117
+ {
118
+ "condition_key": "aws:SourceVpc",
119
+ "description": "Restrict S3 operations by source VPC and same account",
120
+ "example": (
121
+ "{\n"
122
+ ' "Condition": {\n'
123
+ ' "StringEquals": {\n'
124
+ ' "aws:SourceVpc": "vpc-12345678",\n'
125
+ ' "aws:ResourceAccount": "${aws:PrincipalAccount}"\n'
126
+ " }\n"
127
+ " }\n"
128
+ "}"
129
+ ),
130
+ },
131
+ # Option 5: Network boundary - VPC Endpoint + same account
132
+ {
133
+ "condition_key": "aws:SourceVpce",
134
+ "description": "Restrict S3 operations by VPC endpoint and same account",
135
+ "example": (
136
+ "{\n"
137
+ ' "Condition": {\n'
138
+ ' "StringEquals": {\n'
139
+ ' "aws:SourceVpce": "vpce-12345678",\n'
140
+ ' "aws:ResourceAccount": "${aws:PrincipalAccount}"\n'
141
+ " }\n"
142
+ " }\n"
143
+ "}"
144
+ ),
108
145
  },
109
- # Option 3: Account-only control (less restrictive, but still secure)
146
+ # Option 6: Minimum - at least require same account
110
147
  {
111
148
  "condition_key": "aws:ResourceAccount",
112
- "description": "Restrict S3 write actions to resources within the same AWS account",
149
+ "description": "Restrict S3 operations to resources within the same AWS account",
113
150
  "expected_value": "${aws:PrincipalAccount}",
114
151
  "example": (
115
152
  "{\n"
@@ -130,10 +167,16 @@ SOURCE_IP_RESTRICTIONS: Final[dict[str, Any]] = {
130
167
  "action_patterns": [
131
168
  "^ssm:StartSession$",
132
169
  "^ssm:Run.*$",
133
- "^s3:GetObject$",
134
170
  "^rds-db:Connect$",
135
171
  ],
136
172
  "severity": "low",
173
+ "suggestion_text": (
174
+ "This action accesses sensitive resources or data. Restrict network access to trusted locations:\n"
175
+ "• Use `aws:SourceIp` to limit to corporate IP ranges (e.g., office networks, VPN endpoints)\n"
176
+ "• Alternative: Use `aws:SourceVpc` or `aws:SourceVpce` for VPC-based restrictions\n"
177
+ "• Consider combining with secure transport requirements\n"
178
+ "• For S3: Ensure account ownership (`aws:ResourceAccount` = `${aws:PrincipalAccount}`)"
179
+ ),
137
180
  "required_conditions": [
138
181
  {
139
182
  "condition_key": "aws:SourceIp",
@@ -146,7 +189,9 @@ SOURCE_IP_RESTRICTIONS: Final[dict[str, Any]] = {
146
189
  ' "10.0.0.0/8",\n'
147
190
  ' "172.16.0.0/12"\n'
148
191
  " ]\n"
149
- " }\n"
192
+ " },\n"
193
+ ' "Bool": {"aws:SecureTransport": "true"},\n'
194
+ ' "StringEquals": {"aws:ResourceAccount": "${aws:PrincipalAccount}"}\n'
150
195
  " }\n"
151
196
  "}"
152
197
  ),
@@ -158,6 +203,12 @@ SOURCE_IP_RESTRICTIONS: Final[dict[str, Any]] = {
158
203
  S3_SECURE_TRANSPORT: Final[dict[str, Any]] = {
159
204
  "actions": ["s3:GetObject", "s3:PutObject"],
160
205
  "severity": "critical",
206
+ "suggestion_text": (
207
+ "CRITICAL: This S3 action must enforce encrypted connections. Unencrypted HTTP connections expose data in transit:\n"
208
+ "• Set `aws:SecureTransport` to `true` to enforce HTTPS/TLS\n"
209
+ "• NEVER set `aws:SecureTransport` to `false` (this explicitly allows unencrypted connections)\n"
210
+ "• Combine with other controls (IP restrictions, account boundaries) for defense in depth"
211
+ ),
161
212
  "required_conditions": {
162
213
  "none_of": [
163
214
  {
@@ -200,7 +251,7 @@ PREVENT_PUBLIC_IP: Final[dict[str, Any]] = {
200
251
 
201
252
  CONDITION_REQUIREMENTS: Final[list[dict[str, Any]]] = [
202
253
  IAM_PASS_ROLE_REQUIREMENT,
203
- S3_WRITE_ORG_ID,
254
+ S3_ORG_BOUNDARY, # Unified S3 read/write organization boundary enforcement
204
255
  SOURCE_IP_RESTRICTIONS,
205
256
  S3_SECURE_TRANSPORT,
206
257
  PREVENT_PUBLIC_IP,
@@ -521,6 +521,82 @@ DEFAULT_CONFIG = {
521
521
  "ignore_patterns": [
522
522
  {"action_matches": "^iam:PassRole$"},
523
523
  ],
524
+ # Cross-statement privilege escalation patterns (policy-wide detection)
525
+ # These patterns detect dangerous action combinations across ANY statements in the policy
526
+ # Uses all_of logic: ALL actions must exist somewhere in the policy
527
+ "sensitive_actions": [
528
+ # User privilege escalation: Create user + attach admin policy
529
+ {
530
+ "all_of": ["iam:CreateUser", "iam:AttachUserPolicy"],
531
+ "severity": "critical",
532
+ "message": "Policy grants {actions} across statements - enables privilege escalation. {statements}",
533
+ "suggestion": (
534
+ "This combination allows an attacker to:\n"
535
+ "1. Create a new IAM user\n"
536
+ "2. Attach AdministratorAccess policy to that user\n"
537
+ "3. Escalate to full account access\n\n"
538
+ "Mitigation options:\n"
539
+ "• Remove both of these permissions\n"
540
+ "• Add strict IAM conditions (MFA, IP restrictions, force a specific policy with `iam:PolicyARN` condition)\n"
541
+ ),
542
+ },
543
+ # Role privilege escalation: Create role + attach admin policy
544
+ {
545
+ "all_of": ["iam:CreateRole", "iam:AttachRolePolicy"],
546
+ "severity": "high",
547
+ "message": "Policy grants {actions} across statements - enables privilege escalation. {statements}",
548
+ "suggestion": (
549
+ "This combination allows creating privileged roles with admin policies.\n\n"
550
+ "Mitigation options:\n"
551
+ "• Remove both of these permissions\n"
552
+ "• Add strict IAM conditions with a Permissions Boundary and ABAC Tagging, force a specific policy with `iam:PolicyARN` condition\n"
553
+ ),
554
+ },
555
+ # Lambda backdoor: Create/update function + invoke
556
+ {
557
+ "all_of": ["lambda:CreateFunction", "lambda:InvokeFunction"],
558
+ "severity": "medium",
559
+ "message": "Policy grants {actions} across statements - enables code execution. {statements}",
560
+ "suggestion": (
561
+ "This combination allows an attacker to:\n"
562
+ "1. Create a Lambda function with malicious code\n"
563
+ "2. Execute the function to perform operations with the Lambda's role\n\n"
564
+ "Mitigation options:\n"
565
+ "• Restrict Lambda creation to specific function names/paths\n"
566
+ "• Require resource tags on functions and tag-based invocation controls\n"
567
+ "• Require MFA for Lambda function creation\n"
568
+ "• Use separate policies for creation vs invocation"
569
+ ),
570
+ },
571
+ # Lambda code modification backdoor
572
+ {
573
+ "all_of": ["lambda:UpdateFunctionCode", "lambda:InvokeFunction"],
574
+ "severity": "medium",
575
+ "message": "Policy grants {actions} across statements - enables code injection. {statements}",
576
+ "suggestion": (
577
+ "This combination allows modifying existing Lambda functions and executing them.\n\n"
578
+ "Mitigation options:\n"
579
+ "• Use resource-based policies to restrict which functions can be modified\n"
580
+ "• Require MFA for code updates\n"
581
+ "• Use separate policies for code updates vs invocation\n"
582
+ "• Implement code signing for Lambda functions"
583
+ ),
584
+ },
585
+ # EC2 instance privilege escalation
586
+ {
587
+ "all_of": ["ec2:RunInstances", "iam:PassRole"],
588
+ "severity": "high",
589
+ "message": "Policy grants {actions} across statements - enables privilege escalation via instance profile. {statements}",
590
+ "suggestion": (
591
+ "This combination allows launching EC2 instances with privileged roles.\n\n"
592
+ "Mitigation options:\n"
593
+ "• Add iam:PassedToService condition requiring ec2.amazonaws.com\n"
594
+ "• Restrict instance creation to specific AMIs or instance types\n"
595
+ "• Limit PassRole to specific low-privilege roles\n"
596
+ "• Require tagging and ABAC controls"
597
+ ),
598
+ },
599
+ ],
524
600
  },
525
601
  # ========================================================================
526
602
  # 18. ACTION CONDITION ENFORCEMENT
@@ -533,7 +609,7 @@ DEFAULT_CONFIG = {
533
609
  # Available requirements:
534
610
  # Default (enabled):
535
611
  # - iam_pass_role: Requires iam:PassedToService
536
- # - s3_org_id: Requires organization ID for S3 writes
612
+ # - s3_org_boundary: Prevents S3 data exfiltration (reads + writes)
537
613
  # - source_ip_restrictions: Restricts to corporate IPs
538
614
  # - s3_secure_transport: Prevents insecure transport
539
615
  # - prevent_public_ip: Prevents 0.0.0.0/0 IP ranges
@@ -543,10 +619,10 @@ DEFAULT_CONFIG = {
543
619
  "enabled": True,
544
620
  "severity": "high", # Default severity (can be overridden per-requirement)
545
621
  "description": "Enforces conditions (MFA, IP, tags, etc.) for specific actions at both statement and policy level",
546
- # STATEMENT-LEVEL: Load 5 requirements from Python module
547
- # Deep copy to prevent mutation of the originals
548
- # These check individual statements independently
549
- "action_condition_requirements": __import__("copy").deepcopy(CONDITION_REQUIREMENTS),
622
+ # CRITICAL: This key is used by sensitive_action check for filtering
623
+ # It must be named "requirements" (not "action_condition_requirements")
624
+ # to enable automatic deduplication of warnings
625
+ "requirements": __import__("copy").deepcopy(CONDITION_REQUIREMENTS),
550
626
  # POLICY-LEVEL: Scan entire policy and enforce conditions across ALL matching statements
551
627
  # Example: "If ANY statement grants iam:CreateUser, then ALL such statements must have MFA"
552
628
  # Default: Empty list (opt-in feature)
@@ -571,6 +647,6 @@ def get_default_config() -> dict:
571
647
  Returns:
572
648
  A deep copy of the default configuration dictionary
573
649
  """
574
- import copy
650
+ import copy # pylint: disable=import-outside-toplevel
575
651
 
576
652
  return copy.deepcopy(DEFAULT_CONFIG)
@@ -28,8 +28,11 @@ DEFAULT_ALLOWED_WILDCARDS: Final[tuple[str, ...]] = (
28
28
  "cloudwatch:List*",
29
29
  # DynamoDB
30
30
  "dynamodb:Describe*",
31
+ "dynamodb:Get*",
32
+ "dynamodb:List*",
31
33
  # EC2
32
34
  "ec2:Describe*",
35
+ "ec2:List*",
33
36
  # Elastic Load Balancing
34
37
  "elasticloadbalancing:Describe*",
35
38
  # IAM (non-sensitive read operations)
@@ -0,0 +1,321 @@
1
+ """Diff Parser Module.
2
+
3
+ This module parses GitHub PR diff information to extract changed line numbers.
4
+ It supports GitHub's unified diff format and provides utilities for determining
5
+ which lines and statements were modified in a PR.
6
+ """
7
+
8
+ import logging
9
+ import re
10
+ from dataclasses import dataclass
11
+ from typing import Any
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
@dataclass
class ParsedDiff:
    """Per-file record of the line changes extracted from a GitHub PR diff.

    Attributes:
        file_path: Relative path to the file from repository root
        changed_lines: Set of all line numbers that were added or modified (new side)
        added_lines: Set of line numbers that were added (new side)
        deleted_lines: Set of line numbers that were deleted (old side)
        status: File status (added, modified, removed, renamed)
    """

    # Field order is part of the dataclass contract (positional construction
    # and generated __eq__/__repr__ depend on it) — do not reorder.
    file_path: str
    changed_lines: set[int]
    added_lines: set[int]
    deleted_lines: set[int]
    status: str
33
+
34
+
35
@dataclass
class StatementLocation:
    """Location information for a statement in a policy file.

    Attributes:
        statement_index: Zero-based index of the statement
        start_line: First line number of the statement (1-indexed)
        end_line: Last line number of the statement (1-indexed)
        has_changes: True if any line in this range was modified
    """

    # Field order matters for positional construction and generated __eq__.
    statement_index: int
    start_line: int
    end_line: int
    has_changes: bool
50
+
51
+
52
class DiffParser:
    """Parser for GitHub PR diff information.

    A stateless collection of static helpers that turn GitHub's unified-diff
    patches into sets of changed line numbers, and map those changes onto
    statement blocks inside policy files.
    """

    @staticmethod
    def _empty_diff(filename: str, status: str) -> ParsedDiff:
        """Return a ParsedDiff for *filename* with empty change sets.

        Fallback used when a patch is missing (binary/very large files) or
        cannot be parsed, so the file is still tracked in the result mapping.
        """
        return ParsedDiff(
            file_path=filename,
            changed_lines=set(),
            added_lines=set(),
            deleted_lines=set(),
            status=status,
        )

    @staticmethod
    def parse_pr_files(pr_files: list[dict[str, Any]]) -> dict[str, ParsedDiff]:
        """Parse GitHub PR files response to extract changed line information.

        Args:
            pr_files: List of file dicts from GitHub API's get_pr_files() call.
                Each dict contains: filename, status, patch, additions, deletions

        Returns:
            Dict mapping file paths to ParsedDiff objects

        Example:
            >>> pr_files = [{
            ...     "filename": "policies/policy.json",
            ...     "status": "modified",
            ...     "patch": "@@ -5,3 +5,4 @@\\n context\\n-old\\n+new\\n+added"
            ... }]
            >>> result = DiffParser.parse_pr_files(pr_files)
            >>> result["policies/policy.json"].changed_lines
            {6, 7}
        """
        parsed: dict[str, ParsedDiff] = {}

        for file_info in pr_files:
            # Defensive: treat the API response as untrusted input.
            if not isinstance(file_info, dict):
                continue

            filename = file_info.get("filename")
            if not filename or not isinstance(filename, str):
                continue

            status = file_info.get("status", "modified")
            patch = file_info.get("patch")

            # Files without patches (e.g., binary files, very large files):
            # still track them, just with empty change sets.
            if not patch or not isinstance(patch, str):
                # BUG FIX: log line previously interpolated nothing and printed
                # a literal placeholder; include the actual filename (lazy %-args).
                logger.debug("No patch available for %s, skipping diff parsing", filename)
                parsed[filename] = DiffParser._empty_diff(filename, status)
                continue

            try:
                diff = DiffParser.parse_unified_diff(patch)
                parsed[filename] = ParsedDiff(
                    file_path=filename,
                    changed_lines=diff["changed_lines"],
                    added_lines=diff["added_lines"],
                    deleted_lines=diff["deleted_lines"],
                    status=status,
                )
                logger.debug(
                    "Parsed diff for %s: %d changed lines",
                    filename,
                    len(diff["changed_lines"]),
                )
            except Exception as e:  # pylint: disable=broad-exception-caught
                # Best-effort boundary: one malformed patch must not abort the
                # whole PR scan; record the file with empty change sets.
                logger.warning("Failed to parse diff for %s: %s", filename, e)
                parsed[filename] = DiffParser._empty_diff(filename, status)

        return parsed

    @staticmethod
    def parse_unified_diff(patch: str) -> dict[str, set[int]]:
        """Parse a unified diff patch to extract changed line numbers.

        Unified diff format uses @@ headers to indicate line ranges:
            @@ -old_start,old_count +new_start,new_count @@

        Lines starting with:
            - '-' are deletions (old side line numbers)
            - '+' are additions (new side line numbers)
            - ' ' are context (both sides)

        Args:
            patch: Unified diff string from GitHub API

        Returns:
            Dict with keys:
                - changed_lines: All added/modified lines (new side)
                - added_lines: Only added lines (new side)
                - deleted_lines: Only deleted lines (old side)

        Example:
            >>> patch = '''@@ -5,3 +5,4 @@
            ...  context line
            ... -deleted line
            ... +added line
            ... +another added line
            ...  context line'''
            >>> result = DiffParser.parse_unified_diff(patch)
            >>> result['added_lines']
            {6, 7}
        """
        changed_lines: set[int] = set()
        added_lines: set[int] = set()
        deleted_lines: set[int] = set()

        # Matches @@ -old_start[,old_count] +new_start[,new_count] @@ headers.
        # Handles variations: "@@ -5,3 +5,4 @@", "@@ -5 +5,2 @@", etc.
        hunk_header_pattern = re.compile(r"^@@\s+-(\d+)(?:,(\d+))?\s+\+(\d+)(?:,(\d+))?\s+@@")

        current_new_line = 0
        current_old_line = 0

        for line in patch.split("\n"):
            match = hunk_header_pattern.match(line)
            if match:
                # A new hunk resets both counters to the header's start lines.
                current_old_line = int(match.group(1))
                current_new_line = int(match.group(3))
                continue

            if not line:
                continue

            marker = line[0]
            if marker == "+":
                # Addition: exists only on the new side.
                added_lines.add(current_new_line)
                changed_lines.add(current_new_line)
                current_new_line += 1
            elif marker == "-":
                # Deletion: exists only on the old side.
                deleted_lines.add(current_old_line)
                current_old_line += 1
            elif marker == " ":
                # Context line: present on both sides.
                current_new_line += 1
                current_old_line += 1
            # Anything else (e.g. "\ No newline at end of file") is ignored.

        return {
            "changed_lines": changed_lines,
            "added_lines": added_lines,
            "deleted_lines": deleted_lines,
        }

    @staticmethod
    def get_modified_statements(
        line_mapping: dict[int, int],
        changed_lines: set[int],
        policy_file: str,
    ) -> dict[int, StatementLocation]:
        """Determine which statements were modified based on changed lines.

        A statement is considered modified if ANY line within its range appears
        in the changed_lines set.

        Args:
            line_mapping: Dict mapping statement index to statement start line
                (from PRCommenter._get_line_mapping())
            changed_lines: Set of line numbers that were changed in the PR
            policy_file: Path to the policy file (used to determine the end
                line of the final statement)

        Returns:
            Dict mapping statement indices to StatementLocation objects.
            Only includes statements that were modified.

        Example:
            >>> line_mapping = {0: 3, 1: 10, 2: 20}  # Statement starts
            >>> changed_lines = {5, 6}  # Lines changed in statement 0
            >>> result = DiffParser.get_modified_statements(line_mapping, changed_lines, "policy.json")
            >>> result[0].has_changes
            True
            >>> 1 in result  # Statement 1 not modified
            False
        """
        if not line_mapping or not changed_lines:
            return {}

        sorted_indices = sorted(line_mapping.keys())

        # Each statement ends one line before the next one starts; the last
        # statement's end is recovered by brace matching against the file.
        statement_ranges: dict[int, tuple[int, int]] = {}
        for i, stmt_idx in enumerate(sorted_indices):
            start_line = line_mapping[stmt_idx]
            if i < len(sorted_indices) - 1:
                end_line = line_mapping[sorted_indices[i + 1]] - 1
            else:
                end_line = DiffParser.get_statement_end_line(policy_file, start_line)
            statement_ranges[stmt_idx] = (start_line, end_line)

        modified_statements: dict[int, StatementLocation] = {}
        for stmt_idx, (start_line, end_line) in statement_ranges.items():
            # Any overlap between the statement's line span and the diff?
            if set(range(start_line, end_line + 1)) & changed_lines:
                modified_statements[stmt_idx] = StatementLocation(
                    statement_index=stmt_idx,
                    start_line=start_line,
                    end_line=end_line,
                    has_changes=True,
                )
                logger.debug(
                    "Statement %d (lines %d-%d) was modified",
                    stmt_idx,
                    start_line,
                    end_line,
                )

        return modified_statements

    @staticmethod
    def get_statement_end_line(policy_file: str, start_line: int) -> int:
        """Find the end line of a statement block starting at start_line.

        Tracks brace depth to find where the statement object closes.

        NOTE(review): braces inside JSON string values are counted too. This
        is harmless for balanced pairs such as "${aws:PrincipalOrgID}", but an
        unbalanced brace inside a string would skew the result — confirm
        policy files never contain such values.

        Args:
            policy_file: Path to policy file
            start_line: Line number where statement starts (1-indexed)

        Returns:
            Line number where statement ends (1-indexed)
        """
        try:
            with open(policy_file, encoding="utf-8") as f:
                lines = f.readlines()

            brace_depth = 0
            in_statement = False

            # start_line is 1-indexed; list indices are 0-indexed.
            for line_num in range(start_line - 1, len(lines)):
                for char in lines[line_num]:
                    if char == "{":
                        brace_depth += 1
                        in_statement = True
                    elif char == "}":
                        brace_depth -= 1

                # Depth returned to zero after entering a brace: statement closed.
                if in_statement and brace_depth == 0:
                    return line_num + 1  # Convert back to 1-indexed

            # Closing brace never found: return a bounded default
            # (start_line + 20 or end of file).
            return min(start_line + 20, len(lines))

        except Exception as e:  # pylint: disable=broad-exception-caught
            logger.debug("Could not determine statement end line: %s", e)
            return start_line + 10  # Reasonable default