iam-policy-validator 1.10.3__py3-none-any.whl → 1.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- iam_policy_validator-1.11.0.dist-info/METADATA +782 -0
- {iam_policy_validator-1.10.3.dist-info → iam_policy_validator-1.11.0.dist-info}/RECORD +25 -21
- iam_validator/__version__.py +1 -1
- iam_validator/checks/action_condition_enforcement.py +27 -14
- iam_validator/checks/sensitive_action.py +123 -11
- iam_validator/checks/utils/policy_level_checks.py +47 -10
- iam_validator/commands/__init__.py +6 -0
- iam_validator/commands/completion.py +420 -0
- iam_validator/commands/query.py +485 -0
- iam_validator/commands/validate.py +21 -26
- iam_validator/core/config/category_suggestions.py +77 -0
- iam_validator/core/config/condition_requirements.py +105 -54
- iam_validator/core/config/defaults.py +82 -6
- iam_validator/core/config/wildcards.py +3 -0
- iam_validator/core/diff_parser.py +321 -0
- iam_validator/core/formatters/enhanced.py +34 -27
- iam_validator/core/models.py +2 -0
- iam_validator/core/pr_commenter.py +179 -51
- iam_validator/core/report.py +19 -17
- iam_validator/integrations/github_integration.py +250 -1
- iam_validator/sdk/__init__.py +33 -0
- iam_validator/sdk/query_utils.py +454 -0
- iam_policy_validator-1.10.3.dist-info/METADATA +0 -549
- {iam_policy_validator-1.10.3.dist-info → iam_policy_validator-1.11.0.dist-info}/WHEEL +0 -0
- {iam_policy_validator-1.10.3.dist-info → iam_policy_validator-1.11.0.dist-info}/entry_points.txt +0 -0
- {iam_policy_validator-1.10.3.dist-info → iam_policy_validator-1.11.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -28,6 +28,13 @@ from typing import Any, Final
|
|
|
28
28
|
IAM_PASS_ROLE_REQUIREMENT: Final[dict[str, Any]] = {
|
|
29
29
|
"actions": ["iam:PassRole"],
|
|
30
30
|
"severity": "high",
|
|
31
|
+
"suggestion_text": (
|
|
32
|
+
"This action allows passing IAM roles to AWS services, which can lead to privilege escalation. "
|
|
33
|
+
"Always restrict which services can receive roles:\n"
|
|
34
|
+
"• Use `iam:PassedToService` to limit specific AWS services (e.g., lambda.amazonaws.com, ecs-tasks.amazonaws.com)\n"
|
|
35
|
+
"• Consider adding `iam:AssociatedResourceArn` to restrict which resources can use the role\n"
|
|
36
|
+
"• Require MFA for sensitive role passing (`aws:MultiFactorAuthPresent` = `true`)"
|
|
37
|
+
),
|
|
31
38
|
"required_conditions": [
|
|
32
39
|
{
|
|
33
40
|
"condition_key": "iam:PassedToService",
|
|
@@ -50,66 +57,96 @@ IAM_PASS_ROLE_REQUIREMENT: Final[dict[str, Any]] = {
|
|
|
50
57
|
],
|
|
51
58
|
}
|
|
52
59
|
|
|
53
|
-
# S3
|
|
54
|
-
|
|
55
|
-
|
|
60
|
+
# S3 Organization Boundary - Prevent data exfiltration for both reads and writes
|
|
61
|
+
# Enforces that S3 operations only access resources within organizational boundaries
|
|
62
|
+
S3_ORG_BOUNDARY: Final[dict[str, Any]] = {
|
|
63
|
+
"actions": ["s3:GetObject", "s3:GetObjectVersion", "s3:PutObject"],
|
|
56
64
|
"severity": "medium",
|
|
65
|
+
"suggestion_text": (
|
|
66
|
+
"These S3 actions can read or write data. Prevent data exfiltration by ensuring operations only access organization-owned buckets:\n"
|
|
67
|
+
"• Use organization ID (`aws:ResourceOrgID` = `${aws:PrincipalOrgID}`)\n"
|
|
68
|
+
"• OR use organization paths (`aws:ResourceOrgPaths` = `${aws:PrincipalOrgPaths}`)\n"
|
|
69
|
+
"• OR restrict by network boundary (IP/VPC/VPCe) + same account (`aws:ResourceAccount` = `${aws:PrincipalAccount}`)"
|
|
70
|
+
),
|
|
57
71
|
"required_conditions": {
|
|
58
72
|
"any_of": [
|
|
59
|
-
# Option 1:
|
|
73
|
+
# Option 1: Restrict to organization resources (strongest)
|
|
60
74
|
{
|
|
61
|
-
"
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
75
|
+
"condition_key": "aws:ResourceOrgID",
|
|
76
|
+
"description": "Restrict S3 operations to resources within your AWS Organization",
|
|
77
|
+
"expected_value": "${aws:PrincipalOrgID}",
|
|
78
|
+
"example": (
|
|
79
|
+
"{\n"
|
|
80
|
+
' "Condition": {\n'
|
|
81
|
+
' "StringEquals": {\n'
|
|
82
|
+
' "aws:ResourceOrgID": "${aws:PrincipalOrgID}"\n'
|
|
83
|
+
" }\n"
|
|
84
|
+
" }\n"
|
|
85
|
+
"}"
|
|
86
|
+
),
|
|
87
|
+
},
|
|
88
|
+
# Option 2: Restrict to organization paths
|
|
89
|
+
{
|
|
90
|
+
"condition_key": "aws:ResourceOrgPaths",
|
|
91
|
+
"description": "Restrict S3 operations to resources within your AWS Organization path",
|
|
92
|
+
"expected_value": "${aws:PrincipalOrgPaths}",
|
|
93
|
+
"example": (
|
|
94
|
+
"{\n"
|
|
95
|
+
' "Condition": {\n'
|
|
96
|
+
' "StringEquals": {\n'
|
|
97
|
+
' "aws:ResourceOrgPaths": "${aws:PrincipalOrgPaths}"\n'
|
|
98
|
+
" }\n"
|
|
99
|
+
" }\n"
|
|
100
|
+
"}"
|
|
101
|
+
),
|
|
83
102
|
},
|
|
84
|
-
# Option
|
|
103
|
+
# Option 3: Network boundary - Source IP + same account
|
|
85
104
|
{
|
|
86
|
-
"
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
{
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
}
|
|
107
|
-
|
|
105
|
+
"condition_key": "aws:SourceIp",
|
|
106
|
+
"description": "Restrict S3 operations by source IP address and same account",
|
|
107
|
+
"example": (
|
|
108
|
+
"{\n"
|
|
109
|
+
' "Condition": {\n'
|
|
110
|
+
' "IpAddress": {"aws:SourceIp": "10.0.0.0/8"},\n'
|
|
111
|
+
' "StringEquals": {"aws:ResourceAccount": "${aws:PrincipalAccount}"}\n'
|
|
112
|
+
" }\n"
|
|
113
|
+
"}"
|
|
114
|
+
),
|
|
115
|
+
},
|
|
116
|
+
# Option 4: Network boundary - Source VPC + same account
|
|
117
|
+
{
|
|
118
|
+
"condition_key": "aws:SourceVpc",
|
|
119
|
+
"description": "Restrict S3 operations by source VPC and same account",
|
|
120
|
+
"example": (
|
|
121
|
+
"{\n"
|
|
122
|
+
' "Condition": {\n'
|
|
123
|
+
' "StringEquals": {\n'
|
|
124
|
+
' "aws:SourceVpc": "vpc-12345678",\n'
|
|
125
|
+
' "aws:ResourceAccount": "${aws:PrincipalAccount}"\n'
|
|
126
|
+
" }\n"
|
|
127
|
+
" }\n"
|
|
128
|
+
"}"
|
|
129
|
+
),
|
|
130
|
+
},
|
|
131
|
+
# Option 5: Network boundary - VPC Endpoint + same account
|
|
132
|
+
{
|
|
133
|
+
"condition_key": "aws:SourceVpce",
|
|
134
|
+
"description": "Restrict S3 operations by VPC endpoint and same account",
|
|
135
|
+
"example": (
|
|
136
|
+
"{\n"
|
|
137
|
+
' "Condition": {\n'
|
|
138
|
+
' "StringEquals": {\n'
|
|
139
|
+
' "aws:SourceVpce": "vpce-12345678",\n'
|
|
140
|
+
' "aws:ResourceAccount": "${aws:PrincipalAccount}"\n'
|
|
141
|
+
" }\n"
|
|
142
|
+
" }\n"
|
|
143
|
+
"}"
|
|
144
|
+
),
|
|
108
145
|
},
|
|
109
|
-
# Option
|
|
146
|
+
# Option 6: Minimum - at least require same account
|
|
110
147
|
{
|
|
111
148
|
"condition_key": "aws:ResourceAccount",
|
|
112
|
-
"description": "Restrict S3
|
|
149
|
+
"description": "Restrict S3 operations to resources within the same AWS account",
|
|
113
150
|
"expected_value": "${aws:PrincipalAccount}",
|
|
114
151
|
"example": (
|
|
115
152
|
"{\n"
|
|
@@ -130,10 +167,16 @@ SOURCE_IP_RESTRICTIONS: Final[dict[str, Any]] = {
|
|
|
130
167
|
"action_patterns": [
|
|
131
168
|
"^ssm:StartSession$",
|
|
132
169
|
"^ssm:Run.*$",
|
|
133
|
-
"^s3:GetObject$",
|
|
134
170
|
"^rds-db:Connect$",
|
|
135
171
|
],
|
|
136
172
|
"severity": "low",
|
|
173
|
+
"suggestion_text": (
|
|
174
|
+
"This action accesses sensitive resources or data. Restrict network access to trusted locations:\n"
|
|
175
|
+
"• Use `aws:SourceIp` to limit to corporate IP ranges (e.g., office networks, VPN endpoints)\n"
|
|
176
|
+
"• Alternative: Use `aws:SourceVpc` or `aws:SourceVpce` for VPC-based restrictions\n"
|
|
177
|
+
"• Consider combining with secure transport requirements\n"
|
|
178
|
+
"• For S3: Ensure account ownership (`aws:ResourceAccount` = `${aws:PrincipalAccount}`)"
|
|
179
|
+
),
|
|
137
180
|
"required_conditions": [
|
|
138
181
|
{
|
|
139
182
|
"condition_key": "aws:SourceIp",
|
|
@@ -146,7 +189,9 @@ SOURCE_IP_RESTRICTIONS: Final[dict[str, Any]] = {
|
|
|
146
189
|
' "10.0.0.0/8",\n'
|
|
147
190
|
' "172.16.0.0/12"\n'
|
|
148
191
|
" ]\n"
|
|
149
|
-
" }
|
|
192
|
+
" },\n"
|
|
193
|
+
' "Bool": {"aws:SecureTransport": "true"},\n'
|
|
194
|
+
' "StringEquals": {"aws:ResourceAccount": "${aws:PrincipalAccount}"}\n'
|
|
150
195
|
" }\n"
|
|
151
196
|
"}"
|
|
152
197
|
),
|
|
@@ -158,6 +203,12 @@ SOURCE_IP_RESTRICTIONS: Final[dict[str, Any]] = {
|
|
|
158
203
|
S3_SECURE_TRANSPORT: Final[dict[str, Any]] = {
|
|
159
204
|
"actions": ["s3:GetObject", "s3:PutObject"],
|
|
160
205
|
"severity": "critical",
|
|
206
|
+
"suggestion_text": (
|
|
207
|
+
"CRITICAL: This S3 action must enforce encrypted connections. Unencrypted HTTP connections expose data in transit:\n"
|
|
208
|
+
"• Set `aws:SecureTransport` to `true` to enforce HTTPS/TLS\n"
|
|
209
|
+
"• NEVER set `aws:SecureTransport` to `false` (this explicitly allows unencrypted connections)\n"
|
|
210
|
+
"• Combine with other controls (IP restrictions, account boundaries) for defense in depth"
|
|
211
|
+
),
|
|
161
212
|
"required_conditions": {
|
|
162
213
|
"none_of": [
|
|
163
214
|
{
|
|
@@ -200,7 +251,7 @@ PREVENT_PUBLIC_IP: Final[dict[str, Any]] = {
|
|
|
200
251
|
|
|
201
252
|
CONDITION_REQUIREMENTS: Final[list[dict[str, Any]]] = [
|
|
202
253
|
IAM_PASS_ROLE_REQUIREMENT,
|
|
203
|
-
|
|
254
|
+
S3_ORG_BOUNDARY, # Unified S3 read/write organization boundary enforcement
|
|
204
255
|
SOURCE_IP_RESTRICTIONS,
|
|
205
256
|
S3_SECURE_TRANSPORT,
|
|
206
257
|
PREVENT_PUBLIC_IP,
|
|
@@ -521,6 +521,82 @@ DEFAULT_CONFIG = {
|
|
|
521
521
|
"ignore_patterns": [
|
|
522
522
|
{"action_matches": "^iam:PassRole$"},
|
|
523
523
|
],
|
|
524
|
+
# Cross-statement privilege escalation patterns (policy-wide detection)
|
|
525
|
+
# These patterns detect dangerous action combinations across ANY statements in the policy
|
|
526
|
+
# Uses all_of logic: ALL actions must exist somewhere in the policy
|
|
527
|
+
"sensitive_actions": [
|
|
528
|
+
# User privilege escalation: Create user + attach admin policy
|
|
529
|
+
{
|
|
530
|
+
"all_of": ["iam:CreateUser", "iam:AttachUserPolicy"],
|
|
531
|
+
"severity": "critical",
|
|
532
|
+
"message": "Policy grants {actions} across statements - enables privilege escalation. {statements}",
|
|
533
|
+
"suggestion": (
|
|
534
|
+
"This combination allows an attacker to:\n"
|
|
535
|
+
"1. Create a new IAM user\n"
|
|
536
|
+
"2. Attach AdministratorAccess policy to that user\n"
|
|
537
|
+
"3. Escalate to full account access\n\n"
|
|
538
|
+
"Mitigation options:\n"
|
|
539
|
+
"• Remove both of these permissions\n"
|
|
540
|
+
"• Add strict IAM conditions (MFA, IP restrictions, force a specific policy with `iam:PolicyARN` condition)\n"
|
|
541
|
+
),
|
|
542
|
+
},
|
|
543
|
+
# Role privilege escalation: Create role + attach admin policy
|
|
544
|
+
{
|
|
545
|
+
"all_of": ["iam:CreateRole", "iam:AttachRolePolicy"],
|
|
546
|
+
"severity": "high",
|
|
547
|
+
"message": "Policy grants {actions} across statements - enables privilege escalation. {statements}",
|
|
548
|
+
"suggestion": (
|
|
549
|
+
"This combination allows creating privileged roles with admin policies.\n\n"
|
|
550
|
+
"Mitigation options:\n"
|
|
551
|
+
"• Remove both of these permissions\n"
|
|
552
|
+
"• Add strict IAM conditions with a Permissions Boundary and ABAC Tagging, force a specific policy with `iam:PolicyARN` condition\n"
|
|
553
|
+
),
|
|
554
|
+
},
|
|
555
|
+
# Lambda backdoor: Create/update function + invoke
|
|
556
|
+
{
|
|
557
|
+
"all_of": ["lambda:CreateFunction", "lambda:InvokeFunction"],
|
|
558
|
+
"severity": "medium",
|
|
559
|
+
"message": "Policy grants {actions} across statements - enables code execution. {statements}",
|
|
560
|
+
"suggestion": (
|
|
561
|
+
"This combination allows an attacker to:\n"
|
|
562
|
+
"1. Create a Lambda function with malicious code\n"
|
|
563
|
+
"2. Execute the function to perform operations with the Lambda's role\n\n"
|
|
564
|
+
"Mitigation options:\n"
|
|
565
|
+
"• Restrict Lambda creation to specific function names/paths\n"
|
|
566
|
+
"• Require resource tags on functions and tag-based invocation controls\n"
|
|
567
|
+
"• Require MFA for Lambda function creation\n"
|
|
568
|
+
"• Use separate policies for creation vs invocation"
|
|
569
|
+
),
|
|
570
|
+
},
|
|
571
|
+
# Lambda code modification backdoor
|
|
572
|
+
{
|
|
573
|
+
"all_of": ["lambda:UpdateFunctionCode", "lambda:InvokeFunction"],
|
|
574
|
+
"severity": "medium",
|
|
575
|
+
"message": "Policy grants {actions} across statements - enables code injection. {statements}",
|
|
576
|
+
"suggestion": (
|
|
577
|
+
"This combination allows modifying existing Lambda functions and executing them.\n\n"
|
|
578
|
+
"Mitigation options:\n"
|
|
579
|
+
"• Use resource-based policies to restrict which functions can be modified\n"
|
|
580
|
+
"• Require MFA for code updates\n"
|
|
581
|
+
"• Use separate policies for code updates vs invocation\n"
|
|
582
|
+
"• Implement code signing for Lambda functions"
|
|
583
|
+
),
|
|
584
|
+
},
|
|
585
|
+
# EC2 instance privilege escalation
|
|
586
|
+
{
|
|
587
|
+
"all_of": ["ec2:RunInstances", "iam:PassRole"],
|
|
588
|
+
"severity": "high",
|
|
589
|
+
"message": "Policy grants {actions} across statements - enables privilege escalation via instance profile. {statements}",
|
|
590
|
+
"suggestion": (
|
|
591
|
+
"This combination allows launching EC2 instances with privileged roles.\n\n"
|
|
592
|
+
"Mitigation options:\n"
|
|
593
|
+
"• Add iam:PassedToService condition requiring ec2.amazonaws.com\n"
|
|
594
|
+
"• Restrict instance creation to specific AMIs or instance types\n"
|
|
595
|
+
"• Limit PassRole to specific low-privilege roles\n"
|
|
596
|
+
"• Require tagging and ABAC controls"
|
|
597
|
+
),
|
|
598
|
+
},
|
|
599
|
+
],
|
|
524
600
|
},
|
|
525
601
|
# ========================================================================
|
|
526
602
|
# 18. ACTION CONDITION ENFORCEMENT
|
|
@@ -533,7 +609,7 @@ DEFAULT_CONFIG = {
|
|
|
533
609
|
# Available requirements:
|
|
534
610
|
# Default (enabled):
|
|
535
611
|
# - iam_pass_role: Requires iam:PassedToService
|
|
536
|
-
# -
|
|
612
|
+
# - s3_org_boundary: Prevents S3 data exfiltration (reads + writes)
|
|
537
613
|
# - source_ip_restrictions: Restricts to corporate IPs
|
|
538
614
|
# - s3_secure_transport: Prevents insecure transport
|
|
539
615
|
# - prevent_public_ip: Prevents 0.0.0.0/0 IP ranges
|
|
@@ -543,10 +619,10 @@ DEFAULT_CONFIG = {
|
|
|
543
619
|
"enabled": True,
|
|
544
620
|
"severity": "high", # Default severity (can be overridden per-requirement)
|
|
545
621
|
"description": "Enforces conditions (MFA, IP, tags, etc.) for specific actions at both statement and policy level",
|
|
546
|
-
#
|
|
547
|
-
#
|
|
548
|
-
#
|
|
549
|
-
"
|
|
622
|
+
# CRITICAL: This key is used by sensitive_action check for filtering
|
|
623
|
+
# It must be named "requirements" (not "action_condition_requirements")
|
|
624
|
+
# to enable automatic deduplication of warnings
|
|
625
|
+
"requirements": __import__("copy").deepcopy(CONDITION_REQUIREMENTS),
|
|
550
626
|
# POLICY-LEVEL: Scan entire policy and enforce conditions across ALL matching statements
|
|
551
627
|
# Example: "If ANY statement grants iam:CreateUser, then ALL such statements must have MFA"
|
|
552
628
|
# Default: Empty list (opt-in feature)
|
|
@@ -571,6 +647,6 @@ def get_default_config() -> dict:
|
|
|
571
647
|
Returns:
|
|
572
648
|
A deep copy of the default configuration dictionary
|
|
573
649
|
"""
|
|
574
|
-
import copy
|
|
650
|
+
import copy # pylint: disable=import-outside-toplevel
|
|
575
651
|
|
|
576
652
|
return copy.deepcopy(DEFAULT_CONFIG)
|
|
@@ -28,8 +28,11 @@ DEFAULT_ALLOWED_WILDCARDS: Final[tuple[str, ...]] = (
|
|
|
28
28
|
"cloudwatch:List*",
|
|
29
29
|
# DynamoDB
|
|
30
30
|
"dynamodb:Describe*",
|
|
31
|
+
"dynamodb:Get*",
|
|
32
|
+
"dynamodb:List*",
|
|
31
33
|
# EC2
|
|
32
34
|
"ec2:Describe*",
|
|
35
|
+
"ec2:List*",
|
|
33
36
|
# Elastic Load Balancing
|
|
34
37
|
"elasticloadbalancing:Describe*",
|
|
35
38
|
# IAM (non-sensitive read operations)
|
|
@@ -0,0 +1,321 @@
|
|
|
1
|
+
"""Diff Parser Module.
|
|
2
|
+
|
|
3
|
+
This module parses GitHub PR diff information to extract changed line numbers.
|
|
4
|
+
It supports GitHub's unified diff format and provides utilities for determining
|
|
5
|
+
which lines and statements were modified in a PR.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
import re
|
|
10
|
+
from dataclasses import dataclass
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
class ParsedDiff:
    """Line-level change summary for one file of a GitHub pull request.

    Line numbers in ``changed_lines`` and ``added_lines`` refer to the new
    (post-change) side of the diff; ``deleted_lines`` refers to the old side.
    """

    # Path of the file, relative to the repository root.
    file_path: str
    # Every added or modified line number (new side).
    changed_lines: set[int]
    # Line numbers that were added (new side).
    added_lines: set[int]
    # Line numbers that were deleted (old side).
    deleted_lines: set[int]
    # File status: added, modified, removed or renamed.
    status: str
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass
class StatementLocation:
    """Line span occupied by a single statement inside a policy file."""

    # Zero-based position of the statement within the policy.
    statement_index: int
    # First line of the statement (1-indexed).
    start_line: int
    # Last line of the statement (1-indexed).
    end_line: int
    # True when at least one line in the span was modified.
    has_changes: bool
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class DiffParser:
    """Parser for GitHub PR diff information.

    Every method is a static method; the class is used purely as a namespace.
    """

    # Matches "@@ -old_start,old_count +new_start,new_count @@" hunk headers.
    # Handles variations: @@ -5,3 +5,4 @@, @@ -5 +5,2 @@, etc.
    _HUNK_HEADER = re.compile(r"^@@\s+-(\d+)(?:,(\d+))?\s+\+(\d+)(?:,(\d+))?\s+@@")

    @staticmethod
    def _empty_diff(file_path: str, status: str) -> "ParsedDiff":
        """Build a ParsedDiff that tracks the file but records no changed lines."""
        return ParsedDiff(
            file_path=file_path,
            changed_lines=set(),
            added_lines=set(),
            deleted_lines=set(),
            status=status,
        )

    @staticmethod
    def parse_pr_files(pr_files: list[dict[str, Any]]) -> "dict[str, ParsedDiff]":
        """Parse GitHub PR files response to extract changed line information.

        Entries that are not dicts, or that lack a non-empty string
        ``filename``, are skipped. Files without a usable ``patch`` and files
        whose patch fails to parse are still returned, with empty line sets.

        Args:
            pr_files: List of file dicts from GitHub API's get_pr_files() call.
                Each dict contains: filename, status, patch, additions, deletions

        Returns:
            Dict mapping file paths to ParsedDiff objects

        Example:
            >>> pr_files = [{
            ...     "filename": "policies/policy.json",
            ...     "status": "modified",
            ...     "patch": "@@ -5,3 +5,4 @@\\n context\\n-old\\n+new\\n+added"
            ... }]
            >>> result = DiffParser.parse_pr_files(pr_files)
            >>> result["policies/policy.json"].changed_lines
            {6, 7}
        """
        parsed: dict[str, ParsedDiff] = {}

        for file_info in pr_files:
            if not isinstance(file_info, dict):
                continue

            filename = file_info.get("filename")
            if not filename or not isinstance(filename, str):
                continue

            status = file_info.get("status", "modified")
            patch = file_info.get("patch")

            # Files without patches (e.g., binary files, very large files)
            if not patch or not isinstance(patch, str):
                logger.debug("No patch available for %s, skipping diff parsing", filename)
                # Still track the file with empty change sets
                parsed[filename] = DiffParser._empty_diff(filename, status)
                continue

            try:
                diff = DiffParser.parse_unified_diff(patch)
                parsed[filename] = ParsedDiff(
                    file_path=filename,
                    changed_lines=diff["changed_lines"],
                    added_lines=diff["added_lines"],
                    deleted_lines=diff["deleted_lines"],
                    status=status,
                )
                logger.debug(
                    "Parsed diff for %s: %s changed lines",
                    filename,
                    len(diff["changed_lines"]),
                )
            except Exception as e:  # pylint: disable=broad-exception-caught
                # Deliberate best effort: one bad patch must not abort the others.
                logger.warning("Failed to parse diff for %s: %s", filename, e)
                # Track file with empty change sets on parse error
                parsed[filename] = DiffParser._empty_diff(filename, status)

        return parsed

    @staticmethod
    def parse_unified_diff(patch: str) -> dict[str, set[int]]:
        """Parse a unified diff patch to extract changed line numbers.

        Unified diff format uses @@ headers to indicate line ranges:
        @@ -old_start,old_count +new_start,new_count @@

        Inside a hunk, lines starting with:
        - '-' are deletions (old side line numbers)
        - '+' are additions (new side line numbers)
        - ' ' are context (both sides)

        Anything before the first hunk header (for example ``--- a/file`` and
        ``+++ b/file`` file headers) is ignored, so it is never recorded as a
        change. An empty line inside a hunk is treated as a blank context line
        whose leading space was stripped.

        Args:
            patch: Unified diff string from GitHub API

        Returns:
            Dict with keys:
            - changed_lines: All added/modified lines (new side)
            - added_lines: Only added lines (new side)
            - deleted_lines: Only deleted lines (old side)

        Example:
            >>> patch = '''@@ -5,3 +5,4 @@
            ...  context line
            ... -deleted line
            ... +added line
            ... +another added line
            ...  context line'''
            >>> result = DiffParser.parse_unified_diff(patch)
            >>> result['added_lines']
            {6, 7}
        """
        changed_lines: set[int] = set()
        added_lines: set[int] = set()
        deleted_lines: set[int] = set()

        current_new_line = 0
        current_old_line = 0
        in_hunk = False  # becomes True once the first @@ header has been seen

        for line in patch.split("\n"):
            header = DiffParser._HUNK_HEADER.match(line)
            if header:
                current_old_line = int(header.group(1))
                current_new_line = int(header.group(3))
                in_hunk = True
                continue

            if not in_hunk:
                # File headers ("--- a/x", "+++ b/x", "diff ...", "index ...")
                # start with +/- but are not content lines.
                continue

            if not line or line[0] == " ":
                # Context line (both sides). An empty string is a blank context
                # line that lost its leading space; skipping it would shift
                # every later line number in the hunk.
                current_new_line += 1
                current_old_line += 1
            elif line[0] == "+":
                # Addition (new side only)
                added_lines.add(current_new_line)
                changed_lines.add(current_new_line)
                current_new_line += 1
            elif line[0] == "-":
                # Deletion (old side only)
                deleted_lines.add(current_old_line)
                current_old_line += 1
            # Ignore any other prefix (e.g., "\ No newline at end of file")

        return {
            "changed_lines": changed_lines,
            "added_lines": added_lines,
            "deleted_lines": deleted_lines,
        }

    @staticmethod
    def get_modified_statements(
        line_mapping: dict[int, int],
        changed_lines: set[int],
        policy_file: str,
    ) -> "dict[int, StatementLocation]":
        """Determine which statements were modified based on changed lines.

        A statement is considered modified if ANY line within its range appears
        in the changed_lines set. A statement's range ends one line before the
        next statement (by index) starts; the last statement's end is looked up
        with get_statement_end_line().

        Args:
            line_mapping: Dict mapping statement index to statement start line
                (from PRCommenter._get_line_mapping())
            changed_lines: Set of line numbers that were changed in the PR
            policy_file: Path to the policy file (to determine statement end lines)

        Returns:
            Dict mapping statement indices to StatementLocation objects
            Only includes statements that were modified.

        Example:
            >>> line_mapping = {0: 3, 1: 10, 2: 20}  # Statement starts
            >>> changed_lines = {5, 6}  # Lines changed in statement 0
            >>> result = DiffParser.get_modified_statements(
            ...     line_mapping, changed_lines, "policy.json"
            ... )
            >>> result[0].has_changes
            True
            >>> 1 in result  # Statement 1 not modified
            False
        """
        if not line_mapping or not changed_lines:
            return {}

        # Determine end line for each statement
        statement_ranges: dict[int, tuple[int, int]] = {}
        sorted_indices = sorted(line_mapping)

        for position, stmt_idx in enumerate(sorted_indices):
            start_line = line_mapping[stmt_idx]

            if position < len(sorted_indices) - 1:
                # One line before the next statement starts
                end_line = line_mapping[sorted_indices[position + 1]] - 1
            else:
                # For last statement, try to read file to get actual end
                end_line = DiffParser.get_statement_end_line(policy_file, start_line)

            statement_ranges[stmt_idx] = (start_line, end_line)

        # Check which statements have changes
        modified_statements: dict[int, StatementLocation] = {}

        for stmt_idx, (start_line, end_line) in statement_ranges.items():
            # Same result as intersecting with set(range(start, end + 1)),
            # without materialising the range.
            if any(start_line <= changed <= end_line for changed in changed_lines):
                modified_statements[stmt_idx] = StatementLocation(
                    statement_index=stmt_idx,
                    start_line=start_line,
                    end_line=end_line,
                    has_changes=True,
                )
                logger.debug(
                    "Statement %s (lines %s-%s) was modified", stmt_idx, start_line, end_line
                )

        return modified_statements

    @staticmethod
    def get_statement_end_line(policy_file: str, start_line: int) -> int:
        """Find the end line of a statement block starting at start_line.

        Tracks brace depth to find where the statement object closes. Braces
        inside double-quoted strings are ignored, and closing braces that
        appear before the statement's own opening brace (e.g. a start line of
        ``}, {``) do not count.

        Args:
            policy_file: Path to policy file
            start_line: Line number where statement starts (1-indexed)

        Returns:
            Line number where statement ends (1-indexed). When no closing
            brace is found, ``min(start_line + 20, number_of_lines)``; when the
            file cannot be read, ``start_line + 10``.
        """
        try:
            with open(policy_file, encoding="utf-8") as f:
                lines = f.readlines()

            brace_depth = 0
            in_statement = False

            # Convert to 0-indexed; clamp so start_line < 1 cannot wrap around
            # to the end of the file through a negative index.
            for line_num in range(max(start_line - 1, 0), len(lines)):
                # JSON strings cannot span lines, so string state resets per line.
                in_string = False
                escaped = False

                for char in lines[line_num]:
                    if in_string:
                        if escaped:
                            escaped = False
                        elif char == "\\":
                            escaped = True
                        elif char == '"':
                            in_string = False
                        continue

                    if char == '"':
                        in_string = True
                    elif char == "{":
                        brace_depth += 1
                        in_statement = True
                    elif char == "}" and in_statement:
                        brace_depth -= 1
                        # Found the closing brace for this statement
                        if brace_depth == 0:
                            return line_num + 1  # Convert back to 1-indexed

            # If we couldn't find the end, return a reasonable default
            # (start_line + 20 or end of file)
            return min(start_line + 20, len(lines))

        except Exception as e:  # pylint: disable=broad-exception-caught
            # Deliberate best effort: an unreadable file yields a default span.
            logger.debug("Could not determine statement end line: %s", e)
            return start_line + 10  # Reasonable default
|