iam-policy-validator 1.14.0 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- iam_policy_validator-1.14.0.dist-info/METADATA +782 -0
- iam_policy_validator-1.14.0.dist-info/RECORD +106 -0
- iam_policy_validator-1.14.0.dist-info/WHEEL +4 -0
- iam_policy_validator-1.14.0.dist-info/entry_points.txt +2 -0
- iam_policy_validator-1.14.0.dist-info/licenses/LICENSE +21 -0
- iam_validator/__init__.py +27 -0
- iam_validator/__main__.py +11 -0
- iam_validator/__version__.py +9 -0
- iam_validator/checks/__init__.py +45 -0
- iam_validator/checks/action_condition_enforcement.py +1442 -0
- iam_validator/checks/action_resource_matching.py +472 -0
- iam_validator/checks/action_validation.py +67 -0
- iam_validator/checks/condition_key_validation.py +88 -0
- iam_validator/checks/condition_type_mismatch.py +257 -0
- iam_validator/checks/full_wildcard.py +62 -0
- iam_validator/checks/mfa_condition_check.py +105 -0
- iam_validator/checks/policy_size.py +114 -0
- iam_validator/checks/policy_structure.py +556 -0
- iam_validator/checks/policy_type_validation.py +331 -0
- iam_validator/checks/principal_validation.py +708 -0
- iam_validator/checks/resource_validation.py +135 -0
- iam_validator/checks/sensitive_action.py +438 -0
- iam_validator/checks/service_wildcard.py +98 -0
- iam_validator/checks/set_operator_validation.py +153 -0
- iam_validator/checks/sid_uniqueness.py +146 -0
- iam_validator/checks/trust_policy_validation.py +509 -0
- iam_validator/checks/utils/__init__.py +17 -0
- iam_validator/checks/utils/action_parser.py +149 -0
- iam_validator/checks/utils/policy_level_checks.py +190 -0
- iam_validator/checks/utils/sensitive_action_matcher.py +293 -0
- iam_validator/checks/utils/wildcard_expansion.py +86 -0
- iam_validator/checks/wildcard_action.py +58 -0
- iam_validator/checks/wildcard_resource.py +374 -0
- iam_validator/commands/__init__.py +31 -0
- iam_validator/commands/analyze.py +549 -0
- iam_validator/commands/base.py +48 -0
- iam_validator/commands/cache.py +393 -0
- iam_validator/commands/completion.py +471 -0
- iam_validator/commands/download_services.py +255 -0
- iam_validator/commands/post_to_pr.py +86 -0
- iam_validator/commands/query.py +485 -0
- iam_validator/commands/validate.py +830 -0
- iam_validator/core/__init__.py +13 -0
- iam_validator/core/access_analyzer.py +671 -0
- iam_validator/core/access_analyzer_report.py +640 -0
- iam_validator/core/aws_fetcher.py +29 -0
- iam_validator/core/aws_service/__init__.py +21 -0
- iam_validator/core/aws_service/cache.py +108 -0
- iam_validator/core/aws_service/client.py +205 -0
- iam_validator/core/aws_service/fetcher.py +641 -0
- iam_validator/core/aws_service/parsers.py +149 -0
- iam_validator/core/aws_service/patterns.py +51 -0
- iam_validator/core/aws_service/storage.py +291 -0
- iam_validator/core/aws_service/validators.py +380 -0
- iam_validator/core/check_registry.py +679 -0
- iam_validator/core/cli.py +134 -0
- iam_validator/core/codeowners.py +245 -0
- iam_validator/core/condition_validators.py +626 -0
- iam_validator/core/config/__init__.py +81 -0
- iam_validator/core/config/aws_api.py +35 -0
- iam_validator/core/config/aws_global_conditions.py +160 -0
- iam_validator/core/config/category_suggestions.py +181 -0
- iam_validator/core/config/check_documentation.py +390 -0
- iam_validator/core/config/condition_requirements.py +258 -0
- iam_validator/core/config/config_loader.py +670 -0
- iam_validator/core/config/defaults.py +739 -0
- iam_validator/core/config/principal_requirements.py +421 -0
- iam_validator/core/config/sensitive_actions.py +672 -0
- iam_validator/core/config/service_principals.py +132 -0
- iam_validator/core/config/wildcards.py +127 -0
- iam_validator/core/constants.py +149 -0
- iam_validator/core/diff_parser.py +325 -0
- iam_validator/core/finding_fingerprint.py +131 -0
- iam_validator/core/formatters/__init__.py +27 -0
- iam_validator/core/formatters/base.py +147 -0
- iam_validator/core/formatters/console.py +68 -0
- iam_validator/core/formatters/csv.py +171 -0
- iam_validator/core/formatters/enhanced.py +481 -0
- iam_validator/core/formatters/html.py +672 -0
- iam_validator/core/formatters/json.py +33 -0
- iam_validator/core/formatters/markdown.py +64 -0
- iam_validator/core/formatters/sarif.py +251 -0
- iam_validator/core/ignore_patterns.py +297 -0
- iam_validator/core/ignore_processor.py +309 -0
- iam_validator/core/ignored_findings.py +400 -0
- iam_validator/core/label_manager.py +197 -0
- iam_validator/core/models.py +404 -0
- iam_validator/core/policy_checks.py +220 -0
- iam_validator/core/policy_loader.py +785 -0
- iam_validator/core/pr_commenter.py +780 -0
- iam_validator/core/report.py +942 -0
- iam_validator/integrations/__init__.py +28 -0
- iam_validator/integrations/github_integration.py +1821 -0
- iam_validator/integrations/ms_teams.py +442 -0
- iam_validator/sdk/__init__.py +220 -0
- iam_validator/sdk/arn_matching.py +382 -0
- iam_validator/sdk/context.py +222 -0
- iam_validator/sdk/exceptions.py +48 -0
- iam_validator/sdk/helpers.py +177 -0
- iam_validator/sdk/policy_utils.py +451 -0
- iam_validator/sdk/query_utils.py +454 -0
- iam_validator/sdk/shortcuts.py +283 -0
- iam_validator/utils/__init__.py +35 -0
- iam_validator/utils/cache.py +105 -0
- iam_validator/utils/regex.py +205 -0
- iam_validator/utils/terminal.py +22 -0
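
The largest addition in this release is `iam_validator/integrations/github_integration.py`, whose diff follows. As a quick orientation, here is a minimal, hypothetical usage sketch of the `GitHubIntegration` class it introduces (not part of the package contents), assuming the `GITHUB_TOKEN`, `GITHUB_REPOSITORY`, and `GITHUB_PR_NUMBER` environment variables read by its constructor are set:

```python
# Hypothetical usage sketch; exercises only methods visible in the diff below.
import asyncio

from iam_validator.integrations.github_integration import GitHubIntegration


async def main() -> None:
    # The async context manager opens a shared httpx.AsyncClient for all calls.
    async with GitHubIntegration() as gh:
        if not gh.is_configured():
            return
        # Post or update a summary comment identified by an HTML marker.
        await gh.update_or_create_comment(
            "### IAM Policy Validator results\nNo issues found."
        )
        # Label the PR based on the outcome.
        await gh.add_labels(["iam-policy-validated"])


if __name__ == "__main__":
    asyncio.run(main())
```

The class also works without the context manager; in that case each call opens a short-lived `httpx.AsyncClient`, as `_make_request` in the diff shows.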
|
@@ -0,0 +1,1821 @@
|
|
|
1
|
+
"""GitHub Integration Module.
|
|
2
|
+
|
|
3
|
+
This module provides functionality to interact with GitHub,
|
|
4
|
+
including posting PR comments, line comments, labels, and retrieving PR information.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import asyncio
|
|
8
|
+
import base64
|
|
9
|
+
import logging
|
|
10
|
+
import os
|
|
11
|
+
import re
|
|
12
|
+
import time
|
|
13
|
+
from enum import Enum
|
|
14
|
+
from typing import TYPE_CHECKING, Any
|
|
15
|
+
|
|
16
|
+
import httpx
|
|
17
|
+
|
|
18
|
+
from iam_validator.core import constants
|
|
19
|
+
|
|
20
|
+
if TYPE_CHECKING:
|
|
21
|
+
from iam_validator.core.codeowners import CodeOwnersParser
|
|
22
|
+
|
|
23
|
+
logger = logging.getLogger(__name__)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class GitHubRateLimitError(Exception):
|
|
27
|
+
"""Raised when GitHub API rate limit is exceeded."""
|
|
28
|
+
|
|
29
|
+
def __init__(self, reset_time: int, message: str = "GitHub API rate limit exceeded"):
|
|
30
|
+
self.reset_time = reset_time
|
|
31
|
+
super().__init__(message)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class GitHubRetryableError(Exception):
|
|
35
|
+
"""Raised for transient GitHub API errors that should be retried."""
|
|
36
|
+
|
|
37
|
+
pass
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# Retry configuration
|
|
41
|
+
MAX_RETRIES = 3
|
|
42
|
+
INITIAL_BACKOFF_SECONDS = 1.0
|
|
43
|
+
MAX_BACKOFF_SECONDS = 30.0
|
|
44
|
+
BACKOFF_MULTIPLIER = 2.0
|
|
45
|
+
|
|
46
|
+
# HTTP status codes that should trigger retry
|
|
47
|
+
RETRYABLE_STATUS_CODES = {408, 429, 500, 502, 503, 504}
|
|
48
|
+
|
|
49
|
+
# Concurrency limit for parallel API operations (deletions, updates)
|
|
50
|
+
# This prevents hitting GitHub's secondary rate limits while still being fast
|
|
51
|
+
MAX_CONCURRENT_API_CALLS = 10
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class PRState(str, Enum):
|
|
55
|
+
"""GitHub PR state."""
|
|
56
|
+
|
|
57
|
+
OPEN = "open"
|
|
58
|
+
CLOSED = "closed"
|
|
59
|
+
ALL = "all"
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class ReviewEvent(str, Enum):
|
|
63
|
+
"""GitHub PR review event types."""
|
|
64
|
+
|
|
65
|
+
APPROVE = "APPROVE"
|
|
66
|
+
REQUEST_CHANGES = "REQUEST_CHANGES"
|
|
67
|
+
COMMENT = "COMMENT"
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class GitHubIntegration:
|
|
71
|
+
"""Handles comprehensive GitHub API interactions for PRs.
|
|
72
|
+
|
|
73
|
+
This class provides methods to:
|
|
74
|
+
- Post general PR comments
|
|
75
|
+
- Add line-specific review comments
|
|
76
|
+
- Manage PR labels
|
|
77
|
+
- Submit PR reviews
|
|
78
|
+
- Retrieve PR information and files
|
|
79
|
+
"""
|
|
80
|
+
|
|
81
|
+
def __init__(
|
|
82
|
+
self,
|
|
83
|
+
token: str | None = None,
|
|
84
|
+
repository: str | None = None,
|
|
85
|
+
pr_number: str | None = None,
|
|
86
|
+
):
|
|
87
|
+
"""Initialize GitHub integration.
|
|
88
|
+
|
|
89
|
+
Args:
|
|
90
|
+
token: GitHub API token (defaults to GITHUB_TOKEN env var)
|
|
91
|
+
repository: Repository in format 'owner/repo' (defaults to GITHUB_REPOSITORY env var)
|
|
92
|
+
pr_number: PR number (defaults to GITHUB_PR_NUMBER env var)
|
|
93
|
+
"""
|
|
94
|
+
self.token = self._validate_token(token or os.environ.get("GITHUB_TOKEN"))
|
|
95
|
+
self.repository = self._validate_repository(
|
|
96
|
+
repository or os.environ.get("GITHUB_REPOSITORY")
|
|
97
|
+
)
|
|
98
|
+
self.pr_number = self._validate_pr_number(pr_number or os.environ.get("GITHUB_PR_NUMBER"))
|
|
99
|
+
self.api_url = self._validate_api_url(
|
|
100
|
+
os.environ.get("GITHUB_API_URL", "https://api.github.com")
|
|
101
|
+
)
|
|
102
|
+
self._client: httpx.AsyncClient | None = None
|
|
103
|
+
# Cache for team memberships: (org, team_slug) -> list[str]
|
|
104
|
+
# Reduces API calls when checking multiple users against same team
|
|
105
|
+
self._team_cache: dict[tuple[str, str], list[str]] = {}
|
|
106
|
+
# Cache for CODEOWNERS content (fetched once per instance)
|
|
107
|
+
self._codeowners_cache: str | None = None
|
|
108
|
+
self._codeowners_loaded: bool = False
|
|
109
|
+
|
|
110
|
+
def _validate_token(self, token: str | None) -> str | None:
|
|
111
|
+
"""Validate and sanitize GitHub token.
|
|
112
|
+
|
|
113
|
+
Args:
|
|
114
|
+
token: GitHub token to validate
|
|
115
|
+
|
|
116
|
+
Returns:
|
|
117
|
+
Validated token or None
|
|
118
|
+
"""
|
|
119
|
+
if token is None:
|
|
120
|
+
return None
|
|
121
|
+
|
|
122
|
+
# Basic validation - ensure it's a string and not empty
|
|
123
|
+
if not isinstance(token, str) or not token.strip():
|
|
124
|
+
logger.warning("Invalid GitHub token provided (empty or non-string)")
|
|
125
|
+
return None
|
|
126
|
+
|
|
127
|
+
# Sanitize - remove any whitespace
|
|
128
|
+
token = token.strip()
|
|
129
|
+
|
|
130
|
+
# Basic format check - GitHub tokens have specific patterns
|
|
131
|
+
# Personal access tokens: ghp_*, fine-grained: github_pat_*
|
|
132
|
+
# GitHub App tokens start with different prefixes
|
|
133
|
+
# Just ensure it's reasonable length and ASCII
|
|
134
|
+
if len(token) < 10 or len(token) > 500:
|
|
135
|
+
logger.warning(f"GitHub token has unusual length: {len(token)}")
|
|
136
|
+
return None
|
|
137
|
+
|
|
138
|
+
# Ensure only ASCII characters (tokens should be ASCII)
|
|
139
|
+
if not token.isascii():
|
|
140
|
+
logger.warning("GitHub token contains non-ASCII characters")
|
|
141
|
+
return None
|
|
142
|
+
|
|
143
|
+
return token
|
|
144
|
+
|
|
145
|
+
def _validate_repository(self, repository: str | None) -> str | None:
|
|
146
|
+
"""Validate repository format (owner/repo).
|
|
147
|
+
|
|
148
|
+
Args:
|
|
149
|
+
repository: Repository string to validate
|
|
150
|
+
|
|
151
|
+
Returns:
|
|
152
|
+
Validated repository or None
|
|
153
|
+
"""
|
|
154
|
+
if repository is None:
|
|
155
|
+
return None
|
|
156
|
+
|
|
157
|
+
if not isinstance(repository, str) or not repository.strip():
|
|
158
|
+
logger.warning("Invalid repository provided (empty or non-string)")
|
|
159
|
+
return None
|
|
160
|
+
|
|
161
|
+
repository = repository.strip()
|
|
162
|
+
|
|
163
|
+
# Must be in format owner/repo
|
|
164
|
+
if "/" not in repository:
|
|
165
|
+
logger.warning(f"Invalid repository format: {repository} (expected owner/repo)")
|
|
166
|
+
return None
|
|
167
|
+
|
|
168
|
+
parts = repository.split("/")
|
|
169
|
+
if len(parts) != 2:
|
|
170
|
+
logger.warning(f"Invalid repository format: {repository} (expected exactly one slash)")
|
|
171
|
+
return None
|
|
172
|
+
|
|
173
|
+
owner, repo = parts
|
|
174
|
+
if not owner or not repo:
|
|
175
|
+
logger.warning(f"Invalid repository format: {repository} (empty owner or repo)")
|
|
176
|
+
return None
|
|
177
|
+
|
|
178
|
+
# Basic sanitization - alphanumeric, hyphens, underscores, dots
|
|
179
|
+
# GitHub allows these characters in usernames and repo names
|
|
180
|
+
valid_pattern = re.compile(r"^[a-zA-Z0-9._-]+$")
|
|
181
|
+
if not valid_pattern.match(owner) or not valid_pattern.match(repo):
|
|
182
|
+
logger.warning(
|
|
183
|
+
f"Invalid characters in repository: {repository} "
|
|
184
|
+
"(only alphanumeric, ., -, _ allowed)"
|
|
185
|
+
)
|
|
186
|
+
return None
|
|
187
|
+
|
|
188
|
+
return repository
|
|
189
|
+
|
|
190
|
+
def _validate_pr_number(self, pr_number: str | None) -> str | None:
|
|
191
|
+
"""Validate PR number.
|
|
192
|
+
|
|
193
|
+
Args:
|
|
194
|
+
pr_number: PR number to validate
|
|
195
|
+
|
|
196
|
+
Returns:
|
|
197
|
+
Validated PR number or None
|
|
198
|
+
"""
|
|
199
|
+
if pr_number is None:
|
|
200
|
+
return None
|
|
201
|
+
|
|
202
|
+
if not isinstance(pr_number, str) or not pr_number.strip():
|
|
203
|
+
logger.warning("Invalid PR number provided (empty or non-string)")
|
|
204
|
+
return None
|
|
205
|
+
|
|
206
|
+
pr_number = pr_number.strip()
|
|
207
|
+
|
|
208
|
+
# Must be a positive integer
|
|
209
|
+
try:
|
|
210
|
+
pr_int = int(pr_number)
|
|
211
|
+
if pr_int <= 0:
|
|
212
|
+
logger.warning(f"Invalid PR number: {pr_number} (must be positive)")
|
|
213
|
+
return None
|
|
214
|
+
except ValueError:
|
|
215
|
+
logger.warning(f"Invalid PR number: {pr_number} (must be an integer)")
|
|
216
|
+
return None
|
|
217
|
+
|
|
218
|
+
return pr_number
|
|
219
|
+
|
|
220
|
+
def _validate_api_url(self, api_url: str) -> str:
|
|
221
|
+
"""Validate GitHub API URL.
|
|
222
|
+
|
|
223
|
+
Args:
|
|
224
|
+
api_url: API URL to validate
|
|
225
|
+
|
|
226
|
+
Returns:
|
|
227
|
+
Validated API URL or default
|
|
228
|
+
"""
|
|
229
|
+
if not api_url or not isinstance(api_url, str):
|
|
230
|
+
logger.warning("Invalid API URL provided, using default")
|
|
231
|
+
return "https://api.github.com"
|
|
232
|
+
|
|
233
|
+
api_url = api_url.strip()
|
|
234
|
+
|
|
235
|
+
# Must be HTTPS (security requirement)
|
|
236
|
+
if not api_url.startswith("https://"):
|
|
237
|
+
logger.warning(
|
|
238
|
+
f"API URL must use HTTPS: {api_url}, using default https://api.github.com"
|
|
239
|
+
)
|
|
240
|
+
return "https://api.github.com"
|
|
241
|
+
|
|
242
|
+
# Basic URL validation
|
|
243
|
+
# Simple URL pattern check
|
|
244
|
+
url_pattern = re.compile(r"^https://[a-zA-Z0-9.-]+(?:/.*)?$")
|
|
245
|
+
if not url_pattern.match(api_url):
|
|
246
|
+
logger.warning(f"Invalid API URL format: {api_url}, using default")
|
|
247
|
+
return "https://api.github.com"
|
|
248
|
+
|
|
249
|
+
return api_url
|
|
250
|
+
|
|
251
|
+
async def __aenter__(self) -> "GitHubIntegration":
|
|
252
|
+
"""Async context manager entry."""
|
|
253
|
+
self._client = httpx.AsyncClient(
|
|
254
|
+
timeout=httpx.Timeout(30.0),
|
|
255
|
+
headers=self._get_headers(),
|
|
256
|
+
)
|
|
257
|
+
return self
|
|
258
|
+
|
|
259
|
+
async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
|
|
260
|
+
"""Async context manager exit."""
|
|
261
|
+
del exc_type, exc_val, exc_tb # Unused
|
|
262
|
+
if self._client:
|
|
263
|
+
await self._client.aclose()
|
|
264
|
+
self._client = None
|
|
265
|
+
|
|
266
|
+
def _get_headers(self) -> dict[str, str]:
|
|
267
|
+
"""Get common request headers."""
|
|
268
|
+
return {
|
|
269
|
+
"Authorization": f"token {self.token}",
|
|
270
|
+
"Accept": "application/vnd.github.v3+json",
|
|
271
|
+
"Content-Type": "application/json",
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
def is_configured(self) -> bool:
|
|
275
|
+
"""Check if GitHub integration is properly configured.
|
|
276
|
+
|
|
277
|
+
Returns:
|
|
278
|
+
True if all required environment variables are set
|
|
279
|
+
"""
|
|
280
|
+
is_valid = all([self.token, self.repository, self.pr_number])
|
|
281
|
+
|
|
282
|
+
# Provide helpful debug info when not configured
|
|
283
|
+
if not is_valid:
|
|
284
|
+
missing = []
|
|
285
|
+
if not self.token:
|
|
286
|
+
missing.append("GITHUB_TOKEN")
|
|
287
|
+
if not self.repository:
|
|
288
|
+
missing.append("GITHUB_REPOSITORY")
|
|
289
|
+
if not self.pr_number:
|
|
290
|
+
missing.append("GITHUB_PR_NUMBER")
|
|
291
|
+
|
|
292
|
+
logger.debug(f"GitHub integration missing: {', '.join(missing)}")
|
|
293
|
+
if not self.pr_number and self.token and self.repository:
|
|
294
|
+
logger.info(
|
|
295
|
+
"GitHub PR integration requires GITHUB_PR_NUMBER. "
|
|
296
|
+
"This is only available when running on pull request events. "
|
|
297
|
+
"Current event may not have PR context."
|
|
298
|
+
)
|
|
299
|
+
|
|
300
|
+
return is_valid
|
|
301
|
+
|
|
302
|
+
async def _make_request(
|
|
303
|
+
self, method: str, endpoint: str, **kwargs: Any
|
|
304
|
+
) -> dict[str, Any] | None:
|
|
305
|
+
"""Make an HTTP request to GitHub API with retry and rate limit handling.
|
|
306
|
+
|
|
307
|
+
Implements exponential backoff for transient errors (5xx, 429) and
|
|
308
|
+
respects GitHub's rate limit headers.
|
|
309
|
+
|
|
310
|
+
Args:
|
|
311
|
+
method: HTTP method (GET, POST, PATCH, DELETE)
|
|
312
|
+
endpoint: API endpoint path
|
|
313
|
+
**kwargs: Additional arguments to pass to httpx
|
|
314
|
+
|
|
315
|
+
Returns:
|
|
316
|
+
Response JSON or None on error
|
|
317
|
+
"""
|
|
318
|
+
if not self.is_configured():
|
|
319
|
+
logger.error("GitHub integration not configured")
|
|
320
|
+
return None
|
|
321
|
+
|
|
322
|
+
url = f"{self.api_url}/repos/{self.repository}/{endpoint}"
|
|
323
|
+
backoff = INITIAL_BACKOFF_SECONDS
|
|
324
|
+
last_error: Exception | None = None
|
|
325
|
+
|
|
326
|
+
for attempt in range(MAX_RETRIES + 1):
|
|
327
|
+
try:
|
|
328
|
+
if self._client:
|
|
329
|
+
response = await self._client.request(method, url, **kwargs)
|
|
330
|
+
else:
|
|
331
|
+
async with httpx.AsyncClient(headers=self._get_headers()) as client:
|
|
332
|
+
response = await client.request(method, url, **kwargs)
|
|
333
|
+
|
|
334
|
+
# Handle rate limiting (429)
|
|
335
|
+
if response.status_code == 429:
|
|
336
|
+
# Get reset time from headers
|
|
337
|
+
reset_time = response.headers.get("X-RateLimit-Reset")
|
|
338
|
+
retry_after = response.headers.get("Retry-After")
|
|
339
|
+
|
|
340
|
+
if retry_after:
|
|
341
|
+
wait_time = int(retry_after)
|
|
342
|
+
elif reset_time:
|
|
343
|
+
wait_time = max(0, int(reset_time) - int(time.time()))
|
|
344
|
+
else:
|
|
345
|
+
wait_time = min(backoff, MAX_BACKOFF_SECONDS)
|
|
346
|
+
|
|
347
|
+
if attempt < MAX_RETRIES:
|
|
348
|
+
logger.warning(
|
|
349
|
+
f"Rate limited on {method} {endpoint}, "
|
|
350
|
+
f"waiting {wait_time}s (attempt {attempt + 1}/{MAX_RETRIES + 1})"
|
|
351
|
+
)
|
|
352
|
+
await asyncio.sleep(wait_time)
|
|
353
|
+
backoff = min(backoff * BACKOFF_MULTIPLIER, MAX_BACKOFF_SECONDS)
|
|
354
|
+
continue
|
|
355
|
+
else:
|
|
356
|
+
raise GitHubRateLimitError(
|
|
357
|
+
int(reset_time or 0),
|
|
358
|
+
f"Rate limit exceeded after {MAX_RETRIES + 1} attempts",
|
|
359
|
+
)
|
|
360
|
+
|
|
361
|
+
# Handle retryable server errors (5xx)
|
|
362
|
+
if response.status_code in RETRYABLE_STATUS_CODES and attempt < MAX_RETRIES:
|
|
363
|
+
logger.warning(
|
|
364
|
+
f"Retryable error {response.status_code} on {method} {endpoint}, "
|
|
365
|
+
f"retrying in {backoff:.1f}s (attempt {attempt + 1}/{MAX_RETRIES + 1})"
|
|
366
|
+
)
|
|
367
|
+
await asyncio.sleep(backoff)
|
|
368
|
+
backoff = min(backoff * BACKOFF_MULTIPLIER, MAX_BACKOFF_SECONDS)
|
|
369
|
+
continue
|
|
370
|
+
|
|
371
|
+
response.raise_for_status()
|
|
372
|
+
return response.json() if response.text else {}
|
|
373
|
+
|
|
374
|
+
except httpx.HTTPStatusError as e:
|
|
375
|
+
last_error = e
|
|
376
|
+
# Don't retry client errors (4xx) except rate limit
|
|
377
|
+
if 400 <= e.response.status_code < 500 and e.response.status_code != 429:
|
|
378
|
+
logger.error(f"HTTP error: {e.response.status_code} - {e.response.text}")
|
|
379
|
+
return None
|
|
380
|
+
# For server errors, continue to retry logic
|
|
381
|
+
if attempt < MAX_RETRIES:
|
|
382
|
+
logger.warning(
|
|
383
|
+
f"HTTP error {e.response.status_code}, retrying in {backoff:.1f}s"
|
|
384
|
+
)
|
|
385
|
+
await asyncio.sleep(backoff)
|
|
386
|
+
backoff = min(backoff * BACKOFF_MULTIPLIER, MAX_BACKOFF_SECONDS)
|
|
387
|
+
continue
|
|
388
|
+
|
|
389
|
+
except (httpx.ConnectError, httpx.TimeoutException) as e:
|
|
390
|
+
last_error = e
|
|
391
|
+
if attempt < MAX_RETRIES:
|
|
392
|
+
logger.warning(
|
|
393
|
+
f"Connection error on {method} {endpoint}: {e}, "
|
|
394
|
+
f"retrying in {backoff:.1f}s (attempt {attempt + 1}/{MAX_RETRIES + 1})"
|
|
395
|
+
)
|
|
396
|
+
await asyncio.sleep(backoff)
|
|
397
|
+
backoff = min(backoff * BACKOFF_MULTIPLIER, MAX_BACKOFF_SECONDS)
|
|
398
|
+
continue
|
|
399
|
+
|
|
400
|
+
except Exception as e: # pylint: disable=broad-exception-caught
|
|
401
|
+
logger.error(f"Unexpected error on {method} {endpoint}: {e}")
|
|
402
|
+
return None
|
|
403
|
+
|
|
404
|
+
# All retries exhausted
|
|
405
|
+
if last_error:
|
|
406
|
+
logger.error(f"Request failed after {MAX_RETRIES + 1} attempts: {last_error}")
|
|
407
|
+
return None
|
|
408
|
+
|
|
409
|
+
async def _make_request_no_retry(
|
|
410
|
+
self, method: str, endpoint: str, **kwargs: Any
|
|
411
|
+
) -> dict[str, Any] | None:
|
|
412
|
+
"""Make an HTTP request without retry (for non-critical operations).
|
|
413
|
+
|
|
414
|
+
Args:
|
|
415
|
+
method: HTTP method (GET, POST, PATCH, DELETE)
|
|
416
|
+
endpoint: API endpoint path
|
|
417
|
+
**kwargs: Additional arguments to pass to httpx
|
|
418
|
+
|
|
419
|
+
Returns:
|
|
420
|
+
Response JSON or None on error
|
|
421
|
+
"""
|
|
422
|
+
if not self.is_configured():
|
|
423
|
+
logger.error("GitHub integration not configured")
|
|
424
|
+
return None
|
|
425
|
+
|
|
426
|
+
url = f"{self.api_url}/repos/{self.repository}/{endpoint}"
|
|
427
|
+
|
|
428
|
+
try:
|
|
429
|
+
if self._client:
|
|
430
|
+
response = await self._client.request(method, url, **kwargs)
|
|
431
|
+
else:
|
|
432
|
+
async with httpx.AsyncClient(headers=self._get_headers()) as client:
|
|
433
|
+
response = await client.request(method, url, **kwargs)
|
|
434
|
+
|
|
435
|
+
response.raise_for_status()
|
|
436
|
+
return response.json() if response.text else {}
|
|
437
|
+
|
|
438
|
+
except httpx.HTTPStatusError as e:
|
|
439
|
+
logger.error(f"HTTP error: {e.response.status_code} - {e.response.text}")
|
|
440
|
+
return None
|
|
441
|
+
except Exception as e: # pylint: disable=broad-exception-caught
|
|
442
|
+
logger.error(f"Request failed: {e}")
|
|
443
|
+
return None
|
|
444
|
+
|
|
445
|
+
async def _make_paginated_request(
|
|
446
|
+
self, endpoint: str, max_pages: int = 100
|
|
447
|
+
) -> list[dict[str, Any]]:
|
|
448
|
+
"""Make a paginated GET request to GitHub API, fetching all pages.
|
|
449
|
+
|
|
450
|
+
GitHub API returns at most 100 items per page for list endpoints.
|
|
451
|
+
This method follows pagination links to fetch ALL items.
|
|
452
|
+
|
|
453
|
+
Args:
|
|
454
|
+
endpoint: API endpoint path (e.g., "pulls/123/comments")
|
|
455
|
+
max_pages: Maximum number of pages to fetch (safety limit)
|
|
456
|
+
|
|
457
|
+
Returns:
|
|
458
|
+
Combined list of all items across all pages
|
|
459
|
+
"""
|
|
460
|
+
if not self.is_configured():
|
|
461
|
+
logger.error("GitHub integration not configured")
|
|
462
|
+
return []
|
|
463
|
+
|
|
464
|
+
all_items: list[dict[str, Any]] = []
|
|
465
|
+
url: str | None = f"{self.api_url}/repos/{self.repository}/{endpoint}"
|
|
466
|
+
page_count = 0
|
|
467
|
+
|
|
468
|
+
# Add per_page=100 to maximize items per request
|
|
469
|
+
if "?" in endpoint:
|
|
470
|
+
url = f"{url}&per_page=100"
|
|
471
|
+
else:
|
|
472
|
+
url = f"{url}?per_page=100"
|
|
473
|
+
|
|
474
|
+
while url and page_count < max_pages:
|
|
475
|
+
page_count += 1
|
|
476
|
+
try:
|
|
477
|
+
if self._client:
|
|
478
|
+
response = await self._client.request("GET", url)
|
|
479
|
+
else:
|
|
480
|
+
async with httpx.AsyncClient(
|
|
481
|
+
timeout=httpx.Timeout(30.0), headers=self._get_headers()
|
|
482
|
+
) as client:
|
|
483
|
+
response = await client.request("GET", url)
|
|
484
|
+
|
|
485
|
+
response.raise_for_status()
|
|
486
|
+
items = response.json()
|
|
487
|
+
|
|
488
|
+
if isinstance(items, list):
|
|
489
|
+
all_items.extend(items)
|
|
490
|
+
logger.debug(
|
|
491
|
+
f"Fetched page {page_count} with {len(items)} items "
|
|
492
|
+
f"(total: {len(all_items)})"
|
|
493
|
+
)
|
|
494
|
+
else:
|
|
495
|
+
# Not a list response, shouldn't happen for list endpoints
|
|
496
|
+
logger.warning(f"Unexpected response type on page {page_count}")
|
|
497
|
+
break
|
|
498
|
+
|
|
499
|
+
# Check for next page in Link header
|
|
500
|
+
# Format: <url>; rel="next", <url>; rel="last"
|
|
501
|
+
link_header = response.headers.get("Link", "")
|
|
502
|
+
url = None # Reset for next iteration
|
|
503
|
+
|
|
504
|
+
if link_header:
|
|
505
|
+
for link in link_header.split(","):
|
|
506
|
+
if 'rel="next"' in link:
|
|
507
|
+
# Extract URL from <url>
|
|
508
|
+
match = re.search(r"<([^>]+)>", link)
|
|
509
|
+
if match:
|
|
510
|
+
url = match.group(1)
|
|
511
|
+
break
|
|
512
|
+
|
|
513
|
+
except httpx.HTTPStatusError as e:
|
|
514
|
+
logger.error(f"HTTP error during pagination: {e.response.status_code}")
|
|
515
|
+
break
|
|
516
|
+
except Exception as e: # pylint: disable=broad-exception-caught
|
|
517
|
+
logger.error(f"Error during pagination: {e}")
|
|
518
|
+
break
|
|
519
|
+
|
|
520
|
+
if page_count >= max_pages:
|
|
521
|
+
logger.warning(f"Reached max pages limit ({max_pages}), results may be incomplete")
|
|
522
|
+
|
|
523
|
+
logger.debug(
|
|
524
|
+
f"Paginated request complete: {len(all_items)} total items from {page_count} page(s)"
|
|
525
|
+
)
|
|
526
|
+
return all_items
|
|
527
|
+
|
|
528
|
+
# ==================== PR Comments ====================
|
|
529
|
+
|
|
530
|
+
async def post_comment(self, comment_body: str) -> bool:
|
|
531
|
+
"""Post a general comment to a PR.
|
|
532
|
+
|
|
533
|
+
Args:
|
|
534
|
+
comment_body: The markdown content to post
|
|
535
|
+
|
|
536
|
+
Returns:
|
|
537
|
+
True if successful, False otherwise
|
|
538
|
+
"""
|
|
539
|
+
result = await self._make_request(
|
|
540
|
+
"POST",
|
|
541
|
+
f"issues/{self.pr_number}/comments",
|
|
542
|
+
json={"body": comment_body},
|
|
543
|
+
)
|
|
544
|
+
|
|
545
|
+
if result:
|
|
546
|
+
logger.info(f"Successfully posted comment to PR #{self.pr_number}")
|
|
547
|
+
return True
|
|
548
|
+
return False
|
|
549
|
+
|
|
550
|
+
async def update_or_create_comment(
|
|
551
|
+
self, comment_body: str, identifier: str = "<!-- iam-policy-validator -->"
|
|
552
|
+
) -> bool:
|
|
553
|
+
"""Update an existing comment or create a new one.
|
|
554
|
+
|
|
555
|
+
This method will look for an existing comment with the identifier
|
|
556
|
+
and update it, or create a new comment if none exists.
|
|
557
|
+
|
|
558
|
+
Args:
|
|
559
|
+
comment_body: The markdown content to post
|
|
560
|
+
identifier: HTML comment identifier to find existing comments
|
|
561
|
+
|
|
562
|
+
Returns:
|
|
563
|
+
True if successful, False otherwise
|
|
564
|
+
"""
|
|
565
|
+
# Add identifier to comment body
|
|
566
|
+
full_body = f"{identifier}\n{comment_body}"
|
|
567
|
+
|
|
568
|
+
# Try to find and update existing comment
|
|
569
|
+
existing_comment_id = await self._find_existing_comment(identifier)
|
|
570
|
+
|
|
571
|
+
if existing_comment_id:
|
|
572
|
+
return await self._update_comment(existing_comment_id, full_body)
|
|
573
|
+
else:
|
|
574
|
+
return await self.post_comment(full_body)
|
|
575
|
+
|
|
576
|
+
async def post_multipart_comments(
|
|
577
|
+
self,
|
|
578
|
+
comment_parts: list[str],
|
|
579
|
+
identifier: str = "<!-- iam-policy-validator -->",
|
|
580
|
+
) -> bool:
|
|
581
|
+
"""Post or update multiple related comments (for large reports).
|
|
582
|
+
|
|
583
|
+
For single-part comments (most common case), this will UPDATE the
|
|
584
|
+
existing comment in place rather than delete and recreate it.
|
|
585
|
+
This preserves comment history and avoids PR timeline noise.
|
|
586
|
+
|
|
587
|
+
For multi-part comments:
|
|
588
|
+
1. Delete all old comments with the identifier
|
|
589
|
+
2. Post new comments in sequence with part indicators
|
|
590
|
+
3. Validate each part stays under GitHub's limit
|
|
591
|
+
|
|
592
|
+
Args:
|
|
593
|
+
comment_parts: List of comment bodies to post (split into parts)
|
|
594
|
+
identifier: HTML comment identifier to find/manage existing comments
|
|
595
|
+
|
|
596
|
+
Returns:
|
|
597
|
+
True if all parts posted successfully, False otherwise
|
|
598
|
+
"""
|
|
599
|
+
# GitHub's actual limit
|
|
600
|
+
github_comment_limit = 65536
|
|
601
|
+
|
|
602
|
+
total_parts = len(comment_parts)
|
|
603
|
+
|
|
604
|
+
# Optimization: For single-part comments, use update-or-create
|
|
605
|
+
# This preserves the existing comment and avoids PR timeline noise
|
|
606
|
+
if total_parts == 1:
|
|
607
|
+
part_body = comment_parts[0]
|
|
608
|
+
full_body = f"{identifier}\n\n{part_body}"
|
|
609
|
+
|
|
610
|
+
# Safety check: ensure we don't exceed GitHub's limit
|
|
611
|
+
if len(full_body) > github_comment_limit:
|
|
612
|
+
logger.error(
|
|
613
|
+
f"Comment exceeds GitHub's limit ({len(full_body)} > {github_comment_limit} chars). "
|
|
614
|
+
f"Comment will be truncated."
|
|
615
|
+
)
|
|
616
|
+
available_space = github_comment_limit - 500
|
|
617
|
+
truncated_body = part_body[:available_space]
|
|
618
|
+
truncation_warning = (
|
|
619
|
+
"\n\n---\n\n"
|
|
620
|
+
"> ⚠️ **This comment was truncated to fit GitHub's size limit**\n"
|
|
621
|
+
">\n"
|
|
622
|
+
"> Download the full report using `--output report.json` or "
|
|
623
|
+
"`--format markdown --output report.md`\n"
|
|
624
|
+
)
|
|
625
|
+
full_body = f"{identifier}\n\n{truncated_body}{truncation_warning}"
|
|
626
|
+
|
|
627
|
+
success = await self.update_or_create_comment(full_body, identifier)
|
|
628
|
+
if success:
|
|
629
|
+
logger.info("Successfully updated summary comment")
|
|
630
|
+
return success
|
|
631
|
+
|
|
632
|
+
# Multi-part: Delete all existing comments with this identifier first
|
|
633
|
+
await self._delete_comments_with_identifier(identifier)
|
|
634
|
+
|
|
635
|
+
# Post each part
|
|
636
|
+
success = True
|
|
637
|
+
|
|
638
|
+
for part_num, part_body in enumerate(comment_parts, 1):
|
|
639
|
+
# Add identifier and part indicator
|
|
640
|
+
part_indicator = f"**(Part {part_num}/{total_parts})**"
|
|
641
|
+
full_body = f"{identifier}\n{part_indicator}\n\n{part_body}"
|
|
642
|
+
|
|
643
|
+
# Safety check: ensure we don't exceed GitHub's limit
|
|
644
|
+
if len(full_body) > github_comment_limit:
|
|
645
|
+
logger.error(
|
|
646
|
+
f"Part {part_num}/{total_parts} exceeds GitHub's comment limit "
|
|
647
|
+
f"({len(full_body)} > {github_comment_limit} chars). "
|
|
648
|
+
f"This part will be truncated."
|
|
649
|
+
)
|
|
650
|
+
# Truncate with warning message
|
|
651
|
+
available_space = github_comment_limit - 500 # Reserve space for truncation message
|
|
652
|
+
truncated_body = part_body[:available_space]
|
|
653
|
+
truncation_warning = (
|
|
654
|
+
"\n\n---\n\n"
|
|
655
|
+
"> ⚠️ **This comment was truncated to fit GitHub's size limit**\n"
|
|
656
|
+
">\n"
|
|
657
|
+
"> Download the full report using `--output report.json` or "
|
|
658
|
+
"`--format markdown --output report.md`\n"
|
|
659
|
+
)
|
|
660
|
+
full_body = (
|
|
661
|
+
f"{identifier}\n{part_indicator}\n\n{truncated_body}{truncation_warning}"
|
|
662
|
+
)
|
|
663
|
+
|
|
664
|
+
if not await self.post_comment(full_body):
|
|
665
|
+
logger.error(f"Failed to post comment part {part_num}/{total_parts}")
|
|
666
|
+
success = False
|
|
667
|
+
else:
|
|
668
|
+
logger.debug(
|
|
669
|
+
f"Posted part {part_num}/{total_parts} ({len(full_body):,} characters)"
|
|
670
|
+
)
|
|
671
|
+
|
|
672
|
+
if success:
|
|
673
|
+
logger.info(f"Successfully posted {total_parts} comment part(s)")
|
|
674
|
+
|
|
675
|
+
return success
|
|
676
|
+
|
|
677
|
+
async def _delete_comments_with_identifier(self, identifier: str) -> int:
|
|
678
|
+
"""Delete all comments with the given identifier.
|
|
679
|
+
|
|
680
|
+
Args:
|
|
681
|
+
identifier: HTML comment identifier to find comments
|
|
682
|
+
|
|
683
|
+
Returns:
|
|
684
|
+
Number of comments deleted
|
|
685
|
+
"""
|
|
686
|
+
result = await self._make_request("GET", f"issues/{self.pr_number}/comments")
|
|
687
|
+
|
|
688
|
+
deleted_count = 0
|
|
689
|
+
if result and isinstance(result, list):
|
|
690
|
+
for comment in result:
|
|
691
|
+
if not isinstance(comment, dict):
|
|
692
|
+
continue
|
|
693
|
+
|
|
694
|
+
body = comment.get("body", "")
|
|
695
|
+
comment_id = comment.get("id")
|
|
696
|
+
|
|
697
|
+
if identifier in str(body) and isinstance(comment_id, int):
|
|
698
|
+
delete_result = await self._make_request(
|
|
699
|
+
"DELETE", f"issues/comments/{comment_id}"
|
|
700
|
+
)
|
|
701
|
+
if delete_result is not None:
|
|
702
|
+
deleted_count += 1
|
|
703
|
+
|
|
704
|
+
if deleted_count > 0:
|
|
705
|
+
logger.info(f"Deleted {deleted_count} old comments")
|
|
706
|
+
|
|
707
|
+
return deleted_count
|
|
708
|
+
|
|
709
|
+
async def _find_existing_comment(self, identifier: str) -> int | None:
|
|
710
|
+
"""Find an existing comment with the given identifier."""
|
|
711
|
+
result = await self._make_request("GET", f"issues/{self.pr_number}/comments")
|
|
712
|
+
|
|
713
|
+
if result and isinstance(result, list):
|
|
714
|
+
for comment in result:
|
|
715
|
+
if isinstance(comment, dict) and identifier in str(comment.get("body", "")):
|
|
716
|
+
comment_id = comment.get("id")
|
|
717
|
+
if isinstance(comment_id, int):
|
|
718
|
+
return comment_id
|
|
719
|
+
|
|
720
|
+
return None
|
|
721
|
+
|
|
722
|
+
async def _update_comment(self, comment_id: int, comment_body: str) -> bool:
|
|
723
|
+
"""Update an existing GitHub comment."""
|
|
724
|
+
result = await self._make_request(
|
|
725
|
+
"PATCH",
|
|
726
|
+
f"issues/comments/{comment_id}",
|
|
727
|
+
json={"body": comment_body},
|
|
728
|
+
)
|
|
729
|
+
|
|
730
|
+
if result:
|
|
731
|
+
logger.info(f"Successfully updated comment {comment_id}")
|
|
732
|
+
return True
|
|
733
|
+
return False
|
|
734
|
+
|
|
735
|
+
# ==================== PR Review Comments (Line-specific) ====================
|
|
736
|
+
|
|
737
|
+
async def get_review_comments(self) -> list[dict[str, Any]]:
|
|
738
|
+
"""Get all review comments on the PR with pagination.
|
|
739
|
+
|
|
740
|
+
Fetches ALL review comments across all pages. This is critical for
|
|
741
|
+
proper comment deduplication and cleanup when there are many findings.
|
|
742
|
+
|
|
743
|
+
Returns:
|
|
744
|
+
List of all review comment dicts
|
|
745
|
+
"""
|
|
746
|
+
return await self._make_paginated_request(f"pulls/{self.pr_number}/comments")
|
|
747
|
+
|
|
748
|
+
async def get_bot_review_comments_with_location(
|
|
749
|
+
self, identifier: str = constants.BOT_IDENTIFIER
|
|
750
|
+
) -> dict[tuple[str, int, str], dict[str, Any]]:
|
|
751
|
+
"""Get bot review comments indexed by file path, line number, and issue type.
|
|
752
|
+
|
|
753
|
+
This enables efficient lookup to update existing comments.
|
|
754
|
+
Uses (path, line, issue_type) as key to support multiple issues at the same line.
|
|
755
|
+
|
|
756
|
+
Args:
|
|
757
|
+
identifier: String to identify bot comments
|
|
758
|
+
|
|
759
|
+
Returns:
|
|
760
|
+
Dict mapping (file_path, line_number, issue_type) to comment metadata dict
|
|
761
|
+
Comment dict contains: id, body, path, line, issue_type, commit_id
|
|
762
|
+
"""
|
|
763
|
+
comments = await self.get_review_comments()
|
|
764
|
+
bot_comments_map: dict[tuple[str, int, str], dict[str, Any]] = {}
|
|
765
|
+
|
|
766
|
+
for comment in comments:
|
|
767
|
+
if not isinstance(comment, dict):
|
|
768
|
+
continue
|
|
769
|
+
|
|
770
|
+
body = comment.get("body", "")
|
|
771
|
+
comment_id = comment.get("id")
|
|
772
|
+
path = comment.get("path")
|
|
773
|
+
line = comment.get("line") or comment.get("original_line")
|
|
774
|
+
|
|
775
|
+
# Check if this is a bot comment with valid location
|
|
776
|
+
if (
|
|
777
|
+
identifier in str(body)
|
|
778
|
+
and isinstance(comment_id, int)
|
|
779
|
+
and isinstance(path, str)
|
|
780
|
+
and isinstance(line, int)
|
|
781
|
+
):
|
|
782
|
+
# Extract issue type from HTML comment
|
|
783
|
+
issue_type_match = re.search(r"<!-- issue-type: (\w+) -->", body)
|
|
784
|
+
issue_type = issue_type_match.group(1) if issue_type_match else "unknown"
|
|
785
|
+
|
|
786
|
+
key = (path, line, issue_type)
|
|
787
|
+
bot_comments_map[key] = {
|
|
788
|
+
"id": comment_id,
|
|
789
|
+
"body": body,
|
|
790
|
+
"path": path,
|
|
791
|
+
"line": line,
|
|
792
|
+
"issue_type": issue_type,
|
|
793
|
+
"commit_id": comment.get("commit_id"),
|
|
794
|
+
}
|
|
795
|
+
|
|
796
|
+
logger.debug(f"Found {len(bot_comments_map)} bot review comments at specific locations")
|
|
797
|
+
return bot_comments_map
|
|
798
|
+
|
|
799
|
+
async def delete_review_comment(self, comment_id: int) -> bool:
|
|
800
|
+
"""Delete a specific review comment.
|
|
801
|
+
|
|
802
|
+
Args:
|
|
803
|
+
comment_id: ID of the comment to delete
|
|
804
|
+
|
|
805
|
+
Returns:
|
|
806
|
+
True if successful, False otherwise
|
|
807
|
+
"""
|
|
808
|
+
result = await self._make_request(
|
|
809
|
+
"DELETE",
|
|
810
|
+
f"pulls/comments/{comment_id}",
|
|
811
|
+
)
|
|
812
|
+
|
|
813
|
+
if result is not None: # DELETE returns empty dict on success
|
|
814
|
+
logger.debug(f"Successfully deleted review comment {comment_id}")
|
|
815
|
+
return True
|
|
816
|
+
return False
|
|
817
|
+
|
|
818
|
+
async def _delete_comments_parallel(
|
|
819
|
+
self, comment_ids: list[int], max_concurrent: int = MAX_CONCURRENT_API_CALLS
|
|
820
|
+
) -> tuple[int, int]:
|
|
821
|
+
"""Delete multiple review comments in parallel with controlled concurrency.
|
|
822
|
+
|
|
823
|
+
Uses a semaphore to limit concurrent API calls, preventing rate limit issues
|
|
824
|
+
while still being much faster than sequential deletion.
|
|
825
|
+
|
|
826
|
+
Args:
|
|
827
|
+
comment_ids: List of comment IDs to delete
|
|
828
|
+
max_concurrent: Maximum number of concurrent deletions (default: 10)
|
|
829
|
+
|
|
830
|
+
Returns:
|
|
831
|
+
Tuple of (successful_count, failed_count)
|
|
832
|
+
"""
|
|
833
|
+
if not comment_ids:
|
|
834
|
+
return (0, 0)
|
|
835
|
+
|
|
836
|
+
semaphore = asyncio.Semaphore(max_concurrent)
|
|
837
|
+
|
|
838
|
+
async def delete_with_limit(comment_id: int) -> bool:
|
|
839
|
+
async with semaphore:
|
|
840
|
+
return await self.delete_review_comment(comment_id)
|
|
841
|
+
|
|
842
|
+
# Run all deletions in parallel (semaphore controls actual concurrency)
|
|
843
|
+
results = await asyncio.gather(
|
|
844
|
+
*[delete_with_limit(cid) for cid in comment_ids],
|
|
845
|
+
return_exceptions=True,
|
|
846
|
+
)
|
|
847
|
+
|
|
848
|
+
successful = sum(1 for r in results if r is True)
|
|
849
|
+
failed = len(results) - successful
|
|
850
|
+
|
|
851
|
+
if successful > 0:
|
|
852
|
+
logger.info(f"Parallel deletion: {successful} deleted, {failed} failed")
|
|
853
|
+
|
|
854
|
+
return (successful, failed)
|
|
855
|
+
|
|
856
|
+
# NOTE: resolve_review_comment was removed because GitHub REST API doesn't support
|
|
857
|
+
# resolving review comments via {"state": "resolved"}. Resolving review threads
|
|
858
|
+
# requires the GraphQL API with resolveReviewThread mutation.
|
|
859
|
+
# See: https://docs.github.com/en/graphql/reference/mutations#resolvereviewthread
|
|
860
|
+
|
|
861
|
+
async def update_review_comment(self, comment_id: int, new_body: str) -> bool:
|
|
862
|
+
"""Update the body text of an existing review comment.
|
|
863
|
+
|
|
864
|
+
Args:
|
|
865
|
+
comment_id: ID of the comment to update
|
|
866
|
+
new_body: New comment text (markdown supported)
|
|
867
|
+
|
|
868
|
+
Returns:
|
|
869
|
+
True if successful, False otherwise
|
|
870
|
+
"""
|
|
871
|
+
result = await self._make_request(
|
|
872
|
+
"PATCH",
|
|
873
|
+
f"pulls/comments/{comment_id}",
|
|
874
|
+
json={"body": new_body},
|
|
875
|
+
)
|
|
876
|
+
|
|
877
|
+
if result is not None:
|
|
878
|
+
logger.debug(f"Successfully updated review comment {comment_id}")
|
|
879
|
+
return True
|
|
880
|
+
return False
|
|
881
|
+
|
|
882
|
+
async def cleanup_bot_review_comments(self, identifier: str = constants.BOT_IDENTIFIER) -> int:
|
|
883
|
+
"""Delete all review comments from the bot (from previous runs).
|
|
884
|
+
|
|
885
|
+
This ensures old/outdated comments are removed before posting new ones.
|
|
886
|
+
Uses parallel deletion for speed when there are many comments.
|
|
887
|
+
|
|
888
|
+
Args:
|
|
889
|
+
identifier: String to identify bot comments
|
|
890
|
+
|
|
891
|
+
Returns:
|
|
892
|
+
Number of comments deleted
|
|
893
|
+
"""
|
|
894
|
+
comments = await self.get_review_comments()
|
|
895
|
+
|
|
896
|
+
# Collect all bot comment IDs to delete
|
|
897
|
+
comment_ids_to_delete: list[int] = []
|
|
898
|
+
for comment in comments:
|
|
899
|
+
if not isinstance(comment, dict):
|
|
900
|
+
continue
|
|
901
|
+
|
|
902
|
+
body = comment.get("body", "")
|
|
903
|
+
comment_id = comment.get("id")
|
|
904
|
+
|
|
905
|
+
# Check if this is a bot comment
|
|
906
|
+
if identifier in str(body) and isinstance(comment_id, int):
|
|
907
|
+
comment_ids_to_delete.append(comment_id)
|
|
908
|
+
|
|
909
|
+
if not comment_ids_to_delete:
|
|
910
|
+
return 0
|
|
911
|
+
|
|
912
|
+
# Delete all bot comments in parallel
|
|
913
|
+
successful, _failed = await self._delete_comments_parallel(comment_ids_to_delete)
|
|
914
|
+
|
|
915
|
+
if successful > 0:
|
|
916
|
+
logger.info(f"Cleaned up {successful} old review comments")
|
|
917
|
+
|
|
918
|
+
return successful
|
|
919
|
+
|
|
920
|
+
# NOTE: cleanup_bot_review_comments_by_resolving was removed because it depended on
|
|
921
|
+
# resolve_review_comment which doesn't work with GitHub REST API.
|
|
922
|
+
# Use cleanup_bot_review_comments (deletion) instead, or implement GraphQL-based
|
|
923
|
+
# resolution if audit trail preservation is needed.
|
|
924
|
+
|
|
925
|
+
async def create_review_comment(
|
|
926
|
+
self,
|
|
927
|
+
commit_id: str,
|
|
928
|
+
file_path: str,
|
|
929
|
+
line: int,
|
|
930
|
+
body: str,
|
|
931
|
+
side: str = "RIGHT",
|
|
932
|
+
) -> bool:
|
|
933
|
+
"""Create a line-specific review comment on a file in the PR.
|
|
934
|
+
|
|
935
|
+
Args:
|
|
936
|
+
commit_id: The SHA of the commit to comment on
|
|
937
|
+
file_path: The relative path to the file in the repo
|
|
938
|
+
line: The line number in the file to comment on
|
|
939
|
+
body: The comment text (markdown supported)
|
|
940
|
+
side: Which side of the diff ("LEFT" for deletion, "RIGHT" for addition)
|
|
941
|
+
|
|
942
|
+
Returns:
|
|
943
|
+
True if successful, False otherwise
|
|
944
|
+
"""
|
|
945
|
+
result = await self._make_request(
|
|
946
|
+
"POST",
|
|
947
|
+
f"pulls/{self.pr_number}/comments",
|
|
948
|
+
json={
|
|
949
|
+
"commit_id": commit_id,
|
|
950
|
+
"path": file_path,
|
|
951
|
+
"line": line,
|
|
952
|
+
"side": side,
|
|
953
|
+
"body": body,
|
|
954
|
+
},
|
|
955
|
+
)
|
|
956
|
+
|
|
957
|
+
if result:
|
|
958
|
+
logger.info(f"Successfully posted review comment on {file_path}:{line}")
|
|
959
|
+
return True
|
|
960
|
+
return False
|
|
961
|
+
|
|
962
|
+
async def create_review_with_comments(
|
|
963
|
+
self,
|
|
964
|
+
comments: list[dict[str, Any]],
|
|
965
|
+
body: str = "",
|
|
966
|
+
event: ReviewEvent = ReviewEvent.COMMENT,
|
|
967
|
+
) -> bool:
|
|
968
|
+
"""Create a review with multiple line-specific comments.
|
|
969
|
+
|
|
970
|
+
Args:
|
|
971
|
+
comments: List of comment dicts with keys: path, line, body, (optional) side
|
|
972
|
+
body: The overall review body text
|
|
973
|
+
event: The review event type (APPROVE, REQUEST_CHANGES, COMMENT)
|
|
974
|
+
|
|
975
|
+
Returns:
|
|
976
|
+
True if successful, False otherwise
|
|
977
|
+
|
|
978
|
+
Example:
|
|
979
|
+
comments = [
|
|
980
|
+
{
|
|
981
|
+
"path": "policies/policy.json",
|
|
982
|
+
"line": 5,
|
|
983
|
+
"body": "Invalid action detected here",
|
|
984
|
+
},
|
|
985
|
+
{
|
|
986
|
+
"path": "policies/policy.json",
|
|
987
|
+
"line": 12,
|
|
988
|
+
"body": "Missing condition key",
|
|
989
|
+
},
|
|
990
|
+
]
|
|
991
|
+
"""
|
|
992
|
+
# Get the latest commit SHA
|
|
993
|
+
pr_info = await self.get_pr_info()
|
|
994
|
+
if not pr_info:
|
|
995
|
+
return False
|
|
996
|
+
|
|
997
|
+
head_info = pr_info.get("head")
|
|
998
|
+
if not isinstance(head_info, dict):
|
|
999
|
+
logger.error("Invalid PR head information")
|
|
1000
|
+
return False
|
|
1001
|
+
|
|
1002
|
+
commit_id = head_info.get("sha")
|
|
1003
|
+
if not isinstance(commit_id, str):
|
|
1004
|
+
logger.error("Could not get commit SHA from PR")
|
|
1005
|
+
return False
|
|
1006
|
+
|
|
1007
|
+
# Format comments for the review API
|
|
1008
|
+
formatted_comments: list[dict[str, Any]] = []
|
|
1009
|
+
for comment in comments:
|
|
1010
|
+
formatted_comments.append(
|
|
1011
|
+
{
|
|
1012
|
+
"path": comment["path"],
|
|
1013
|
+
"line": comment["line"],
|
|
1014
|
+
"body": comment["body"],
|
|
1015
|
+
"side": comment.get("side", "RIGHT"),
|
|
1016
|
+
}
|
|
1017
|
+
)
|
|
1018
|
+
|
|
1019
|
+
result = await self._make_request(
|
|
1020
|
+
"POST",
|
|
1021
|
+
f"pulls/{self.pr_number}/reviews",
|
|
1022
|
+
json={
|
|
1023
|
+
"commit_id": commit_id,
|
|
1024
|
+
"body": body,
|
|
1025
|
+
"event": event.value,
|
|
1026
|
+
"comments": formatted_comments,
|
|
1027
|
+
},
|
|
1028
|
+
)
|
|
1029
|
+
|
|
1030
|
+
if result:
|
|
1031
|
+
logger.info(f"Successfully created review with {len(comments)} comments")
|
|
1032
|
+
return True
|
|
1033
|
+
return False
|
|
1034
|
+
|
|
1035
|
+
async def update_or_create_review_comments(
|
|
1036
|
+
self,
|
|
1037
|
+
comments: list[dict[str, Any]],
|
|
1038
|
+
body: str = "",
|
|
1039
|
+
event: ReviewEvent = ReviewEvent.COMMENT,
|
|
1040
|
+
identifier: str = constants.REVIEW_IDENTIFIER,
|
|
1041
|
+
validated_files: set[str] | None = None,
|
|
1042
|
+
skip_cleanup: bool = False,
|
|
1043
|
+
) -> bool:
|
|
1044
|
+
"""Smart comment management using fingerprint-based matching.
|
|
1045
|
+
|
|
1046
|
+
This method uses finding fingerprints (stable IDs) as the PRIMARY key
|
|
1047
|
+
for matching comments, with location as SECONDARY for new comments.
|
|
1048
|
+
|
|
1049
|
+
Strategy:
|
|
1050
|
+
1. Index existing comments by finding_id (from HTML comment)
|
|
1051
|
+
2. For each new comment:
|
|
1052
|
+
- If finding_id exists: UPDATE (even if line changed)
|
|
1053
|
+
- If new: CREATE at specified line
|
|
1054
|
+
3. Delete comments whose finding_id is not in new set (resolved)
|
|
1055
|
+
(unless skip_cleanup=True)
|
|
1056
|
+
|
|
1057
|
+
Note: Comments stay at their original line even if the issue moved,
|
|
1058
|
+
because GitHub doesn't support moving review comments. The comment
|
|
1059
|
+
body is updated to reflect any changes.
|
|
1060
|
+
|
|
1061
|
+
Args:
|
|
1062
|
+
comments: List of comment dicts with keys: path, line, body, (optional) side
|
|
1063
|
+
body: The overall review body text
|
|
1064
|
+
event: The review event type (APPROVE, REQUEST_CHANGES, COMMENT)
|
|
1065
|
+
identifier: String to identify bot comments (for matching existing)
|
|
1066
|
+
validated_files: Set of all file paths that were validated in this run.
|
|
1067
|
+
Used to clean up comments for files that no longer have findings.
|
|
1068
|
+
If None, only files with current findings are considered.
|
|
1069
|
+
skip_cleanup: If True, skip the cleanup phase (deleting resolved comments).
|
|
1070
|
+
Use this in streaming mode where files are processed one at a time
|
|
1071
|
+
to avoid deleting comments from files processed earlier.
|
|
1072
|
+
|
|
1073
|
+
Returns:
|
|
1074
|
+
True if successful, False otherwise
|
|
1075
|
+
|
|
1076
|
+
Example:
|
|
1077
|
+
# First run: Creates 3 comments
|
|
1078
|
+
comments = [
|
|
1079
|
+
{"path": "policy.json", "line": 5, "body": "<!-- finding-id: abc123 -->Issue A"},
|
|
1080
|
+
{"path": "policy.json", "line": 10, "body": "<!-- finding-id: def456 -->Issue B"},
|
|
1081
|
+
]
|
|
1082
|
+
|
|
1083
|
+
# Second run: Same findings, even if lines shifted
|
|
1084
|
+
comments = [
|
|
1085
|
+
{"path": "policy.json", "line": 8, "body": "<!-- finding-id: abc123 -->Issue A (updated)"},
|
|
1086
|
+
{"path": "policy.json", "line": 15, "body": "<!-- finding-id: def456 -->Issue B"},
|
|
1087
|
+
]
|
|
1088
|
+
# Result: Both comments UPDATED in place (not recreated), preserving conversation history
|
|
1089
|
+
"""
|
|
1090
|
+
# Step 1: Get existing bot comments indexed by fingerprint
|
|
1091
|
+
existing_by_fingerprint = await self._get_bot_comments_by_fingerprint(identifier)
|
|
1092
|
+
logger.debug(
|
|
1093
|
+
f"Found {len(existing_by_fingerprint)} existing bot comments with fingerprints"
|
|
1094
|
+
)
|
|
1095
|
+
|
|
1096
|
+
# Also get location-based index for fallback (comments without fingerprints)
|
|
1097
|
+
existing_by_location = await self.get_bot_review_comments_with_location(identifier)
|
|
1098
|
+
|
|
1099
|
+
seen_fingerprints: set[str] = set()
|
|
1100
|
+
seen_locations: set[tuple[str, int, str]] = set()
|
|
1101
|
+
# Track comment IDs that were updated/matched - these should NOT be deleted
|
|
1102
|
+
matched_comment_ids: set[int] = set()
|
|
1103
|
+
updated_count = 0
|
|
1104
|
+
new_comments_for_review: list[dict[str, Any]] = []
|
|
1105
|
+
|
|
1106
|
+
for comment in comments:
|
|
1107
|
+
path = comment["path"]
|
|
1108
|
+
line = comment["line"]
|
|
1109
|
+
new_body = comment["body"]
|
|
1110
|
+
|
|
1111
|
+
# Try fingerprint-based matching first
|
|
1112
|
+
finding_id = self._extract_finding_id(new_body)
|
|
1113
|
+
|
|
1114
|
+
if finding_id:
|
|
1115
|
+
seen_fingerprints.add(finding_id)
|
|
1116
|
+
|
|
1117
|
+
if finding_id in existing_by_fingerprint:
|
|
1118
|
+
existing = existing_by_fingerprint[finding_id]
|
|
1119
|
+
matched_comment_ids.add(existing["id"])
|
|
1120
|
+
# Check if update needed (body changed)
|
|
1121
|
+
if existing["body"] != new_body:
|
|
1122
|
+
success = await self.update_review_comment(existing["id"], new_body)
|
|
1123
|
+
if success:
|
|
1124
|
+
updated_count += 1
|
|
1125
|
+
logger.debug(
|
|
1126
|
+
f"Updated comment for finding {finding_id[:8]}... "
|
|
1127
|
+
f"(was at {existing['path']}:{existing['line']})"
|
|
1128
|
+
)
|
|
1129
|
+
else:
|
|
1130
|
+
logger.debug(f"Comment for finding {finding_id[:8]}... unchanged")
|
|
1131
|
+
continue
|
|
1132
|
+
|
|
1133
|
+
# Fallback: location-based matching
|
|
1134
|
+
# This handles both:
|
|
1135
|
+
# 1. Legacy comments without fingerprints
|
|
1136
|
+
# 2. Comments with fingerprints that don't match (e.g., path changed)
|
|
1137
|
+
issue_type_match = re.search(r"<!-- issue-type: (\w+) -->", new_body)
|
|
1138
|
+
issue_type = issue_type_match.group(1) if issue_type_match else "unknown"
|
|
1139
|
+
location = (path, line, issue_type)
|
|
1140
|
+
seen_locations.add(location)
|
|
1141
|
+
|
|
1142
|
+
existing_loc = existing_by_location.get(location)
|
|
1143
|
+
if existing_loc:
|
|
1144
|
+
# Found existing comment at same location with same issue type
|
|
1145
|
+
# Update it (this handles both legacy comments and fingerprint mismatches)
|
|
1146
|
+
matched_comment_ids.add(existing_loc["id"])
|
|
1147
|
+
if existing_loc["body"] != new_body:
|
|
1148
|
+
success = await self.update_review_comment(existing_loc["id"], new_body)
|
|
1149
|
+
if success:
|
|
1150
|
+
updated_count += 1
|
|
1151
|
+
if finding_id:
|
|
1152
|
+
logger.debug(
|
|
1153
|
+
f"Updated comment at {path}:{line} (fingerprint mismatch, location match)"
|
|
1154
|
+
)
|
|
1155
|
+
else:
|
|
1156
|
+
logger.debug(f"Updated legacy comment at {path}:{line}")
|
|
1157
|
+
continue
|
|
1158
|
+
|
|
1159
|
+
# New comment - collect for batch creation
|
|
1160
|
+
new_comments_for_review.append(comment)
|
|
1161
|
+
|
|
1162
|
+
# Step 2: Create new comments via review API (if any)
|
|
1163
|
+
created_count = 0
|
|
1164
|
+
if new_comments_for_review:
|
|
1165
|
+
success = await self.create_review_with_comments(
|
|
1166
|
+
new_comments_for_review,
|
|
1167
|
+
body=body,
|
|
1168
|
+
event=event,
|
|
1169
|
+
)
|
|
1170
|
+
if success:
|
|
1171
|
+
created_count = len(new_comments_for_review)
|
|
1172
|
+
logger.info(f"Created {created_count} new review comments")
|
|
1173
|
+
else:
|
|
1174
|
+
logger.error("Failed to create new review comments")
|
|
1175
|
+
return False
|
|
1176
|
+
|
|
1177
|
+
# Step 3: Delete resolved comments (unless skip_cleanup is True)
|
|
1178
|
+
# In streaming mode, we skip cleanup because we're processing files one at a time
|
|
1179
|
+
# and don't want to delete comments from files processed earlier in the stream
|
|
1180
|
+
deleted_count = 0
|
|
1181
+
+        if skip_cleanup:
+            logger.debug("Skipping cleanup phase (streaming mode)")
+        else:
+            # Priority: fingerprint-based deletion, then location-based for legacy
+            # Also clean up comments for files removed from the PR or files that were
+            # validated but no longer have findings
+            files_with_findings = {c["path"] for c in comments}
+
+            # Use validated_files if provided, otherwise fall back to files_with_findings
+            # This ensures we clean up comments for files that were validated but have no findings
+            files_in_scope = validated_files if validated_files is not None else files_with_findings
+
+            # Get current PR files to detect removed files
+            # Note: get_pr_files() returns [] on error, so we check for non-empty result
+            pr_files = await self.get_pr_files()
+            if pr_files:
+                current_pr_files: set[str] | None = {f["filename"] for f in pr_files}
+            else:
+                # Empty result could be an API error - fall back to batch-only cleanup
+                # to avoid accidentally deleting valid comments
+                logger.debug("Could not fetch PR files for cleanup, using batch-only mode")
+                current_pr_files = None
+
+            def should_delete_comment(existing_path: str) -> bool:
+                """Check if a comment should be deleted based on file status.
+
+                A comment should be deleted if the file is part of this PR.
+                The fingerprint check (done by caller) ensures we only delete
+                comments for findings that are no longer present.
+
+                This aggressive cleanup ensures stale comments are removed even if:
+                - The file was fixed but not re-validated in this specific run
+                - The validation runs on a subset of PR files
+
+                We preserve comments for files NOT in the PR to avoid accidentally
+                deleting comments from other branches/PRs.
+                """
+                # If we successfully fetched PR files, delete comments for any PR file
+                # whose finding is no longer present (fingerprint check done by caller)
+                if current_pr_files is not None:
+                    return existing_path in current_pr_files
+
+                # Fallback: if we couldn't fetch PR files, only clean up validated files
+                # to avoid accidentally deleting valid comments
+                return existing_path in files_in_scope
+
+            # Collect all comment IDs to delete
+            # Delete by fingerprint (primary) - comments that:
+            # 1. Were NOT matched (updated) in this run
+            # 2. Have a fingerprint not in the new findings
+            # 3. Are in files that are part of this PR/validation
+            comment_ids_to_delete: list[int] = []
+
+            for fingerprint, existing in existing_by_fingerprint.items():
+                comment_id = existing["id"]
+                # Skip if this comment was matched/updated via location fallback
+                if comment_id in matched_comment_ids:
+                    continue
+                if fingerprint not in seen_fingerprints and should_delete_comment(existing["path"]):
+                    comment_ids_to_delete.append(comment_id)
+                    logger.debug(f"Marking for deletion: resolved comment {fingerprint[:8]}...")
+
+            # Delete by location (legacy comments without fingerprints)
+            for location, existing in existing_by_location.items():
+                comment_id = existing["id"]
+                # Skip if already matched/updated
+                if comment_id in matched_comment_ids:
+                    continue
+                # Skip if already marked for deletion by fingerprint above
+                existing_fingerprint = self._extract_finding_id(existing.get("body", ""))
+                if existing_fingerprint:
+                    continue  # Already handled above
+
+                if location not in seen_locations and should_delete_comment(existing["path"]):
+                    comment_ids_to_delete.append(comment_id)
+                    logger.debug(f"Marking for deletion: resolved legacy comment at {location}")
+
+            # Delete all collected comments in parallel
+            if comment_ids_to_delete:
+                deleted_count, _failed = await self._delete_comments_parallel(comment_ids_to_delete)
+
+        logger.info(
+            f"Review comment management: {updated_count} updated, "
+            f"{created_count} created, {deleted_count} deleted (resolved)"
+        )
+
+        return True
+
+    def _extract_finding_id(self, body: str) -> str | None:
+        """Extract finding ID from comment body HTML comment.
+
+        Args:
+            body: Comment body text
+
+        Returns:
+            16-character finding ID hash, or None if not found
+        """
+        match = re.search(r"<!-- finding-id: ([a-f0-9]{16}) -->", body)
+        return match.group(1) if match else None
+
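For reference, the marker this helper parses is a hidden HTML comment that the bot embeds in each review-comment body. A minimal standalone check of the same pattern (the sample body below is invented for illustration):

    import re

    sample_body = (
        "**Finding:** wildcard action detected\n"
        "<!-- finding-id: 0123456789abcdef -->"
    )
    match = re.search(r"<!-- finding-id: ([a-f0-9]{16}) -->", sample_body)
    print(match.group(1) if match else None)  # prints: 0123456789abcdef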
+    async def _get_bot_comments_by_fingerprint(self, identifier: str) -> dict[str, dict[str, Any]]:
+        """Index existing bot comments by their finding fingerprint.
+
+        Args:
+            identifier: String to identify bot comments
+
+        Returns:
+            Dict mapping finding_id to comment metadata dict
+            Comment dict contains: id, body, path, line
+        """
+        comments = await self.get_review_comments()
+        indexed: dict[str, dict[str, Any]] = {}
+
+        for comment in comments:
+            if not isinstance(comment, dict):
+                continue
+
+            body = comment.get("body", "")
+            if identifier not in str(body):
+                continue
+
+            finding_id = self._extract_finding_id(body)
+            if finding_id:
+                indexed[finding_id] = {
+                    "id": comment["id"],
+                    "body": body,
+                    "path": comment.get("path", ""),
+                    "line": comment.get("line") or comment.get("original_line"),
+                }
+
+        return indexed
+
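The returned index is what drives the fingerprint-based cleanup shown earlier: any indexed fingerprint that is absent from the current run's findings marks a resolved comment. A hypothetical consumer sketch (the `gh` instance and the `find_resolved_comment_ids` name are illustrative, not part of the package):

    from typing import Any

    async def find_resolved_comment_ids(
        gh: Any, identifier: str, current_fingerprints: set[str]
    ) -> list[int]:
        # Index bot comments by fingerprint, then keep IDs whose finding is gone.
        indexed = await gh._get_bot_comments_by_fingerprint(identifier)
        return [meta["id"] for fp, meta in indexed.items() if fp not in current_fingerprints]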
+    # ==================== PR Labels ====================
+
+    async def add_labels(self, labels: list[str]) -> bool:
+        """Add labels to the PR.
+
+        Args:
+            labels: List of label names to add
+
+        Returns:
+            True if successful, False otherwise
+        """
+        result = await self._make_request(
+            "POST",
+            f"issues/{self.pr_number}/labels",
+            json={"labels": labels},
+        )
+
+        if result:
+            logger.info(f"Successfully added labels: {', '.join(labels)}")
+            return True
+        return False
+
+    async def remove_label(self, label: str) -> bool:
+        """Remove a label from the PR.
+
+        Args:
+            label: Label name to remove
+
+        Returns:
+            True if successful, False otherwise
+        """
+        result = await self._make_request(
+            "DELETE",
+            f"issues/{self.pr_number}/labels/{label}",
+        )
+
+        if result is not None:  # DELETE returns empty dict on success
+            logger.info(f"Successfully removed label: {label}")
+            return True
+        return False
+
+    async def get_labels(self) -> list[str]:
+        """Get all labels on the PR.
+
+        Returns:
+            List of label names
+        """
+        result = await self._make_request(
+            "GET",
+            f"issues/{self.pr_number}/labels",
+        )
+
+        if result and isinstance(result, list):
+            labels: list[str] = []
+            for label in result:
+                if isinstance(label, dict):
+                    name = label.get("name")
+                    if isinstance(name, str):
+                        labels.append(name)
+            return labels
+        return []
+
+    async def set_labels(self, labels: list[str]) -> bool:
+        """Set labels on the PR, replacing any existing labels.
+
+        Args:
+            labels: List of label names to set
+
+        Returns:
+            True if successful, False otherwise
+        """
+        result = await self._make_request(
+            "PUT",
+            f"issues/{self.pr_number}/labels",
+            json={"labels": labels},
+        )
+
+        if result:
+            logger.info(f"Successfully set labels: {', '.join(labels)}")
+            return True
+        return False
+
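Taken together, these four methods map onto GitHub's issue-labels endpoints (POST to add, DELETE to remove one, GET to list, PUT to replace the set). A hedged usage sketch, assuming `gh` is an already-constructed instance of this integration class and that the label name is purely illustrative:

    async def flag_policy_findings(gh, has_findings: bool) -> None:
        # Add a marker label when findings exist, otherwise clear it.
        if has_findings:
            await gh.add_labels(["iam-findings"])
        else:
            await gh.remove_label("iam-findings")
        print(await gh.get_labels())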
+    # ==================== PR Information ====================
+
+    async def get_pr_info(self) -> dict[str, Any] | None:
+        """Get detailed information about the PR.
+
+        Returns:
+            PR information dict or None on error
+        """
+        return await self._make_request("GET", f"pulls/{self.pr_number}")
+
+    async def get_pr_files(self) -> list[dict[str, Any]]:
+        """Get list of files changed in the PR.
+
+        Returns:
+            List of file information dicts
+        """
+        result = await self._make_request("GET", f"pulls/{self.pr_number}/files")
+
+        if result and isinstance(result, list):
+            return result
+        return []
+
+    async def get_pr_commits(self) -> list[dict[str, Any]]:
+        """Get list of commits in the PR.
+
+        Returns:
+            List of commit information dicts
+        """
+        result = await self._make_request("GET", f"pulls/{self.pr_number}/commits")
+
+        if result and isinstance(result, list):
+            return result
+        return []
+
+    # ==================== PR Status ====================
+
+    async def set_commit_status(
+        self,
+        state: str,
+        context: str,
+        description: str,
+        target_url: str | None = None,
+    ) -> bool:
+        """Set a commit status on the PR's head commit.
+
+        Args:
+            state: Status state ("error", "failure", "pending", "success")
+            context: A string label to differentiate this status from others
+            description: A short description of the status
+            target_url: Optional URL to link to more details
+
+        Returns:
+            True if successful, False otherwise
+        """
+        pr_info = await self.get_pr_info()
+        if not pr_info:
+            return False
+
+        head_info = pr_info.get("head")
+        if not isinstance(head_info, dict):
+            return False
+
+        commit_sha = head_info.get("sha")
+        if not isinstance(commit_sha, str):
+            return False
+
+        payload: dict[str, Any] = {
+            "state": state,
+            "context": context,
+            "description": description,
+        }
+        if target_url:
+            payload["target_url"] = target_url
+
+        result = await self._make_request(
+            "POST",
+            f"statuses/{commit_sha}",
+            json=payload,
+        )
+
+        if result:
+            logger.info(f"Successfully set commit status: {state}")
+            return True
+        return False
+
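set_commit_status resolves the PR's head SHA via get_pr_info() and then posts to the commit-status endpoint, so a caller only supplies the state, context, and description. A hedged usage sketch (the `gh` instance and the context/description strings are illustrative, not taken from the package):

    async def report_validation_result(gh, findings_count: int) -> bool:
        # Mark the head commit as failed while findings remain, successful otherwise.
        state = "failure" if findings_count else "success"
        description = (
            f"{findings_count} IAM policy finding(s)" if findings_count else "No IAM policy findings"
        )
        return await gh.set_commit_status(
            state=state,
            context="iam-policy-validator",
            description=description,
        )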
+    # ==================== CODEOWNERS and Ignore Commands ====================
+
+    async def get_codeowners_content(self) -> str | None:
+        """Fetch CODEOWNERS file content from repository.
+
+        Results are cached per instance to avoid redundant API calls.
+
+        Searches in standard CODEOWNERS locations:
+        - CODEOWNERS
+        - .github/CODEOWNERS
+        - docs/CODEOWNERS
+
+        Returns:
+            CODEOWNERS file content as string, or None if not found
+        """
+        # Return cached result if already loaded
+        if self._codeowners_loaded:
+            return self._codeowners_cache
+
+        from iam_validator.core.codeowners import (  # pylint: disable=import-outside-toplevel
+            CodeOwnersParser,
+        )
+
+        for path in CodeOwnersParser.CODEOWNERS_PATHS:
+            result = await self._make_request(
+                "GET",
+                f"contents/{path}",
+            )
+
+            if result and isinstance(result, dict) and "content" in result:
+                try:
+                    content = base64.b64decode(result["content"]).decode("utf-8")
+                    logger.debug(f"Found CODEOWNERS at {path}")
+                    # Cache the result
+                    self._codeowners_cache = content
+                    self._codeowners_loaded = True
+                    return content
+                except (ValueError, UnicodeDecodeError) as e:
+                    logger.warning(f"Failed to decode CODEOWNERS at {path}: {e}")
+                    continue
+
+        logger.debug("No CODEOWNERS file found in repository")
+        # Cache the negative result too
+        self._codeowners_cache = None
+        self._codeowners_loaded = True
+        return None
+
+    async def get_team_members(self, org: str, team_slug: str) -> list[str]:
+        """Get members of a GitHub team.
+
+        Results are cached per instance to avoid redundant API calls
+        when checking multiple users against the same team.
+
+        Note: This requires the token to have `read:org` scope for
+        organization teams.
+
+        Args:
+            org: Organization name
+            team_slug: Team slug (URL-friendly name)
+
+        Returns:
+            List of team member usernames (lowercase)
+        """
+        # Check cache first
+        cache_key = (org.lower(), team_slug.lower())
+        if cache_key in self._team_cache:
+            logger.debug(f"Using cached team members for {org}/{team_slug}")
+            return self._team_cache[cache_key]
+
+        url = f"{self.api_url}/orgs/{org}/teams/{team_slug}/members"
+
+        try:
+            if self._client:
+                response = await self._client.request("GET", url)
+            else:
+                async with httpx.AsyncClient(
+                    headers=self._get_headers(), timeout=httpx.Timeout(30.0)
+                ) as client:
+                    response = await client.request("GET", url)
+
+            response.raise_for_status()
+            result = response.json()
+
+            if isinstance(result, list):
+                members = [
+                    member.get("login", "").lower()
+                    for member in result
+                    if isinstance(member, dict) and member.get("login")
+                ]
+                # Cache the result
+                self._team_cache[cache_key] = members
+                logger.debug(f"Found {len(members)} members in team {org}/{team_slug}")
+                return members
+
+        except httpx.HTTPStatusError as e:
+            logger.warning(
+                f"Failed to get team members for {org}/{team_slug}: HTTP {e.response.status_code}"
+            )
+        except Exception as e:  # pylint: disable=broad-exception-caught
+            logger.warning(f"Failed to get team members for {org}/{team_slug}: {e}")
+
+        # Cache empty result to avoid repeated failed API calls
+        self._team_cache[cache_key] = []
+        return []
+
+    async def is_user_codeowner(
+        self,
+        username: str,
+        file_path: str,
+        codeowners_parser: "CodeOwnersParser | None" = None,
+        allowed_users: list[str] | None = None,
+    ) -> bool:
+        """Check if a user is authorized to ignore findings for a file.
+
+        Authorization is granted if:
+        1. User is listed directly in CODEOWNERS for the file
+        2. User is a member of a team listed in CODEOWNERS for the file
+        3. User is in the allowed_users fallback list (when no CODEOWNERS)
+
+        Performance: Team membership checks are executed in parallel.
+
+        Args:
+            username: GitHub username to check
+            file_path: Path to the file being checked
+            codeowners_parser: Pre-parsed CODEOWNERS (for caching)
+            allowed_users: Fallback list of allowed users (when no CODEOWNERS)
+
+        Returns:
+            True if user is authorized, False otherwise
+        """
+        username_lower = username.lower()
+
+        # Check fallback allowed_users first (always applies if configured)
+        if allowed_users:
+            if username_lower in [u.lower() for u in allowed_users]:
+                logger.debug(f"User {username} authorized via allowed_users config")
+                return True
+
+        # Get or parse CODEOWNERS
+        parser = codeowners_parser
+        if parser is None:
+            content = await self.get_codeowners_content()
+            if content is None:
+                # No CODEOWNERS and no allowed_users match = deny
+                logger.debug(f"No CODEOWNERS file found, user {username} not in allowed_users")
+                return False
+
+            from iam_validator.core.codeowners import (  # pylint: disable=import-outside-toplevel
+                CodeOwnersParser,
+            )
+
+            parser = CodeOwnersParser(content)
+
+        # Check direct user ownership
+        if parser.is_owner(username, file_path):
+            logger.debug(f"User {username} is direct owner of {file_path}")
+            return True
+
+        # Check team membership - fetch all teams in parallel for speed
+        teams = parser.get_teams_for_file(file_path)
+        if not teams:
+            logger.debug(f"User {username} is not authorized for {file_path}")
+            return False
+
+        # Fetch all team memberships concurrently
+
+        async def check_team(org: str, team_slug: str) -> tuple[str, str, bool]:
+            members = await self.get_team_members(org, team_slug)
+            return (org, team_slug, username_lower in members)
+
+        results = await asyncio.gather(*[check_team(org, team_slug) for org, team_slug in teams])
+
+        for org, team_slug, is_member in results:
+            if is_member:
+                logger.debug(f"User {username} authorized via team {org}/{team_slug}")
+                return True
+
+        logger.debug(f"User {username} is not authorized for {file_path}")
+        return False
+
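In the ignore-command flow, this check is what gates whether a reply actually suppresses a finding: CODEOWNERS entries (direct or via teams) win, and allowed_users is only a fallback. A hedged caller sketch (the `gh` instance, file path, and fallback user list are illustrative):

    async def can_suppress(gh, username: str) -> bool:
        # CODEOWNERS rules are consulted first; allowed_users only applies as a fallback.
        return await gh.is_user_codeowner(
            username=username,
            file_path="policies/s3-readonly.json",
            allowed_users=["security-lead"],
        )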
+    async def get_issue_comments(self) -> list[dict[str, Any]]:
+        """Get all issue comments (general PR comments, not review comments) with pagination.
+
+        Fetches ALL issue comments across all pages. This ensures proper
+        comment management when there are many comments on a PR.
+
+        Returns:
+            List of all issue comment dicts
+        """
+        return await self._make_paginated_request(f"issues/{self.pr_number}/comments")
+
+    async def get_comment_by_id(self, comment_id: int) -> dict[str, Any] | None:
+        """Get a specific review comment by ID.
+
+        Used for verifying that ignore command replies still exist
+        (tamper-resistant verification).
+
+        Args:
+            comment_id: The ID of the review comment to fetch
+
+        Returns:
+            Comment dict if found, None if deleted or error
+        """
+        result = await self._make_request(
+            "GET",
+            f"pulls/comments/{comment_id}",
+        )
+
+        if result and isinstance(result, dict):
+            return result
+        return None
+
+    async def post_reply_to_review_comment(
+        self,
+        comment_id: int,
+        body: str,
+    ) -> bool:
+        """Post a reply to a review comment thread.
+
+        Args:
+            comment_id: The ID of the review comment to reply to
+            body: The reply text (markdown supported)
+
+        Returns:
+            True if successful, False otherwise
+        """
+        result = await self._make_request(
+            "POST",
+            f"pulls/{self.pr_number}/comments",
+            json={
+                "body": body,
+                "in_reply_to": comment_id,
+            },
+        )
+
+        if result:
+            logger.debug(f"Successfully posted reply to comment {comment_id}")
+            return True
+        return False
+
+    async def scan_for_ignore_commands(
+        self,
+        identifier: str = constants.BOT_IDENTIFIER,
+    ) -> list[tuple[dict[str, Any], dict[str, Any]]]:
+        """Scan for ignore commands in replies to bot review comments.
+
+        Looks for replies to bot comments that contain ignore commands.
+        Supports formats: "ignore", "/ignore", "@iam-validator ignore",
+        "skip", "suppress", and "ignore: reason here".
+
+        Args:
+            identifier: String to identify bot comments
+
+        Returns:
+            List of (bot_comment, reply_comment) tuples where reply
+            contains an ignore command
+        """
+        all_comments = await self.get_review_comments()
+        ignore_commands: list[tuple[dict[str, Any], dict[str, Any]]] = []
+
+        # Index bot comments by ID for O(1) lookup
+        bot_comments_by_id: dict[int, dict[str, Any]] = {}
+        for comment in all_comments:
+            if not isinstance(comment, dict):
+                continue
+            body = comment.get("body", "")
+            comment_id = comment.get("id")
+            if identifier in str(body) and isinstance(comment_id, int):
+                bot_comments_by_id[comment_id] = comment
+
+        # Find replies with ignore commands
+        for comment in all_comments:
+            if not isinstance(comment, dict):
+                continue
+
+            reply_to_id = comment.get("in_reply_to_id")
+            if reply_to_id and reply_to_id in bot_comments_by_id:
+                body = comment.get("body", "")
+                if self._is_ignore_command(body):
+                    ignore_commands.append((bot_comments_by_id[reply_to_id], comment))
+
+        logger.debug(f"Found {len(ignore_commands)} ignore command(s) in PR comments")
+        return ignore_commands
+
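Each returned pair links the bot's original finding comment to the human reply that asked to suppress it, so a caller can recover both the finding ID and the stated reason before acknowledging. A hedged processing sketch (the `gh` instance and the acknowledgement text are illustrative; a real caller would also run the authorization check shown above):

    async def process_ignores(gh) -> None:
        for bot_comment, reply in await gh.scan_for_ignore_commands():
            finding_id = gh.extract_finding_id(bot_comment.get("body", ""))
            reason = gh.extract_ignore_reason(reply.get("body", "")) or "no reason given"
            if finding_id:
                await gh.post_reply_to_review_comment(
                    reply["id"],
                    f"Acknowledged: finding {finding_id} suppressed ({reason}).",
                )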
+    def _is_ignore_command(self, text: str) -> bool:
+        """Check if text is an ignore command.
+
+        Supports:
+        - "ignore" (case insensitive)
+        - "/ignore"
+        - "@iam-validator ignore"
+        - "skip", "suppress"
+        - "ignore: reason here" (with optional reason)
+
+        Args:
+            text: Comment text to check
+
+        Returns:
+            True if text is an ignore command
+        """
+        if not text:
+            return False
+
+        text = text.strip().lower()
+
+        ignore_patterns = [
+            r"^\s*ignore\s*$",
+            r"^\s*/ignore\s*$",
+            r"^\s*@?iam-validator\s+ignore\s*$",
+            r"^\s*ignore\s*:\s*.+$",  # With reason
+            r"^\s*skip\s*$",
+            r"^\s*suppress\s*$",
+        ]
+
+        return any(re.match(pattern, text, re.IGNORECASE) for pattern in ignore_patterns)
+
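The patterns are anchored on the whole stripped, lowercased reply, so an ignore keyword buried inside a longer sentence is not treated as a command. A standalone check against the same pattern list (the sample replies are invented for illustration):

    import re

    patterns = [
        r"^\s*ignore\s*$",
        r"^\s*/ignore\s*$",
        r"^\s*@?iam-validator\s+ignore\s*$",
        r"^\s*ignore\s*:\s*.+$",
        r"^\s*skip\s*$",
        r"^\s*suppress\s*$",
    ]

    for reply in ["/ignore", "ignore: approved by security", "please ignore this one"]:
        text = reply.strip().lower()
        matched = any(re.match(p, text, re.IGNORECASE) for p in patterns)
        print(reply, "->", matched)  # the first two match, the full sentence does not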
+    @staticmethod
+    def extract_finding_id(comment_body: str) -> str | None:
+        """Extract finding ID from a bot comment.
+
+        Args:
+            comment_body: The comment body text
+
+        Returns:
+            Finding ID hash, or None if not found
+        """
+        match = re.search(r"<!-- finding-id: ([a-f0-9]+) -->", comment_body)
+        return match.group(1) if match else None
+
+    @staticmethod
+    def extract_ignore_reason(text: str) -> str | None:
+        """Extract reason from ignore command.
+
+        Args:
+            text: The ignore command text
+
+        Returns:
+            Reason string, or None if no reason provided
+        """
+        match = re.search(r"ignore\s*:\s*(.+)$", text.strip(), re.IGNORECASE)
+        return match.group(1).strip() if match else None
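extract_ignore_reason pulls out whatever follows the colon in an "ignore: reason" command. A standalone illustration of the same expression (the sample reply is invented):

    import re

    reply = "ignore: covered by SCP guardrails"
    match = re.search(r"ignore\s*:\s*(.+)$", reply.strip(), re.IGNORECASE)
    print(match.group(1).strip() if match else None)  # prints: covered by SCP guardrails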