iam-policy-validator 1.14.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. iam_policy_validator-1.14.0.dist-info/METADATA +782 -0
  2. iam_policy_validator-1.14.0.dist-info/RECORD +106 -0
  3. iam_policy_validator-1.14.0.dist-info/WHEEL +4 -0
  4. iam_policy_validator-1.14.0.dist-info/entry_points.txt +2 -0
  5. iam_policy_validator-1.14.0.dist-info/licenses/LICENSE +21 -0
  6. iam_validator/__init__.py +27 -0
  7. iam_validator/__main__.py +11 -0
  8. iam_validator/__version__.py +9 -0
  9. iam_validator/checks/__init__.py +45 -0
  10. iam_validator/checks/action_condition_enforcement.py +1442 -0
  11. iam_validator/checks/action_resource_matching.py +472 -0
  12. iam_validator/checks/action_validation.py +67 -0
  13. iam_validator/checks/condition_key_validation.py +88 -0
  14. iam_validator/checks/condition_type_mismatch.py +257 -0
  15. iam_validator/checks/full_wildcard.py +62 -0
  16. iam_validator/checks/mfa_condition_check.py +105 -0
  17. iam_validator/checks/policy_size.py +114 -0
  18. iam_validator/checks/policy_structure.py +556 -0
  19. iam_validator/checks/policy_type_validation.py +331 -0
  20. iam_validator/checks/principal_validation.py +708 -0
  21. iam_validator/checks/resource_validation.py +135 -0
  22. iam_validator/checks/sensitive_action.py +438 -0
  23. iam_validator/checks/service_wildcard.py +98 -0
  24. iam_validator/checks/set_operator_validation.py +153 -0
  25. iam_validator/checks/sid_uniqueness.py +146 -0
  26. iam_validator/checks/trust_policy_validation.py +509 -0
  27. iam_validator/checks/utils/__init__.py +17 -0
  28. iam_validator/checks/utils/action_parser.py +149 -0
  29. iam_validator/checks/utils/policy_level_checks.py +190 -0
  30. iam_validator/checks/utils/sensitive_action_matcher.py +293 -0
  31. iam_validator/checks/utils/wildcard_expansion.py +86 -0
  32. iam_validator/checks/wildcard_action.py +58 -0
  33. iam_validator/checks/wildcard_resource.py +374 -0
  34. iam_validator/commands/__init__.py +31 -0
  35. iam_validator/commands/analyze.py +549 -0
  36. iam_validator/commands/base.py +48 -0
  37. iam_validator/commands/cache.py +393 -0
  38. iam_validator/commands/completion.py +471 -0
  39. iam_validator/commands/download_services.py +255 -0
  40. iam_validator/commands/post_to_pr.py +86 -0
  41. iam_validator/commands/query.py +485 -0
  42. iam_validator/commands/validate.py +830 -0
  43. iam_validator/core/__init__.py +13 -0
  44. iam_validator/core/access_analyzer.py +671 -0
  45. iam_validator/core/access_analyzer_report.py +640 -0
  46. iam_validator/core/aws_fetcher.py +29 -0
  47. iam_validator/core/aws_service/__init__.py +21 -0
  48. iam_validator/core/aws_service/cache.py +108 -0
  49. iam_validator/core/aws_service/client.py +205 -0
  50. iam_validator/core/aws_service/fetcher.py +641 -0
  51. iam_validator/core/aws_service/parsers.py +149 -0
  52. iam_validator/core/aws_service/patterns.py +51 -0
  53. iam_validator/core/aws_service/storage.py +291 -0
  54. iam_validator/core/aws_service/validators.py +380 -0
  55. iam_validator/core/check_registry.py +679 -0
  56. iam_validator/core/cli.py +134 -0
  57. iam_validator/core/codeowners.py +245 -0
  58. iam_validator/core/condition_validators.py +626 -0
  59. iam_validator/core/config/__init__.py +81 -0
  60. iam_validator/core/config/aws_api.py +35 -0
  61. iam_validator/core/config/aws_global_conditions.py +160 -0
  62. iam_validator/core/config/category_suggestions.py +181 -0
  63. iam_validator/core/config/check_documentation.py +390 -0
  64. iam_validator/core/config/condition_requirements.py +258 -0
  65. iam_validator/core/config/config_loader.py +670 -0
  66. iam_validator/core/config/defaults.py +739 -0
  67. iam_validator/core/config/principal_requirements.py +421 -0
  68. iam_validator/core/config/sensitive_actions.py +672 -0
  69. iam_validator/core/config/service_principals.py +132 -0
  70. iam_validator/core/config/wildcards.py +127 -0
  71. iam_validator/core/constants.py +149 -0
  72. iam_validator/core/diff_parser.py +325 -0
  73. iam_validator/core/finding_fingerprint.py +131 -0
  74. iam_validator/core/formatters/__init__.py +27 -0
  75. iam_validator/core/formatters/base.py +147 -0
  76. iam_validator/core/formatters/console.py +68 -0
  77. iam_validator/core/formatters/csv.py +171 -0
  78. iam_validator/core/formatters/enhanced.py +481 -0
  79. iam_validator/core/formatters/html.py +672 -0
  80. iam_validator/core/formatters/json.py +33 -0
  81. iam_validator/core/formatters/markdown.py +64 -0
  82. iam_validator/core/formatters/sarif.py +251 -0
  83. iam_validator/core/ignore_patterns.py +297 -0
  84. iam_validator/core/ignore_processor.py +309 -0
  85. iam_validator/core/ignored_findings.py +400 -0
  86. iam_validator/core/label_manager.py +197 -0
  87. iam_validator/core/models.py +404 -0
  88. iam_validator/core/policy_checks.py +220 -0
  89. iam_validator/core/policy_loader.py +785 -0
  90. iam_validator/core/pr_commenter.py +780 -0
  91. iam_validator/core/report.py +942 -0
  92. iam_validator/integrations/__init__.py +28 -0
  93. iam_validator/integrations/github_integration.py +1821 -0
  94. iam_validator/integrations/ms_teams.py +442 -0
  95. iam_validator/sdk/__init__.py +220 -0
  96. iam_validator/sdk/arn_matching.py +382 -0
  97. iam_validator/sdk/context.py +222 -0
  98. iam_validator/sdk/exceptions.py +48 -0
  99. iam_validator/sdk/helpers.py +177 -0
  100. iam_validator/sdk/policy_utils.py +451 -0
  101. iam_validator/sdk/query_utils.py +454 -0
  102. iam_validator/sdk/shortcuts.py +283 -0
  103. iam_validator/utils/__init__.py +35 -0
  104. iam_validator/utils/cache.py +105 -0
  105. iam_validator/utils/regex.py +205 -0
  106. iam_validator/utils/terminal.py +22 -0
@@ -0,0 +1,1821 @@
+ """GitHub Integration Module.
+
+ This module provides functionality to interact with GitHub,
+ including posting PR comments, line comments, labels, and retrieving PR information.
+ """
+
+ import asyncio
+ import base64
+ import logging
+ import os
+ import re
+ import time
+ from enum import Enum
+ from typing import TYPE_CHECKING, Any
+
+ import httpx
+
+ from iam_validator.core import constants
+
+ if TYPE_CHECKING:
+     from iam_validator.core.codeowners import CodeOwnersParser
+
+ logger = logging.getLogger(__name__)
+
+
+ class GitHubRateLimitError(Exception):
+     """Raised when GitHub API rate limit is exceeded."""
+
+     def __init__(self, reset_time: int, message: str = "GitHub API rate limit exceeded"):
+         self.reset_time = reset_time
+         super().__init__(message)
+
+
+ class GitHubRetryableError(Exception):
+     """Raised for transient GitHub API errors that should be retried."""
+
+     pass
+
+
+ # Retry configuration
+ MAX_RETRIES = 3
+ INITIAL_BACKOFF_SECONDS = 1.0
+ MAX_BACKOFF_SECONDS = 30.0
+ BACKOFF_MULTIPLIER = 2.0
+
+ # HTTP status codes that should trigger retry
+ RETRYABLE_STATUS_CODES = {408, 429, 500, 502, 503, 504}
+
+ # Concurrency limit for parallel API operations (deletions, updates)
+ # This prevents hitting GitHub's secondary rate limits while still being fast
+ MAX_CONCURRENT_API_CALLS = 10
+
+
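These constants drive the exponential backoff used by _make_request below. A minimal illustration of the delay schedule they produce (not part of the module, just the arithmetic):

# Illustration only: the backoff delays implied by the constants above.
delays = []
backoff = INITIAL_BACKOFF_SECONDS
for attempt in range(MAX_RETRIES):
    delays.append(min(backoff, MAX_BACKOFF_SECONDS))
    backoff *= BACKOFF_MULTIPLIER
print(delays)  # [1.0, 2.0, 4.0] -- each retry doubles the wait, capped at 30s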
+ class PRState(str, Enum):
+     """GitHub PR state."""
+
+     OPEN = "open"
+     CLOSED = "closed"
+     ALL = "all"
+
+
+ class ReviewEvent(str, Enum):
+     """GitHub PR review event types."""
+
+     APPROVE = "APPROVE"
+     REQUEST_CHANGES = "REQUEST_CHANGES"
+     COMMENT = "COMMENT"
+
+
+ class GitHubIntegration:
+     """Handles comprehensive GitHub API interactions for PRs.
+
+     This class provides methods to:
+     - Post general PR comments
+     - Add line-specific review comments
+     - Manage PR labels
+     - Submit PR reviews
+     - Retrieve PR information and files
+     """
+
+     def __init__(
+         self,
+         token: str | None = None,
+         repository: str | None = None,
+         pr_number: str | None = None,
+     ):
+         """Initialize GitHub integration.
+
+         Args:
+             token: GitHub API token (defaults to GITHUB_TOKEN env var)
+             repository: Repository in format 'owner/repo' (defaults to GITHUB_REPOSITORY env var)
+             pr_number: PR number (defaults to GITHUB_PR_NUMBER env var)
+         """
+         self.token = self._validate_token(token or os.environ.get("GITHUB_TOKEN"))
+         self.repository = self._validate_repository(
+             repository or os.environ.get("GITHUB_REPOSITORY")
+         )
+         self.pr_number = self._validate_pr_number(pr_number or os.environ.get("GITHUB_PR_NUMBER"))
+         self.api_url = self._validate_api_url(
+             os.environ.get("GITHUB_API_URL", "https://api.github.com")
+         )
+         self._client: httpx.AsyncClient | None = None
+         # Cache for team memberships: (org, team_slug) -> list[str]
+         # Reduces API calls when checking multiple users against same team
+         self._team_cache: dict[tuple[str, str], list[str]] = {}
+         # Cache for CODEOWNERS content (fetched once per instance)
+         self._codeowners_cache: str | None = None
+         self._codeowners_loaded: bool = False
+
+     def _validate_token(self, token: str | None) -> str | None:
+         """Validate and sanitize GitHub token.
+
+         Args:
+             token: GitHub token to validate
+
+         Returns:
+             Validated token or None
+         """
+         if token is None:
+             return None
+
+         # Basic validation - ensure it's a string and not empty
+         if not isinstance(token, str) or not token.strip():
+             logger.warning("Invalid GitHub token provided (empty or non-string)")
+             return None
+
+         # Sanitize - remove any whitespace
+         token = token.strip()
+
+         # Basic format check - GitHub tokens have specific patterns
+         # Personal access tokens: ghp_*, fine-grained: github_pat_*
+         # GitHub App tokens start with different prefixes
+         # Just ensure it's reasonable length and ASCII
+         if len(token) < 10 or len(token) > 500:
+             logger.warning(f"GitHub token has unusual length: {len(token)}")
+             return None
+
+         # Ensure only ASCII characters (tokens should be ASCII)
+         if not token.isascii():
+             logger.warning("GitHub token contains non-ASCII characters")
+             return None
+
+         return token
+
+     def _validate_repository(self, repository: str | None) -> str | None:
+         """Validate repository format (owner/repo).
+
+         Args:
+             repository: Repository string to validate
+
+         Returns:
+             Validated repository or None
+         """
+         if repository is None:
+             return None
+
+         if not isinstance(repository, str) or not repository.strip():
+             logger.warning("Invalid repository provided (empty or non-string)")
+             return None
+
+         repository = repository.strip()
+
+         # Must be in format owner/repo
+         if "/" not in repository:
+             logger.warning(f"Invalid repository format: {repository} (expected owner/repo)")
+             return None
+
+         parts = repository.split("/")
+         if len(parts) != 2:
+             logger.warning(f"Invalid repository format: {repository} (expected exactly one slash)")
+             return None
+
+         owner, repo = parts
+         if not owner or not repo:
+             logger.warning(f"Invalid repository format: {repository} (empty owner or repo)")
+             return None
+
+         # Basic sanitization - alphanumeric, hyphens, underscores, dots
+         # GitHub allows these characters in usernames and repo names
+         valid_pattern = re.compile(r"^[a-zA-Z0-9._-]+$")
+         if not valid_pattern.match(owner) or not valid_pattern.match(repo):
+             logger.warning(
+                 f"Invalid characters in repository: {repository} "
+                 "(only alphanumeric, ., -, _ allowed)"
+             )
+             return None
+
+         return repository
+
+     def _validate_pr_number(self, pr_number: str | None) -> str | None:
+         """Validate PR number.
+
+         Args:
+             pr_number: PR number to validate
+
+         Returns:
+             Validated PR number or None
+         """
+         if pr_number is None:
+             return None
+
+         if not isinstance(pr_number, str) or not pr_number.strip():
+             logger.warning("Invalid PR number provided (empty or non-string)")
+             return None
+
+         pr_number = pr_number.strip()
+
+         # Must be a positive integer
+         try:
+             pr_int = int(pr_number)
+             if pr_int <= 0:
+                 logger.warning(f"Invalid PR number: {pr_number} (must be positive)")
+                 return None
+         except ValueError:
+             logger.warning(f"Invalid PR number: {pr_number} (must be an integer)")
+             return None
+
+         return pr_number
+
+     def _validate_api_url(self, api_url: str) -> str:
+         """Validate GitHub API URL.
+
+         Args:
+             api_url: API URL to validate
+
+         Returns:
+             Validated API URL or default
+         """
+         if not api_url or not isinstance(api_url, str):
+             logger.warning("Invalid API URL provided, using default")
+             return "https://api.github.com"
+
+         api_url = api_url.strip()
+
+         # Must be HTTPS (security requirement)
+         if not api_url.startswith("https://"):
+             logger.warning(
+                 f"API URL must use HTTPS: {api_url}, using default https://api.github.com"
+             )
+             return "https://api.github.com"
+
+         # Basic URL validation
+         # Simple URL pattern check
+         url_pattern = re.compile(r"^https://[a-zA-Z0-9.-]+(?:/.*)?$")
+         if not url_pattern.match(api_url):
+             logger.warning(f"Invalid API URL format: {api_url}, using default")
+             return "https://api.github.com"
+
+         return api_url
+
+     async def __aenter__(self) -> "GitHubIntegration":
+         """Async context manager entry."""
+         self._client = httpx.AsyncClient(
+             timeout=httpx.Timeout(30.0),
+             headers=self._get_headers(),
+         )
+         return self
+
+     async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
+         """Async context manager exit."""
+         del exc_type, exc_val, exc_tb # Unused
+         if self._client:
+             await self._client.aclose()
+             self._client = None
+
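Since the class opens and closes its httpx client through the async context-manager protocol above, callers typically wrap it in async with. A minimal usage sketch (the token, repository, and PR number are placeholders; in CI they come from the GITHUB_* environment variables):

import asyncio

async def main() -> None:
    # Placeholder values; omit them to fall back to GITHUB_TOKEN etc.
    async with GitHubIntegration(
        token="ghp_example", repository="octo/repo", pr_number="42"
    ) as gh:
        if gh.is_configured():
            await gh.post_comment("Validation finished.")

asyncio.run(main())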
+     def _get_headers(self) -> dict[str, str]:
+         """Get common request headers."""
+         return {
+             "Authorization": f"token {self.token}",
+             "Accept": "application/vnd.github.v3+json",
+             "Content-Type": "application/json",
+         }
+
+     def is_configured(self) -> bool:
+         """Check if GitHub integration is properly configured.
+
+         Returns:
+             True if all required environment variables are set
+         """
+         is_valid = all([self.token, self.repository, self.pr_number])
+
+         # Provide helpful debug info when not configured
+         if not is_valid:
+             missing = []
+             if not self.token:
+                 missing.append("GITHUB_TOKEN")
+             if not self.repository:
+                 missing.append("GITHUB_REPOSITORY")
+             if not self.pr_number:
+                 missing.append("GITHUB_PR_NUMBER")
+
+             logger.debug(f"GitHub integration missing: {', '.join(missing)}")
+             if not self.pr_number and self.token and self.repository:
+                 logger.info(
+                     "GitHub PR integration requires GITHUB_PR_NUMBER. "
+                     "This is only available when running on pull request events. "
+                     "Current event may not have PR context."
+                 )
+
+         return is_valid
+
+     async def _make_request(
+         self, method: str, endpoint: str, **kwargs: Any
+     ) -> dict[str, Any] | None:
+         """Make an HTTP request to GitHub API with retry and rate limit handling.
+
+         Implements exponential backoff for transient errors (5xx, 429) and
+         respects GitHub's rate limit headers.
+
+         Args:
+             method: HTTP method (GET, POST, PATCH, DELETE)
+             endpoint: API endpoint path
+             **kwargs: Additional arguments to pass to httpx
+
+         Returns:
+             Response JSON or None on error
+         """
+         if not self.is_configured():
+             logger.error("GitHub integration not configured")
+             return None
+
+         url = f"{self.api_url}/repos/{self.repository}/{endpoint}"
+         backoff = INITIAL_BACKOFF_SECONDS
+         last_error: Exception | None = None
+
+         for attempt in range(MAX_RETRIES + 1):
+             try:
+                 if self._client:
+                     response = await self._client.request(method, url, **kwargs)
+                 else:
+                     async with httpx.AsyncClient(headers=self._get_headers()) as client:
+                         response = await client.request(method, url, **kwargs)
+
+                 # Handle rate limiting (429)
+                 if response.status_code == 429:
+                     # Get reset time from headers
+                     reset_time = response.headers.get("X-RateLimit-Reset")
+                     retry_after = response.headers.get("Retry-After")
+
+                     if retry_after:
+                         wait_time = int(retry_after)
+                     elif reset_time:
+                         wait_time = max(0, int(reset_time) - int(time.time()))
+                     else:
+                         wait_time = min(backoff, MAX_BACKOFF_SECONDS)
+
+                     if attempt < MAX_RETRIES:
+                         logger.warning(
+                             f"Rate limited on {method} {endpoint}, "
+                             f"waiting {wait_time}s (attempt {attempt + 1}/{MAX_RETRIES + 1})"
+                         )
+                         await asyncio.sleep(wait_time)
+                         backoff = min(backoff * BACKOFF_MULTIPLIER, MAX_BACKOFF_SECONDS)
+                         continue
+                     else:
+                         raise GitHubRateLimitError(
+                             int(reset_time or 0),
+                             f"Rate limit exceeded after {MAX_RETRIES + 1} attempts",
+                         )
+
+                 # Handle retryable server errors (5xx)
+                 if response.status_code in RETRYABLE_STATUS_CODES and attempt < MAX_RETRIES:
+                     logger.warning(
+                         f"Retryable error {response.status_code} on {method} {endpoint}, "
+                         f"retrying in {backoff:.1f}s (attempt {attempt + 1}/{MAX_RETRIES + 1})"
+                     )
+                     await asyncio.sleep(backoff)
+                     backoff = min(backoff * BACKOFF_MULTIPLIER, MAX_BACKOFF_SECONDS)
+                     continue
+
+                 response.raise_for_status()
+                 return response.json() if response.text else {}
+
+             except httpx.HTTPStatusError as e:
+                 last_error = e
+                 # Don't retry client errors (4xx) except rate limit
+                 if 400 <= e.response.status_code < 500 and e.response.status_code != 429:
+                     logger.error(f"HTTP error: {e.response.status_code} - {e.response.text}")
+                     return None
+                 # For server errors, continue to retry logic
+                 if attempt < MAX_RETRIES:
+                     logger.warning(
+                         f"HTTP error {e.response.status_code}, retrying in {backoff:.1f}s"
+                     )
+                     await asyncio.sleep(backoff)
+                     backoff = min(backoff * BACKOFF_MULTIPLIER, MAX_BACKOFF_SECONDS)
+                     continue
+
+             except (httpx.ConnectError, httpx.TimeoutException) as e:
+                 last_error = e
+                 if attempt < MAX_RETRIES:
+                     logger.warning(
+                         f"Connection error on {method} {endpoint}: {e}, "
+                         f"retrying in {backoff:.1f}s (attempt {attempt + 1}/{MAX_RETRIES + 1})"
+                     )
+                     await asyncio.sleep(backoff)
+                     backoff = min(backoff * BACKOFF_MULTIPLIER, MAX_BACKOFF_SECONDS)
+                     continue
+
+             except Exception as e: # pylint: disable=broad-exception-caught
+                 logger.error(f"Unexpected error on {method} {endpoint}: {e}")
+                 return None
+
+         # All retries exhausted
+         if last_error:
+             logger.error(f"Request failed after {MAX_RETRIES + 1} attempts: {last_error}")
+         return None
+
+     async def _make_request_no_retry(
+         self, method: str, endpoint: str, **kwargs: Any
+     ) -> dict[str, Any] | None:
+         """Make an HTTP request without retry (for non-critical operations).
+
+         Args:
+             method: HTTP method (GET, POST, PATCH, DELETE)
+             endpoint: API endpoint path
+             **kwargs: Additional arguments to pass to httpx
+
+         Returns:
+             Response JSON or None on error
+         """
+         if not self.is_configured():
+             logger.error("GitHub integration not configured")
+             return None
+
+         url = f"{self.api_url}/repos/{self.repository}/{endpoint}"
+
+         try:
+             if self._client:
+                 response = await self._client.request(method, url, **kwargs)
+             else:
+                 async with httpx.AsyncClient(headers=self._get_headers()) as client:
+                     response = await client.request(method, url, **kwargs)
+
+             response.raise_for_status()
+             return response.json() if response.text else {}
+
+         except httpx.HTTPStatusError as e:
+             logger.error(f"HTTP error: {e.response.status_code} - {e.response.text}")
+             return None
+         except Exception as e: # pylint: disable=broad-exception-caught
+             logger.error(f"Request failed: {e}")
+             return None
+
+     async def _make_paginated_request(
+         self, endpoint: str, max_pages: int = 100
+     ) -> list[dict[str, Any]]:
+         """Make a paginated GET request to GitHub API, fetching all pages.
+
+         GitHub API returns at most 100 items per page for list endpoints.
+         This method follows pagination links to fetch ALL items.
+
+         Args:
+             endpoint: API endpoint path (e.g., "pulls/123/comments")
+             max_pages: Maximum number of pages to fetch (safety limit)
+
+         Returns:
+             Combined list of all items across all pages
+         """
+         if not self.is_configured():
+             logger.error("GitHub integration not configured")
+             return []
+
+         all_items: list[dict[str, Any]] = []
+         url: str | None = f"{self.api_url}/repos/{self.repository}/{endpoint}"
+         page_count = 0
+
+         # Add per_page=100 to maximize items per request
+         if "?" in endpoint:
+             url = f"{url}&per_page=100"
+         else:
+             url = f"{url}?per_page=100"
+
+         while url and page_count < max_pages:
+             page_count += 1
+             try:
+                 if self._client:
+                     response = await self._client.request("GET", url)
+                 else:
+                     async with httpx.AsyncClient(
+                         timeout=httpx.Timeout(30.0), headers=self._get_headers()
+                     ) as client:
+                         response = await client.request("GET", url)
+
+                 response.raise_for_status()
+                 items = response.json()
+
+                 if isinstance(items, list):
+                     all_items.extend(items)
+                     logger.debug(
+                         f"Fetched page {page_count} with {len(items)} items "
+                         f"(total: {len(all_items)})"
+                     )
+                 else:
+                     # Not a list response, shouldn't happen for list endpoints
+                     logger.warning(f"Unexpected response type on page {page_count}")
+                     break
+
+                 # Check for next page in Link header
+                 # Format: <url>; rel="next", <url>; rel="last"
+                 link_header = response.headers.get("Link", "")
+                 url = None # Reset for next iteration
+
+                 if link_header:
+                     for link in link_header.split(","):
+                         if 'rel="next"' in link:
+                             # Extract URL from <url>
+                             match = re.search(r"<([^>]+)>", link)
+                             if match:
+                                 url = match.group(1)
+                             break
+
+             except httpx.HTTPStatusError as e:
+                 logger.error(f"HTTP error during pagination: {e.response.status_code}")
+                 break
+             except Exception as e: # pylint: disable=broad-exception-caught
+                 logger.error(f"Error during pagination: {e}")
+                 break
+
+         if page_count >= max_pages:
+             logger.warning(f"Reached max pages limit ({max_pages}), results may be incomplete")
+
+         logger.debug(
+             f"Paginated request complete: {len(all_items)} total items from {page_count} page(s)"
+         )
+         return all_items
+
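The pagination loop above walks GitHub's Link response header. The same rel="next" extraction in isolation, with a made-up header value for illustration:

import re

# Hypothetical Link header as returned by a GitHub list endpoint.
link_header = (
    '<https://api.github.com/repos/o/r/pulls/1/comments?page=2>; rel="next", '
    '<https://api.github.com/repos/o/r/pulls/1/comments?page=9>; rel="last"'
)

next_url = None
for link in link_header.split(","):
    if 'rel="next"' in link:
        match = re.search(r"<([^>]+)>", link)  # pull the URL out of <...>
        if match:
            next_url = match.group(1)
        break
print(next_url)  # https://api.github.com/repos/o/r/pulls/1/comments?page=2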
+     # ==================== PR Comments ====================
+
+     async def post_comment(self, comment_body: str) -> bool:
+         """Post a general comment to a PR.
+
+         Args:
+             comment_body: The markdown content to post
+
+         Returns:
+             True if successful, False otherwise
+         """
+         result = await self._make_request(
+             "POST",
+             f"issues/{self.pr_number}/comments",
+             json={"body": comment_body},
+         )
+
+         if result:
+             logger.info(f"Successfully posted comment to PR #{self.pr_number}")
+             return True
+         return False
+
+     async def update_or_create_comment(
+         self, comment_body: str, identifier: str = "<!-- iam-policy-validator -->"
+     ) -> bool:
+         """Update an existing comment or create a new one.
+
+         This method will look for an existing comment with the identifier
+         and update it, or create a new comment if none exists.
+
+         Args:
+             comment_body: The markdown content to post
+             identifier: HTML comment identifier to find existing comments
+
+         Returns:
+             True if successful, False otherwise
+         """
+         # Add identifier to comment body
+         full_body = f"{identifier}\n{comment_body}"
+
+         # Try to find and update existing comment
+         existing_comment_id = await self._find_existing_comment(identifier)
+
+         if existing_comment_id:
+             return await self._update_comment(existing_comment_id, full_body)
+         else:
+             return await self.post_comment(full_body)
+
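The identifier is an HTML comment that is invisible in rendered markdown but lets the next run find and edit the same comment instead of stacking new ones. A minimal calling sketch (the helper name and report text are illustrative):

async def publish_summary(gh: GitHubIntegration, report_md: str) -> bool:
    # Hypothetical wrapper: reruns edit the same PR comment in place.
    return await gh.update_or_create_comment(
        report_md, identifier="<!-- iam-policy-validator -->"
    )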
+     async def post_multipart_comments(
+         self,
+         comment_parts: list[str],
+         identifier: str = "<!-- iam-policy-validator -->",
+     ) -> bool:
+         """Post or update multiple related comments (for large reports).
+
+         For single-part comments (most common case), this will UPDATE the
+         existing comment in place rather than delete and recreate it.
+         This preserves comment history and avoids PR timeline noise.
+
+         For multi-part comments:
+         1. Delete all old comments with the identifier
+         2. Post new comments in sequence with part indicators
+         3. Validate each part stays under GitHub's limit
+
+         Args:
+             comment_parts: List of comment bodies to post (split into parts)
+             identifier: HTML comment identifier to find/manage existing comments
+
+         Returns:
+             True if all parts posted successfully, False otherwise
+         """
+         # GitHub's actual limit
+         github_comment_limit = 65536
+
+         total_parts = len(comment_parts)
+
+         # Optimization: For single-part comments, use update-or-create
+         # This preserves the existing comment and avoids PR timeline noise
+         if total_parts == 1:
+             part_body = comment_parts[0]
+             full_body = f"{identifier}\n\n{part_body}"
+
+             # Safety check: ensure we don't exceed GitHub's limit
+             if len(full_body) > github_comment_limit:
+                 logger.error(
+                     f"Comment exceeds GitHub's limit ({len(full_body)} > {github_comment_limit} chars). "
+                     f"Comment will be truncated."
+                 )
+                 available_space = github_comment_limit - 500
+                 truncated_body = part_body[:available_space]
+                 truncation_warning = (
+                     "\n\n---\n\n"
+                     "> ⚠️ **This comment was truncated to fit GitHub's size limit**\n"
+                     ">\n"
+                     "> Download the full report using `--output report.json` or "
+                     "`--format markdown --output report.md`\n"
+                 )
+                 full_body = f"{identifier}\n\n{truncated_body}{truncation_warning}"
+
+             success = await self.update_or_create_comment(full_body, identifier)
+             if success:
+                 logger.info("Successfully updated summary comment")
+             return success
+
+         # Multi-part: Delete all existing comments with this identifier first
+         await self._delete_comments_with_identifier(identifier)
+
+         # Post each part
+         success = True
+
+         for part_num, part_body in enumerate(comment_parts, 1):
+             # Add identifier and part indicator
+             part_indicator = f"**(Part {part_num}/{total_parts})**"
+             full_body = f"{identifier}\n{part_indicator}\n\n{part_body}"
+
+             # Safety check: ensure we don't exceed GitHub's limit
+             if len(full_body) > github_comment_limit:
+                 logger.error(
+                     f"Part {part_num}/{total_parts} exceeds GitHub's comment limit "
+                     f"({len(full_body)} > {github_comment_limit} chars). "
+                     f"This part will be truncated."
+                 )
+                 # Truncate with warning message
+                 available_space = github_comment_limit - 500 # Reserve space for truncation message
+                 truncated_body = part_body[:available_space]
+                 truncation_warning = (
+                     "\n\n---\n\n"
+                     "> ⚠️ **This comment was truncated to fit GitHub's size limit**\n"
+                     ">\n"
+                     "> Download the full report using `--output report.json` or "
+                     "`--format markdown --output report.md`\n"
+                 )
+                 full_body = (
+                     f"{identifier}\n{part_indicator}\n\n{truncated_body}{truncation_warning}"
+                 )
+
+             if not await self.post_comment(full_body):
+                 logger.error(f"Failed to post comment part {part_num}/{total_parts}")
+                 success = False
+             else:
+                 logger.debug(
+                     f"Posted part {part_num}/{total_parts} ({len(full_body):,} characters)"
+                 )
+
+         if success:
+             logger.info(f"Successfully posted {total_parts} comment part(s)")
+
+         return success
+
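A caller has to split a large report into parts before passing it in. A naive splitter that respects the 65,536-character cap (a sketch only: split_report is a hypothetical helper, and it ignores markdown boundaries):

def split_report(report: str, limit: int = 65536, overhead: int = 600) -> list[str]:
    # Hypothetical helper: reserve headroom for the identifier and the
    # "(Part i/n)" indicator that post_multipart_comments prepends.
    chunk = limit - overhead
    return [report[i : i + chunk] for i in range(0, len(report), chunk)] or [""]

# Usage: await gh.post_multipart_comments(split_report(big_report))

A real splitter would cut at section or line boundaries so markdown tables and code spans are not broken mid-part.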
+     async def _delete_comments_with_identifier(self, identifier: str) -> int:
+         """Delete all comments with the given identifier.
+
+         Args:
+             identifier: HTML comment identifier to find comments
+
+         Returns:
+             Number of comments deleted
+         """
+         result = await self._make_request("GET", f"issues/{self.pr_number}/comments")
+
+         deleted_count = 0
+         if result and isinstance(result, list):
+             for comment in result:
+                 if not isinstance(comment, dict):
+                     continue
+
+                 body = comment.get("body", "")
+                 comment_id = comment.get("id")
+
+                 if identifier in str(body) and isinstance(comment_id, int):
+                     delete_result = await self._make_request(
+                         "DELETE", f"issues/comments/{comment_id}"
+                     )
+                     if delete_result is not None:
+                         deleted_count += 1
+
+         if deleted_count > 0:
+             logger.info(f"Deleted {deleted_count} old comments")
+
+         return deleted_count
+
+     async def _find_existing_comment(self, identifier: str) -> int | None:
+         """Find an existing comment with the given identifier."""
+         result = await self._make_request("GET", f"issues/{self.pr_number}/comments")
+
+         if result and isinstance(result, list):
+             for comment in result:
+                 if isinstance(comment, dict) and identifier in str(comment.get("body", "")):
+                     comment_id = comment.get("id")
+                     if isinstance(comment_id, int):
+                         return comment_id
+
+         return None
+
+     async def _update_comment(self, comment_id: int, comment_body: str) -> bool:
+         """Update an existing GitHub comment."""
+         result = await self._make_request(
+             "PATCH",
+             f"issues/comments/{comment_id}",
+             json={"body": comment_body},
+         )
+
+         if result:
+             logger.info(f"Successfully updated comment {comment_id}")
+             return True
+         return False
+
+     # ==================== PR Review Comments (Line-specific) ====================
+
+     async def get_review_comments(self) -> list[dict[str, Any]]:
+         """Get all review comments on the PR with pagination.
+
+         Fetches ALL review comments across all pages. This is critical for
+         proper comment deduplication and cleanup when there are many findings.
+
+         Returns:
+             List of all review comment dicts
+         """
+         return await self._make_paginated_request(f"pulls/{self.pr_number}/comments")
+
+     async def get_bot_review_comments_with_location(
+         self, identifier: str = constants.BOT_IDENTIFIER
+     ) -> dict[tuple[str, int, str], dict[str, Any]]:
+         """Get bot review comments indexed by file path, line number, and issue type.
+
+         This enables efficient lookup to update existing comments.
+         Uses (path, line, issue_type) as key to support multiple issues at the same line.
+
+         Args:
+             identifier: String to identify bot comments
+
+         Returns:
+             Dict mapping (file_path, line_number, issue_type) to comment metadata dict
+             Comment dict contains: id, body, path, line, issue_type, commit_id
+         """
+         comments = await self.get_review_comments()
+         bot_comments_map: dict[tuple[str, int, str], dict[str, Any]] = {}
+
+         for comment in comments:
+             if not isinstance(comment, dict):
+                 continue
+
+             body = comment.get("body", "")
+             comment_id = comment.get("id")
+             path = comment.get("path")
+             line = comment.get("line") or comment.get("original_line")
+
+             # Check if this is a bot comment with valid location
+             if (
+                 identifier in str(body)
+                 and isinstance(comment_id, int)
+                 and isinstance(path, str)
+                 and isinstance(line, int)
+             ):
+                 # Extract issue type from HTML comment
+                 issue_type_match = re.search(r"<!-- issue-type: (\w+) -->", body)
+                 issue_type = issue_type_match.group(1) if issue_type_match else "unknown"
+
+                 key = (path, line, issue_type)
+                 bot_comments_map[key] = {
+                     "id": comment_id,
+                     "body": body,
+                     "path": path,
+                     "line": line,
+                     "issue_type": issue_type,
+                     "commit_id": comment.get("commit_id"),
+                 }
+
+         logger.debug(f"Found {len(bot_comments_map)} bot review comments at specific locations")
+         return bot_comments_map
+
+     async def delete_review_comment(self, comment_id: int) -> bool:
+         """Delete a specific review comment.
+
+         Args:
+             comment_id: ID of the comment to delete
+
+         Returns:
+             True if successful, False otherwise
+         """
+         result = await self._make_request(
+             "DELETE",
+             f"pulls/comments/{comment_id}",
+         )
+
+         if result is not None: # DELETE returns empty dict on success
+             logger.debug(f"Successfully deleted review comment {comment_id}")
+             return True
+         return False
+
+     async def _delete_comments_parallel(
+         self, comment_ids: list[int], max_concurrent: int = MAX_CONCURRENT_API_CALLS
+     ) -> tuple[int, int]:
+         """Delete multiple review comments in parallel with controlled concurrency.
+
+         Uses a semaphore to limit concurrent API calls, preventing rate limit issues
+         while still being much faster than sequential deletion.
+
+         Args:
+             comment_ids: List of comment IDs to delete
+             max_concurrent: Maximum number of concurrent deletions (default: 10)
+
+         Returns:
+             Tuple of (successful_count, failed_count)
+         """
+         if not comment_ids:
+             return (0, 0)
+
+         semaphore = asyncio.Semaphore(max_concurrent)
+
+         async def delete_with_limit(comment_id: int) -> bool:
+             async with semaphore:
+                 return await self.delete_review_comment(comment_id)
+
+         # Run all deletions in parallel (semaphore controls actual concurrency)
+         results = await asyncio.gather(
+             *[delete_with_limit(cid) for cid in comment_ids],
+             return_exceptions=True,
+         )
+
+         successful = sum(1 for r in results if r is True)
+         failed = len(results) - successful
+
+         if successful > 0:
+             logger.info(f"Parallel deletion: {successful} deleted, {failed} failed")
+
+         return (successful, failed)
+
+     # NOTE: resolve_review_comment was removed because GitHub REST API doesn't support
+     # resolving review comments via {"state": "resolved"}. Resolving review threads
+     # requires the GraphQL API with resolveReviewThread mutation.
+     # See: https://docs.github.com/en/graphql/reference/mutations#resolvereviewthread
+
+     async def update_review_comment(self, comment_id: int, new_body: str) -> bool:
+         """Update the body text of an existing review comment.
+
+         Args:
+             comment_id: ID of the comment to update
+             new_body: New comment text (markdown supported)
+
+         Returns:
+             True if successful, False otherwise
+         """
+         result = await self._make_request(
+             "PATCH",
+             f"pulls/comments/{comment_id}",
+             json={"body": new_body},
+         )
+
+         if result is not None:
+             logger.debug(f"Successfully updated review comment {comment_id}")
+             return True
+         return False
+
+     async def cleanup_bot_review_comments(self, identifier: str = constants.BOT_IDENTIFIER) -> int:
+         """Delete all review comments from the bot (from previous runs).
+
+         This ensures old/outdated comments are removed before posting new ones.
+         Uses parallel deletion for speed when there are many comments.
+
+         Args:
+             identifier: String to identify bot comments
+
+         Returns:
+             Number of comments deleted
+         """
+         comments = await self.get_review_comments()
+
+         # Collect all bot comment IDs to delete
+         comment_ids_to_delete: list[int] = []
+         for comment in comments:
+             if not isinstance(comment, dict):
+                 continue
+
+             body = comment.get("body", "")
+             comment_id = comment.get("id")
+
+             # Check if this is a bot comment
+             if identifier in str(body) and isinstance(comment_id, int):
+                 comment_ids_to_delete.append(comment_id)
+
+         if not comment_ids_to_delete:
+             return 0
+
+         # Delete all bot comments in parallel
+         successful, _failed = await self._delete_comments_parallel(comment_ids_to_delete)
+
+         if successful > 0:
+             logger.info(f"Cleaned up {successful} old review comments")
+
+         return successful
+
+     # NOTE: cleanup_bot_review_comments_by_resolving was removed because it depended on
+     # resolve_review_comment which doesn't work with GitHub REST API.
+     # Use cleanup_bot_review_comments (deletion) instead, or implement GraphQL-based
+     # resolution if audit trail preservation is needed.
+
+     async def create_review_comment(
+         self,
+         commit_id: str,
+         file_path: str,
+         line: int,
+         body: str,
+         side: str = "RIGHT",
+     ) -> bool:
+         """Create a line-specific review comment on a file in the PR.
+
+         Args:
+             commit_id: The SHA of the commit to comment on
+             file_path: The relative path to the file in the repo
+             line: The line number in the file to comment on
+             body: The comment text (markdown supported)
+             side: Which side of the diff ("LEFT" for deletion, "RIGHT" for addition)
+
+         Returns:
+             True if successful, False otherwise
+         """
+         result = await self._make_request(
+             "POST",
+             f"pulls/{self.pr_number}/comments",
+             json={
+                 "commit_id": commit_id,
+                 "path": file_path,
+                 "line": line,
+                 "side": side,
+                 "body": body,
+             },
+         )
+
+         if result:
+             logger.info(f"Successfully posted review comment on {file_path}:{line}")
+             return True
+         return False
+
+     async def create_review_with_comments(
+         self,
+         comments: list[dict[str, Any]],
+         body: str = "",
+         event: ReviewEvent = ReviewEvent.COMMENT,
+     ) -> bool:
+         """Create a review with multiple line-specific comments.
+
+         Args:
+             comments: List of comment dicts with keys: path, line, body, (optional) side
+             body: The overall review body text
+             event: The review event type (APPROVE, REQUEST_CHANGES, COMMENT)
+
+         Returns:
+             True if successful, False otherwise
+
+         Example:
+             comments = [
+                 {
+                     "path": "policies/policy.json",
+                     "line": 5,
+                     "body": "Invalid action detected here",
+                 },
+                 {
+                     "path": "policies/policy.json",
+                     "line": 12,
+                     "body": "Missing condition key",
+                 },
+             ]
+         """
+         # Get the latest commit SHA
+         pr_info = await self.get_pr_info()
+         if not pr_info:
+             return False
+
+         head_info = pr_info.get("head")
+         if not isinstance(head_info, dict):
+             logger.error("Invalid PR head information")
+             return False
+
+         commit_id = head_info.get("sha")
+         if not isinstance(commit_id, str):
+             logger.error("Could not get commit SHA from PR")
+             return False
+
+         # Format comments for the review API
+         formatted_comments: list[dict[str, Any]] = []
+         for comment in comments:
+             formatted_comments.append(
+                 {
+                     "path": comment["path"],
+                     "line": comment["line"],
+                     "body": comment["body"],
+                     "side": comment.get("side", "RIGHT"),
+                 }
+             )
+
+         result = await self._make_request(
+             "POST",
+             f"pulls/{self.pr_number}/reviews",
+             json={
+                 "commit_id": commit_id,
+                 "body": body,
+                 "event": event.value,
+                 "comments": formatted_comments,
+             },
+         )
+
+         if result:
+             logger.info(f"Successfully created review with {len(comments)} comments")
+             return True
+         return False
+
+     async def update_or_create_review_comments(
+         self,
+         comments: list[dict[str, Any]],
+         body: str = "",
+         event: ReviewEvent = ReviewEvent.COMMENT,
+         identifier: str = constants.REVIEW_IDENTIFIER,
+         validated_files: set[str] | None = None,
+         skip_cleanup: bool = False,
+     ) -> bool:
+         """Smart comment management using fingerprint-based matching.
+
+         This method uses finding fingerprints (stable IDs) as the PRIMARY key
+         for matching comments, with location as SECONDARY for new comments.
+
+         Strategy:
+         1. Index existing comments by finding_id (from HTML comment)
+         2. For each new comment:
+            - If finding_id exists: UPDATE (even if line changed)
+            - If new: CREATE at specified line
+         3. Delete comments whose finding_id is not in new set (resolved)
+            (unless skip_cleanup=True)
+
+         Note: Comments stay at their original line even if the issue moved,
+         because GitHub doesn't support moving review comments. The comment
+         body is updated to reflect any changes.
+
+         Args:
+             comments: List of comment dicts with keys: path, line, body, (optional) side
+             body: The overall review body text
+             event: The review event type (APPROVE, REQUEST_CHANGES, COMMENT)
+             identifier: String to identify bot comments (for matching existing)
+             validated_files: Set of all file paths that were validated in this run.
+                 Used to clean up comments for files that no longer have findings.
+                 If None, only files with current findings are considered.
+             skip_cleanup: If True, skip the cleanup phase (deleting resolved comments).
+                 Use this in streaming mode where files are processed one at a time
+                 to avoid deleting comments from files processed earlier.
+
+         Returns:
+             True if successful, False otherwise
+
+         Example:
+             # First run: Creates 2 comments
+             comments = [
+                 {"path": "policy.json", "line": 5, "body": "<!-- finding-id: abc123 -->Issue A"},
+                 {"path": "policy.json", "line": 10, "body": "<!-- finding-id: def456 -->Issue B"},
+             ]
+
+             # Second run: Same findings, even if lines shifted
+             comments = [
+                 {"path": "policy.json", "line": 8, "body": "<!-- finding-id: abc123 -->Issue A (updated)"},
+                 {"path": "policy.json", "line": 15, "body": "<!-- finding-id: def456 -->Issue B"},
+             ]
+             # Result: Both comments UPDATED in place (not recreated), preserving conversation history
+         """
+         # Step 1: Get existing bot comments indexed by fingerprint
+         existing_by_fingerprint = await self._get_bot_comments_by_fingerprint(identifier)
+         logger.debug(
+             f"Found {len(existing_by_fingerprint)} existing bot comments with fingerprints"
+         )
+
+         # Also get location-based index for fallback (comments without fingerprints)
+         existing_by_location = await self.get_bot_review_comments_with_location(identifier)
+
+         seen_fingerprints: set[str] = set()
+         seen_locations: set[tuple[str, int, str]] = set()
+         # Track comment IDs that were updated/matched - these should NOT be deleted
+         matched_comment_ids: set[int] = set()
+         updated_count = 0
+         new_comments_for_review: list[dict[str, Any]] = []
+
+         for comment in comments:
+             path = comment["path"]
+             line = comment["line"]
+             new_body = comment["body"]
+
+             # Try fingerprint-based matching first
+             finding_id = self._extract_finding_id(new_body)
+
+             if finding_id:
+                 seen_fingerprints.add(finding_id)
+
+                 if finding_id in existing_by_fingerprint:
+                     existing = existing_by_fingerprint[finding_id]
+                     matched_comment_ids.add(existing["id"])
+                     # Check if update needed (body changed)
+                     if existing["body"] != new_body:
+                         success = await self.update_review_comment(existing["id"], new_body)
+                         if success:
+                             updated_count += 1
+                             logger.debug(
+                                 f"Updated comment for finding {finding_id[:8]}... "
+                                 f"(was at {existing['path']}:{existing['line']})"
+                             )
+                     else:
+                         logger.debug(f"Comment for finding {finding_id[:8]}... unchanged")
+                     continue
+
+             # Fallback: location-based matching
+             # This handles both:
+             # 1. Legacy comments without fingerprints
+             # 2. Comments with fingerprints that don't match (e.g., path changed)
+             issue_type_match = re.search(r"<!-- issue-type: (\w+) -->", new_body)
+             issue_type = issue_type_match.group(1) if issue_type_match else "unknown"
+             location = (path, line, issue_type)
+             seen_locations.add(location)
+
+             existing_loc = existing_by_location.get(location)
+             if existing_loc:
+                 # Found existing comment at same location with same issue type
+                 # Update it (this handles both legacy comments and fingerprint mismatches)
+                 matched_comment_ids.add(existing_loc["id"])
+                 if existing_loc["body"] != new_body:
+                     success = await self.update_review_comment(existing_loc["id"], new_body)
+                     if success:
+                         updated_count += 1
+                         if finding_id:
+                             logger.debug(
+                                 f"Updated comment at {path}:{line} (fingerprint mismatch, location match)"
+                             )
+                         else:
+                             logger.debug(f"Updated legacy comment at {path}:{line}")
+                 continue
+
+             # New comment - collect for batch creation
+             new_comments_for_review.append(comment)
+
+         # Step 2: Create new comments via review API (if any)
+         created_count = 0
+         if new_comments_for_review:
+             success = await self.create_review_with_comments(
+                 new_comments_for_review,
+                 body=body,
+                 event=event,
+             )
+             if success:
+                 created_count = len(new_comments_for_review)
+                 logger.info(f"Created {created_count} new review comments")
+             else:
+                 logger.error("Failed to create new review comments")
+                 return False
+
+         # Step 3: Delete resolved comments (unless skip_cleanup is True)
+         # In streaming mode, we skip cleanup because we're processing files one at a time
+         # and don't want to delete comments from files processed earlier in the stream
+         deleted_count = 0
+
+         if skip_cleanup:
+             logger.debug("Skipping cleanup phase (streaming mode)")
+         else:
+             # Priority: fingerprint-based deletion, then location-based for legacy
+             # Also clean up comments for files removed from the PR or files that were
+             # validated but no longer have findings
+             files_with_findings = {c["path"] for c in comments}
+
+             # Use validated_files if provided, otherwise fall back to files_with_findings
+             # This ensures we clean up comments for files that were validated but have no findings
+             files_in_scope = validated_files if validated_files is not None else files_with_findings
+
+             # Get current PR files to detect removed files
+             # Note: get_pr_files() returns [] on error, so we check for non-empty result
+             pr_files = await self.get_pr_files()
+             if pr_files:
+                 current_pr_files: set[str] | None = {f["filename"] for f in pr_files}
+             else:
+                 # Empty result could be an API error - fall back to batch-only cleanup
+                 # to avoid accidentally deleting valid comments
+                 logger.debug("Could not fetch PR files for cleanup, using batch-only mode")
+                 current_pr_files = None
+
+             def should_delete_comment(existing_path: str) -> bool:
+                 """Check if a comment should be deleted based on file status.
+
+                 A comment should be deleted if the file is part of this PR.
+                 The fingerprint check (done by caller) ensures we only delete
+                 comments for findings that are no longer present.
+
+                 This aggressive cleanup ensures stale comments are removed even if:
+                 - The file was fixed but not re-validated in this specific run
+                 - The validation runs on a subset of PR files
+
+                 We preserve comments for files NOT in the PR to avoid accidentally
+                 deleting comments from other branches/PRs.
+                 """
+                 # If we successfully fetched PR files, delete comments for any PR file
+                 # whose finding is no longer present (fingerprint check done by caller)
+                 if current_pr_files is not None:
+                     return existing_path in current_pr_files
+
+                 # Fallback: if we couldn't fetch PR files, only clean up validated files
+                 # to avoid accidentally deleting valid comments
+                 return existing_path in files_in_scope
+
+             # Collect all comment IDs to delete
+             # Delete by fingerprint (primary) - comments that:
+             # 1. Were NOT matched (updated) in this run
+             # 2. Have a fingerprint not in the new findings
+             # 3. Are in files that are part of this PR/validation
+             comment_ids_to_delete: list[int] = []
+
+             for fingerprint, existing in existing_by_fingerprint.items():
+                 comment_id = existing["id"]
+                 # Skip if this comment was matched/updated via location fallback
+                 if comment_id in matched_comment_ids:
+                     continue
+                 if fingerprint not in seen_fingerprints and should_delete_comment(existing["path"]):
+                     comment_ids_to_delete.append(comment_id)
+                     logger.debug(f"Marking for deletion: resolved comment {fingerprint[:8]}...")
+
+             # Delete by location (legacy comments without fingerprints)
+             for location, existing in existing_by_location.items():
+                 comment_id = existing["id"]
+                 # Skip if already matched/updated
+                 if comment_id in matched_comment_ids:
+                     continue
+                 # Skip if already marked for deletion by fingerprint above
+                 existing_fingerprint = self._extract_finding_id(existing.get("body", ""))
+                 if existing_fingerprint:
+                     continue # Already handled above
+
+                 if location not in seen_locations and should_delete_comment(existing["path"]):
+                     comment_ids_to_delete.append(comment_id)
+                     logger.debug(f"Marking for deletion: resolved legacy comment at {location}")
+
+             # Delete all collected comments in parallel
+             if comment_ids_to_delete:
+                 deleted_count, _failed = await self._delete_comments_parallel(comment_ids_to_delete)
+
+         logger.info(
+             f"Review comment management: {updated_count} updated, "
+             f"{created_count} created, {deleted_count} deleted (resolved)"
+         )
+
+         return True
+
+     def _extract_finding_id(self, body: str) -> str | None:
+         """Extract finding ID from comment body HTML comment.
+
+         Args:
+             body: Comment body text
+
+         Returns:
+             16-character finding ID hash, or None if not found
+         """
+         match = re.search(r"<!-- finding-id: ([a-f0-9]{16}) -->", body)
+         return match.group(1) if match else None
+
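The fingerprint round trip is easiest to see in isolation. A sketch that embeds and then recovers a finding ID; the hashing scheme here is illustrative (the real IDs come from iam_validator/core/finding_fingerprint.py), only the 16-hex-char shape matters to _extract_finding_id:

import hashlib
import re

def make_finding_id(path: str, check: str, detail: str) -> str:
    # Hypothetical stable fingerprint: 16 hex chars, matching the
    # ([a-f0-9]{16}) pattern that _extract_finding_id expects.
    return hashlib.sha256(f"{path}|{check}|{detail}".encode()).hexdigest()[:16]

finding_id = make_finding_id("policy.json", "wildcard_action", "Action: *")
body = f"<!-- finding-id: {finding_id} -->\nWildcard action detected."
match = re.search(r"<!-- finding-id: ([a-f0-9]{16}) -->", body)
assert match and match.group(1) == finding_id

Because the ID is derived from the finding rather than its position, reruns match the same comment even after lines shift.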
+     async def _get_bot_comments_by_fingerprint(self, identifier: str) -> dict[str, dict[str, Any]]:
+         """Index existing bot comments by their finding fingerprint.
+
+         Args:
+             identifier: String to identify bot comments
+
+         Returns:
+             Dict mapping finding_id to comment metadata dict
+             Comment dict contains: id, body, path, line
+         """
+         comments = await self.get_review_comments()
+         indexed: dict[str, dict[str, Any]] = {}
+
+         for comment in comments:
+             if not isinstance(comment, dict):
+                 continue
+
+             body = comment.get("body", "")
+             if identifier not in str(body):
+                 continue
+
+             finding_id = self._extract_finding_id(body)
+             if finding_id:
+                 indexed[finding_id] = {
+                     "id": comment["id"],
+                     "body": body,
+                     "path": comment.get("path", ""),
+                     "line": comment.get("line") or comment.get("original_line"),
+                 }
+
+         return indexed
+
+     # ==================== PR Labels ====================
+
+     async def add_labels(self, labels: list[str]) -> bool:
+         """Add labels to the PR.
+
+         Args:
+             labels: List of label names to add
+
+         Returns:
+             True if successful, False otherwise
+         """
+         result = await self._make_request(
+             "POST",
+             f"issues/{self.pr_number}/labels",
+             json={"labels": labels},
+         )
+
+         if result:
+             logger.info(f"Successfully added labels: {', '.join(labels)}")
+             return True
+         return False
+
+     async def remove_label(self, label: str) -> bool:
+         """Remove a label from the PR.
+
+         Args:
+             label: Label name to remove
+
+         Returns:
+             True if successful, False otherwise
+         """
+         result = await self._make_request(
+             "DELETE",
+             f"issues/{self.pr_number}/labels/{label}",
+         )
+
+         if result is not None: # DELETE returns empty dict on success
+             logger.info(f"Successfully removed label: {label}")
+             return True
+         return False
+
+     async def get_labels(self) -> list[str]:
+         """Get all labels on the PR.
+
+         Returns:
+             List of label names
+         """
+         result = await self._make_request(
+             "GET",
+             f"issues/{self.pr_number}/labels",
+         )
+
+         if result and isinstance(result, list):
+             labels: list[str] = []
+             for label in result:
+                 if isinstance(label, dict):
+                     name = label.get("name")
+                     if isinstance(name, str):
+                         labels.append(name)
+             return labels
+         return []
+
+     async def set_labels(self, labels: list[str]) -> bool:
+         """Set labels on the PR, replacing any existing labels.
+
+         Args:
+             labels: List of label names to set
+
+         Returns:
+             True if successful, False otherwise
+         """
+         result = await self._make_request(
+             "PUT",
+             f"issues/{self.pr_number}/labels",
+             json={"labels": labels},
+         )
+
+         if result:
+             logger.info(f"Successfully set labels: {', '.join(labels)}")
+             return True
+         return False
+
+     # ==================== PR Information ====================
+
+     async def get_pr_info(self) -> dict[str, Any] | None:
+         """Get detailed information about the PR.
+
+         Returns:
+             PR information dict or None on error
+         """
+         return await self._make_request("GET", f"pulls/{self.pr_number}")
+
+     async def get_pr_files(self) -> list[dict[str, Any]]:
+         """Get list of files changed in the PR.
+
+         Returns:
+             List of file information dicts
+         """
+         result = await self._make_request("GET", f"pulls/{self.pr_number}/files")
+
+         if result and isinstance(result, list):
+             return result
+         return []
+
+     async def get_pr_commits(self) -> list[dict[str, Any]]:
+         """Get list of commits in the PR.
+
+         Returns:
+             List of commit information dicts
+         """
+         result = await self._make_request("GET", f"pulls/{self.pr_number}/commits")
+
+         if result and isinstance(result, list):
+             return result
+         return []
+
+     # ==================== PR Status ====================
+
+     async def set_commit_status(
+         self,
+         state: str,
+         context: str,
+         description: str,
+         target_url: str | None = None,
+     ) -> bool:
+         """Set a commit status on the PR's head commit.
+
+         Args:
+             state: Status state ("error", "failure", "pending", "success")
+             context: A string label to differentiate this status from others
+             description: A short description of the status
+             target_url: Optional URL to link to more details
+
+         Returns:
+             True if successful, False otherwise
+         """
+         pr_info = await self.get_pr_info()
+         if not pr_info:
+             return False
+
+         head_info = pr_info.get("head")
+         if not isinstance(head_info, dict):
+             return False
+
+         commit_sha = head_info.get("sha")
+         if not isinstance(commit_sha, str):
+             return False
+
+         payload: dict[str, Any] = {
+             "state": state,
+             "context": context,
+             "description": description,
+         }
+         if target_url:
+             payload["target_url"] = target_url
+
+         result = await self._make_request(
+             "POST",
+             f"statuses/{commit_sha}",
+             json=payload,
+         )
+
+         if result:
+             logger.info(f"Successfully set commit status: {state}")
+             return True
+         return False
+
1481
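Because set_commit_status resolves the head SHA itself via get_pr_info, callers supply only the status fields. A hedged sketch of reporting a validation outcome (the findings_count parameter is hypothetical):

    async def report_validation_status(client, findings_count: int) -> bool:
        # GitHub keys statuses by "context", so re-posting with the same
        # context replaces the previous state rather than adding a new row.
        return await client.set_commit_status(
            state="success" if findings_count == 0 else "failure",
            context="iam-policy-validator",
            description=f"{findings_count} finding(s)" if findings_count else "No findings",
        )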
+    # ==================== CODEOWNERS and Ignore Commands ====================
+
+    async def get_codeowners_content(self) -> str | None:
+        """Fetch CODEOWNERS file content from repository.
+
+        Results are cached per instance to avoid redundant API calls.
+
+        Searches in standard CODEOWNERS locations:
+        - CODEOWNERS
+        - .github/CODEOWNERS
+        - docs/CODEOWNERS
+
+        Returns:
+            CODEOWNERS file content as string, or None if not found
+        """
+        # Return cached result if already loaded
+        if self._codeowners_loaded:
+            return self._codeowners_cache
+
+        from iam_validator.core.codeowners import (  # pylint: disable=import-outside-toplevel
+            CodeOwnersParser,
+        )
+
+        for path in CodeOwnersParser.CODEOWNERS_PATHS:
+            result = await self._make_request(
+                "GET",
+                f"contents/{path}",
+            )
+
+            if result and isinstance(result, dict) and "content" in result:
+                try:
+                    content = base64.b64decode(result["content"]).decode("utf-8")
+                    logger.debug(f"Found CODEOWNERS at {path}")
+                    # Cache the result
+                    self._codeowners_cache = content
+                    self._codeowners_loaded = True
+                    return content
+                except (ValueError, UnicodeDecodeError) as e:
+                    logger.warning(f"Failed to decode CODEOWNERS at {path}: {e}")
+                    continue
+
+        logger.debug("No CODEOWNERS file found in repository")
+        # Cache the negative result too
+        self._codeowners_cache = None
+        self._codeowners_loaded = True
+        return None
+
+    async def get_team_members(self, org: str, team_slug: str) -> list[str]:
+        """Get members of a GitHub team.
+
+        Results are cached per instance to avoid redundant API calls
+        when checking multiple users against the same team.
+
+        Note: This requires the token to have `read:org` scope for
+        organization teams.
+
+        Args:
+            org: Organization name
+            team_slug: Team slug (URL-friendly name)
+
+        Returns:
+            List of team member usernames (lowercase)
+        """
+        # Check cache first
+        cache_key = (org.lower(), team_slug.lower())
+        if cache_key in self._team_cache:
+            logger.debug(f"Using cached team members for {org}/{team_slug}")
+            return self._team_cache[cache_key]
+
+        url = f"{self.api_url}/orgs/{org}/teams/{team_slug}/members"
+
+        try:
+            if self._client:
+                response = await self._client.request("GET", url)
+            else:
+                async with httpx.AsyncClient(
+                    headers=self._get_headers(), timeout=httpx.Timeout(30.0)
+                ) as client:
+                    response = await client.request("GET", url)
+
+            response.raise_for_status()
+            result = response.json()
+
+            if isinstance(result, list):
+                members = [
+                    member.get("login", "").lower()
+                    for member in result
+                    if isinstance(member, dict) and member.get("login")
+                ]
+                # Cache the result
+                self._team_cache[cache_key] = members
+                logger.debug(f"Found {len(members)} members in team {org}/{team_slug}")
+                return members
+
+        except httpx.HTTPStatusError as e:
+            logger.warning(
+                f"Failed to get team members for {org}/{team_slug}: HTTP {e.response.status_code}"
+            )
+        except Exception as e:  # pylint: disable=broad-exception-caught
+            logger.warning(f"Failed to get team members for {org}/{team_slug}: {e}")
+
+        # Cache empty result to avoid repeated failed API calls
+        self._team_cache[cache_key] = []
+        return []
+
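The per-instance caches used above (_codeowners_cache, _codeowners_loaded, _team_cache) are initialized in the constructor, which appears earlier in this file; a sketch of the attribute shapes inferred purely from their usage here (the actual __init__ may differ):

    # Inferred attribute shapes, not the package's actual constructor code.
    self._codeowners_cache: str | None = None
    self._codeowners_loaded: bool = False  # separates "not fetched yet" from "absent"
    self._team_cache: dict[tuple[str, str], list[str]] = {}  # (org, team_slug) -> members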
+    async def is_user_codeowner(
+        self,
+        username: str,
+        file_path: str,
+        codeowners_parser: "CodeOwnersParser | None" = None,
+        allowed_users: list[str] | None = None,
+    ) -> bool:
+        """Check if a user is authorized to ignore findings for a file.
+
+        Authorization is granted if:
+        1. User is listed directly in CODEOWNERS for the file
+        2. User is a member of a team listed in CODEOWNERS for the file
+        3. User is in the allowed_users fallback list (when no CODEOWNERS)
+
+        Performance: Team membership checks are executed in parallel.
+
+        Args:
+            username: GitHub username to check
+            file_path: Path to the file being checked
+            codeowners_parser: Pre-parsed CODEOWNERS (for caching)
+            allowed_users: Fallback list of allowed users (when no CODEOWNERS)
+
+        Returns:
+            True if user is authorized, False otherwise
+        """
+        username_lower = username.lower()
+
+        # Check fallback allowed_users first (always applies if configured)
+        if allowed_users:
+            if username_lower in [u.lower() for u in allowed_users]:
+                logger.debug(f"User {username} authorized via allowed_users config")
+                return True
+
+        # Get or parse CODEOWNERS
+        parser = codeowners_parser
+        if parser is None:
+            content = await self.get_codeowners_content()
+            if content is None:
+                # No CODEOWNERS and no allowed_users match = deny
+                logger.debug(f"No CODEOWNERS file found, user {username} not in allowed_users")
+                return False
+
+            from iam_validator.core.codeowners import (  # pylint: disable=import-outside-toplevel
+                CodeOwnersParser,
+            )
+
+            parser = CodeOwnersParser(content)
+
+        # Check direct user ownership
+        if parser.is_owner(username, file_path):
+            logger.debug(f"User {username} is direct owner of {file_path}")
+            return True
+
+        # Check team membership - fetch all teams in parallel for speed
+        teams = parser.get_teams_for_file(file_path)
+        if not teams:
+            logger.debug(f"User {username} is not authorized for {file_path}")
+            return False
+
+        # Fetch all team memberships concurrently
+
+        async def check_team(org: str, team_slug: str) -> tuple[str, str, bool]:
+            members = await self.get_team_members(org, team_slug)
+            return (org, team_slug, username_lower in members)
+
+        results = await asyncio.gather(*[check_team(org, team_slug) for org, team_slug in teams])
+
+        for org, team_slug, is_member in results:
+            if is_member:
+                logger.debug(f"User {username} authorized via team {org}/{team_slug}")
+                return True
+
+        logger.debug(f"User {username} is not authorized for {file_path}")
+        return False
+
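A typical caller checks the author of an ignore reply against the file the finding was raised on. A sketch, assuming `reply` is a GitHub review-comment dict ("user" and "login" are standard fields of that payload):

    async def reply_author_may_ignore(client, reply: dict, file_path: str) -> bool:
        # Pull the commenter's login out of the review-comment payload and
        # delegate the CODEOWNERS/team/allowed_users decision to the method above.
        author = (reply.get("user") or {}).get("login", "")
        return bool(author) and await client.is_user_codeowner(author, file_path)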
+    async def get_issue_comments(self) -> list[dict[str, Any]]:
+        """Get all issue comments (general PR comments, not review comments) with pagination.
+
+        Fetches ALL issue comments across all pages. This ensures proper
+        comment management when there are many comments on a PR.
+
+        Returns:
+            List of all issue comment dicts
+        """
+        return await self._make_paginated_request(f"issues/{self.pr_number}/comments")
+
+    async def get_comment_by_id(self, comment_id: int) -> dict[str, Any] | None:
+        """Get a specific review comment by ID.
+
+        Used for verifying that ignore command replies still exist
+        (tamper-resistant verification).
+
+        Args:
+            comment_id: The ID of the review comment to fetch
+
+        Returns:
+            Comment dict if found, None if deleted or error
+        """
+        result = await self._make_request(
+            "GET",
+            f"pulls/comments/{comment_id}",
+        )
+
+        if result and isinstance(result, dict):
+            return result
+        return None
+
+    async def post_reply_to_review_comment(
+        self,
+        comment_id: int,
+        body: str,
+    ) -> bool:
+        """Post a reply to a review comment thread.
+
+        Args:
+            comment_id: The ID of the review comment to reply to
+            body: The reply text (markdown supported)
+
+        Returns:
+            True if successful, False otherwise
+        """
+        result = await self._make_request(
+            "POST",
+            f"pulls/{self.pr_number}/comments",
+            json={
+                "body": body,
+                "in_reply_to": comment_id,
+            },
+        )
+
+        if result:
+            logger.debug(f"Successfully posted reply to comment {comment_id}")
+            return True
+        return False
+
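One likely use of this method is acknowledging a processed ignore command in the same thread, keeping the audit trail next to the original finding. A hedged sketch (the acknowledgment wording is invented):

    async def acknowledge_ignore(client, reply: dict, reason: str | None) -> bool:
        # reply["id"] is the review comment being answered; threading the
        # acknowledgment keeps suppression decisions visible in the review.
        note = f" (reason: {reason})" if reason else ""
        return await client.post_reply_to_review_comment(
            comment_id=reply["id"],
            body=f"Acknowledged: finding suppressed{note}.",
        )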
+    async def scan_for_ignore_commands(
+        self,
+        identifier: str = constants.BOT_IDENTIFIER,
+    ) -> list[tuple[dict[str, Any], dict[str, Any]]]:
+        """Scan for ignore commands in replies to bot review comments.
+
+        Looks for replies to bot comments that contain ignore commands.
+        Supports formats: "ignore", "/ignore", "@iam-validator ignore",
+        "skip", "suppress", and "ignore: reason here".
+
+        Args:
+            identifier: String to identify bot comments
+
+        Returns:
+            List of (bot_comment, reply_comment) tuples where reply
+            contains an ignore command
+        """
+        all_comments = await self.get_review_comments()
+        ignore_commands: list[tuple[dict[str, Any], dict[str, Any]]] = []
+
+        # Index bot comments by ID for O(1) lookup
+        bot_comments_by_id: dict[int, dict[str, Any]] = {}
+        for comment in all_comments:
+            if not isinstance(comment, dict):
+                continue
+            body = comment.get("body", "")
+            comment_id = comment.get("id")
+            if identifier in str(body) and isinstance(comment_id, int):
+                bot_comments_by_id[comment_id] = comment
+
+        # Find replies with ignore commands
+        for comment in all_comments:
+            if not isinstance(comment, dict):
+                continue
+
+            reply_to_id = comment.get("in_reply_to_id")
+            if reply_to_id and reply_to_id in bot_comments_by_id:
+                body = comment.get("body", "")
+                if self._is_ignore_command(body):
+                    ignore_commands.append((bot_comments_by_id[reply_to_id], comment))
+
+        logger.debug(f"Found {len(ignore_commands)} ignore command(s) in PR comments")
+        return ignore_commands
+
+    def _is_ignore_command(self, text: str) -> bool:
+        """Check if text is an ignore command.
+
+        Supports:
+        - "ignore" (case insensitive)
+        - "/ignore"
+        - "@iam-validator ignore"
+        - "skip", "suppress"
+        - "ignore: reason here" (with optional reason)
+
+        Args:
+            text: Comment text to check
+
+        Returns:
+            True if text is an ignore command
+        """
+        if not text:
+            return False
+
+        text = text.strip().lower()
+
+        ignore_patterns = [
+            r"^\s*ignore\s*$",
+            r"^\s*/ignore\s*$",
+            r"^\s*@?iam-validator\s+ignore\s*$",
+            r"^\s*ignore\s*:\s*.+$",  # With reason
+            r"^\s*skip\s*$",
+            r"^\s*suppress\s*$",
+        ]
+
+        return any(re.match(pattern, text, re.IGNORECASE) for pattern in ignore_patterns)
+
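For concreteness, these anchored patterns accept bare commands but reject ignore keywords embedded in ordinary sentences; illustrative checks (assuming `client` is an instance of this class):

    assert client._is_ignore_command("/ignore")
    assert client._is_ignore_command("Ignore: false positive, resource is scoped")
    assert client._is_ignore_command("@iam-validator ignore")
    assert not client._is_ignore_command("please ignore this later")  # embedded word: no match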
+    @staticmethod
+    def extract_finding_id(comment_body: str) -> str | None:
+        """Extract finding ID from a bot comment.
+
+        Args:
+            comment_body: The comment body text
+
+        Returns:
+            Finding ID hash, or None if not found
+        """
+        match = re.search(r"<!-- finding-id: ([a-f0-9]+) -->", comment_body)
+        return match.group(1) if match else None
+
+    @staticmethod
+    def extract_ignore_reason(text: str) -> str | None:
+        """Extract reason from ignore command.
+
+        Args:
+            text: The ignore command text
+
+        Returns:
+            Reason string, or None if no reason provided
+        """
+        match = re.search(r"ignore\s*:\s*(.+)$", text.strip(), re.IGNORECASE)
+        return match.group(1).strip() if match else None
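Taken together, the scanning and extraction helpers support an authorization-gated suppression flow. A sketch of how they might compose (error handling and acknowledgment replies omitted; "path" is a standard field on review-comment payloads, and `client` is again an assumed instance):

    async def collect_authorized_suppressions(client, allowed_users=None) -> set[str]:
        # Map authorized ignore replies back to the finding IDs embedded in
        # the bot's review comments via the hidden "finding-id" HTML marker.
        suppressed: set[str] = set()
        for bot_comment, reply in await client.scan_for_ignore_commands():
            finding_id = client.extract_finding_id(bot_comment.get("body", ""))
            author = (reply.get("user") or {}).get("login", "")
            file_path = bot_comment.get("path", "")
            if not (finding_id and author):
                continue
            if await client.is_user_codeowner(author, file_path, allowed_users=allowed_users):
                suppressed.add(finding_id)
        return suppressed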