gtg 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,982 @@
+ """PR Analyzer - main orchestrator for GoodToMerge.
+
+ This module contains the PRAnalyzer class, which orchestrates the analysis
+ of pull requests to determine their readiness for merge. It coordinates
+ between GitHub API access, caching, and comment parsing to produce a
+ comprehensive analysis result.
+
+ The analyzer follows the decision tree from the design specification:
+ 1. CI checks pending/failing -> CI_FAILING
+ 2. Unresolved threads exist -> UNRESOLVED_THREADS
+ 3. Actionable comments exist -> ACTION_REQUIRED
+ 4. Ambiguous comments exist -> ACTION_REQUIRED (with requires_investigation)
+ 5. All clear -> READY
+ """
+
+ from __future__ import annotations
+
+ import json
+ from typing import TYPE_CHECKING, Any, Optional, cast
+
+ from goodtogo.core.errors import redact_error
+ from goodtogo.core.models import (
+     CICheck,
+     CIStatus,
+     Comment,
+     CommentClassification,
+     PRAnalysisResult,
+     Priority,
+     PRStatus,
+     ReviewerType,
+     ThreadSummary,
+     UnresolvedThread,
+ )
+ from goodtogo.core.validation import (
+     build_cache_key,
+     validate_github_identifier,
+     validate_pr_number,
+ )
+
+ if TYPE_CHECKING:
+     from goodtogo.adapters.agent_state import AgentState
+     from goodtogo.container import Container
+
+
+ # Cache TTL values in seconds
+ CACHE_TTL_META = 300  # 5 minutes for PR metadata
+ CACHE_TTL_CI_PENDING = 300  # 5 minutes while CI pending
+ CACHE_TTL_CI_COMPLETE = 86400  # 24 hours after CI complete
+ CACHE_TTL_COMMENT = 86400  # 24 hours for immutable comments
+ CACHE_TTL_STABLE_THREAD = 86400  # 24 hours for resolved threads
+ CACHE_TTL_STABLE_COMMENT = 86400  # 24 hours for NON_ACTIONABLE comments
+
+
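+ # Keys built by build_cache_key are assumed to join their segments with ":"
+ # (e.g. "pr:{owner}:{repo}:{pr_number}:meta"); the invalidate_pattern() calls
+ # below rely on that layout. A minimal sketch of the cache interface this
+ # module depends on (the real CachePort lives elsewhere in the package; only
+ # the calls actually made in this module are shown):
+ #
+ #     value = cache.get(key)                            # Optional[str]
+ #     cache.set(key, json_payload, ttl_seconds)
+ #     cache.invalidate_pattern("pr:myorg:myrepo:123:*")
+ #     stats = cache.get_stats()
+
+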
+ class PRAnalyzer:
+     """Main orchestrator for PR analysis.
+
+     PRAnalyzer coordinates the analysis of pull requests by:
+     1. Validating inputs
+     2. Fetching PR data from GitHub (with caching)
+     3. Identifying reviewer types for each comment
+     4. Classifying comments using appropriate parsers
+     5. Determining overall PR status
+
+     The analyzer uses dependency injection for all external dependencies
+     (GitHub API, cache, parsers) to enable testing and flexibility.
+
+     Example:
+         >>> from goodtogo.container import Container
+         >>> container = Container.create_default(github_token="ghp_...")
+         >>> analyzer = PRAnalyzer(container)
+         >>> result = analyzer.analyze("myorg", "myrepo", 123)
+         >>> print(result.status)
+         PRStatus.READY
+     """
+
+     def __init__(
+         self,
+         container: Container,
+         agent_state: Optional[AgentState] = None,
+         pr_key: Optional[str] = None,
+     ) -> None:
+         """Initialize the PRAnalyzer with a DI container.
+
+         Args:
+             container: Dependency injection container providing:
+                 - github: GitHubPort implementation for API access
+                 - cache: CachePort implementation for caching
+                 - parsers: Dict mapping ReviewerType to ReviewerParser
+             agent_state: Optional AgentState for persisting classification
+                 decisions. When provided, dismissed comments are automatically
+                 classified as NON_ACTIONABLE without running the parser.
+             pr_key: Optional PR key in format "owner/repo:pr_number". Required
+                 when using agent_state for dismissal persistence.
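+
+         Example (a sketch; assumes an AgentState instance named ``state``
+         is already available, and a PR at myorg/myrepo#123):
+
+             analyzer = PRAnalyzer(
+                 container,
+                 agent_state=state,
+                 pr_key="myorg/myrepo:123",
+             )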
+         """
+         self._container = container
+         self._agent_state = agent_state
+         self._pr_key = pr_key
+
+     def analyze(
+         self,
+         owner: str,
+         repo: str,
+         pr_number: int,
+         exclude_checks: Optional[set[str]] = None,
+     ) -> PRAnalysisResult:
+         """Analyze a PR and determine its readiness for merge.
+
+         This method orchestrates the complete PR analysis workflow:
+         1. Validate inputs (owner, repo, pr_number)
+         2. Fetch PR metadata (with cache)
+         3. Check for new commits (invalidate cache if needed)
+         4. Fetch comments, reviews, threads, CI status
+         5. Identify reviewer type for each comment
+         6. Parse and classify all comments
+         7. Build actionable and ambiguous comment lists
+         8. Generate human-readable action items
+         9. Determine final PR status
+
+         Args:
+             owner: Repository owner (organization or username).
+             repo: Repository name.
+             pr_number: Pull request number.
+             exclude_checks: Optional set of check names to exclude from
+                 CI status evaluation.
+
+         Returns:
+             PRAnalysisResult containing complete analysis with:
+                 - status: Final PR status (READY, ACTION_REQUIRED, etc.)
+                 - ci_status: CI/CD check results
+                 - threads: Thread resolution summary
+                 - comments: All classified comments
+                 - actionable_comments: Comments requiring action
+                 - ambiguous_comments: Comments needing investigation
+                 - action_items: Human-readable action list
+                 - needs_action: Boolean indicating if action is required
+
+         Raises:
+             ValueError: If inputs fail validation.
+             RedactedError: If an error occurs during analysis (with
+                 sensitive data redacted from the message).
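+
+         Example (a sketch; the excluded check name is illustrative):
+
+             result = analyzer.analyze(
+                 "myorg", "myrepo", 123, exclude_checks={"optional-lint"}
+             )
+             if result.needs_action:
+                 print(result.action_items)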
+         """
+         try:
+             # Step 1: Validate inputs
+             owner = validate_github_identifier(owner, "owner")
+             repo = validate_github_identifier(repo, "repo")
+             pr_number = validate_pr_number(pr_number)
+
+             # Step 2: Fetch PR metadata (with cache)
+             pr_data = self._get_pr_data(owner, repo, pr_number)
+
+             # Extract commit info
+             head_sha = pr_data.get("head", {}).get("sha", "")
+             head_timestamp = pr_data.get("head", {}).get("committed_at", "")
+             if not head_timestamp:
+                 # Fallback to updated_at if committed_at not available
+                 head_timestamp = pr_data.get("updated_at", "")
+
+             # Step 3: Check for new commits and invalidate cache if needed
+             self._check_and_invalidate_cache(owner, repo, pr_number, head_sha)
+
+             # Step 4: Fetch PR data (comments, reviews, threads, CI)
+             comments_data = self._get_comments(owner, repo, pr_number)
+             reviews_data = self._get_reviews(owner, repo, pr_number)
+             threads_data = self._get_threads(owner, repo, pr_number)
+             ci_data = self._get_ci_status(owner, repo, head_sha)
+
+             # Steps 5-6: Process comments with reviewer identification and parsing
+             all_comments = self._process_comments(
+                 owner, repo, comments_data, reviews_data, threads_data
+             )
+
+             # Step 7: Build filtered lists
+             actionable_comments = [
+                 c for c in all_comments if c.classification == CommentClassification.ACTIONABLE
+             ]
+             ambiguous_comments = [
+                 c for c in all_comments if c.classification == CommentClassification.AMBIGUOUS
+             ]
+
+             # Sort actionable comments by priority
+             priority_order = {
+                 Priority.CRITICAL: 0,
+                 Priority.MAJOR: 1,
+                 Priority.MINOR: 2,
+                 Priority.TRIVIAL: 3,
+                 Priority.UNKNOWN: 4,
+             }
+             actionable_comments.sort(key=lambda c: priority_order[c.priority])
+
+             # Build CI status model (before action items so we use filtered state)
+             ci_status = self._build_ci_status(ci_data, exclude_checks or set())
+
+             # Step 8: Generate action items using filtered CI state
+             action_items = self._generate_action_items(
+                 actionable_comments, ambiguous_comments, threads_data, ci_status
+             )
+
+             # Build thread summary
+             threads = self._build_thread_summary(threads_data)
+
+             # Cache resolved threads for future runs (granular thread caching)
+             for thread in threads_data:
+                 self._cache_resolved_thread(owner, repo, thread)
+
+             # Step 9: Determine final status using decision tree
+             status = self._determine_status(
+                 ci_status, threads, actionable_comments, ambiguous_comments
+             )
+
+             # Issue #28: Invalidate volatile data cache for non-READY PRs.
+             # Design principle: Only cache data that won't block a gtg check from passing.
+             # If PR is not ready, invalidate comment/thread/review cache to ensure fresh
+             # fetch next time. This prevents stale cache from incorrectly blocking merge.
+             if status != PRStatus.READY:
+                 # Invalidate PR-level caches
+                 pr_pattern = f"pr:{owner}:{repo}:{pr_number}:*"
+                 self._container.cache.invalidate_pattern(pr_pattern)
+                 # Invalidate granular comment caches for this repo.
+                 # Note: This is broader than necessary but comment IDs aren't PR-scoped.
+                 comment_pattern = f"comment:{owner}:{repo}:*"
+                 self._container.cache.invalidate_pattern(comment_pattern)
+                 # Invalidate granular thread caches for this repo
+                 thread_pattern = f"thread:{owner}:{repo}:*"
+                 self._container.cache.invalidate_pattern(thread_pattern)
+
+             # Get cache stats
+             cache_stats = self._container.cache.get_stats()
+
+             # Determine if action is needed
+             needs_action = status != PRStatus.READY
+
+             return PRAnalysisResult(
+                 status=status,
+                 pr_number=pr_number,
+                 repo_owner=owner,
+                 repo_name=repo,
+                 latest_commit_sha=head_sha,
+                 latest_commit_timestamp=head_timestamp,
+                 ci_status=ci_status,
+                 threads=threads,
+                 comments=all_comments,
+                 actionable_comments=actionable_comments,
+                 ambiguous_comments=ambiguous_comments,
+                 action_items=action_items,
+                 needs_action=needs_action,
+                 cache_stats=cache_stats,
+             )
+
+         except ValueError:
+             # Validation errors don't need redaction; re-raise as-is
+             raise
+         except Exception as e:
+             # Wrap all other exceptions with redacted messages
+             raise redact_error(e) from e
+
+     def _get_pr_data(self, owner: str, repo: str, pr_number: int) -> dict[str, Any]:
+         """Fetch PR metadata with caching.
+
+         Args:
+             owner: Repository owner.
+             repo: Repository name.
+             pr_number: PR number.
+
+         Returns:
+             Dictionary containing PR metadata.
+         """
+         cache_key = build_cache_key("pr", owner, repo, str(pr_number), "meta")
+
+         # Try cache first
+         cached = self._container.cache.get(cache_key)
+         if cached:
+             return cast(dict[str, Any], json.loads(cached))
+
+         # Fetch from GitHub
+         pr_data = self._container.github.get_pr(owner, repo, pr_number)
+
+         # Cache the result
+         self._container.cache.set(cache_key, json.dumps(pr_data), CACHE_TTL_META)
+
+         return pr_data
+
+     def _check_and_invalidate_cache(
+         self, owner: str, repo: str, pr_number: int, current_sha: str
+     ) -> None:
+         """Check for new commits and invalidate cache if needed.
+
+         If the latest commit SHA has changed since the last analysis,
+         invalidate all cached data for this PR.
+
+         Args:
+             owner: Repository owner.
+             repo: Repository name.
+             pr_number: PR number.
+             current_sha: Current head commit SHA.
+         """
+         cache_key = build_cache_key("pr", owner, repo, str(pr_number), "commit", "latest")
+
+         cached_sha = self._container.cache.get(cache_key)
+         if cached_sha and cached_sha != current_sha:
+             # New commit detected, invalidate all cached data for this PR
+             pattern = f"pr:{owner}:{repo}:{pr_number}:*"
+             self._container.cache.invalidate_pattern(pattern)
+
+         # Store current SHA
+         self._container.cache.set(cache_key, current_sha, CACHE_TTL_META)
+
+     def _get_comments(self, owner: str, repo: str, pr_number: int) -> list[dict[str, Any]]:
+         """Fetch PR comments with two-tier caching.
+
+         Uses two caching strategies:
+         1. PR-level cache: Caches the full comment list for a short TTL (5 min)
+         2. Granular cache: For each comment, checks if we have a cached stable
+            version (NON_ACTIONABLE classification). Uses cached raw data to
+            avoid re-parsing stable comments.
+
+         Args:
+             owner: Repository owner.
+             repo: Repository name.
+             pr_number: PR number.
+
+         Returns:
+             List of comment dictionaries.
+         """
+         # First, try PR-level cache for the comment list
+         cache_key = build_cache_key("pr", owner, repo, str(pr_number), "comments")
+         cached = self._container.cache.get(cache_key)
+         if cached:
+             return cast(list[dict[str, Any]], json.loads(cached))
+
+         # Fetch fresh comments from GitHub
+         fresh_comments = self._container.github.get_pr_comments(owner, repo, pr_number)
+
+         # Apply granular caching: use cached raw data for stable comments
+         result: list[dict[str, Any]] = []
+         for comment in fresh_comments:
+             comment_id = str(comment.get("id", ""))
+             if not comment_id:
+                 # Comments without IDs can't be cached granularly
+                 result.append(comment)
+                 continue
+
+             granular_cache_key = build_cache_key("comment", owner, repo, comment_id)
+             granular_cached = self._container.cache.get(granular_cache_key)
+
+             if granular_cached:
+                 cached_data = json.loads(granular_cached)
+                 # Check if comment was edited since caching
+                 fresh_timestamp = comment.get("updated_at") or comment.get("created_at", "")
+                 cached_timestamp = cached_data.get("cached_at", "")
+                 if (
+                     cached_data.get("classification") == "NON_ACTIONABLE"
+                     and fresh_timestamp == cached_timestamp
+                 ):
+                     # Use cached data - it's stable AND unchanged
+                     result.append(cached_data["raw"])
+                     continue
+                 # Timestamp mismatch means comment was edited - use fresh data
+
+             # Use fresh data
+             result.append(comment)
+
+         # Cache the result at PR level
+         self._container.cache.set(cache_key, json.dumps(result), CACHE_TTL_META)
+
+         return result
+
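+     # Shape of a granular comment cache entry, as written by
+     # _cache_stable_comment and read back above (values illustrative):
+     #
+     #     {
+     #         "raw": {...original comment payload...},
+     #         "classification": "NON_ACTIONABLE",
+     #         "cached_at": "2024-01-01T00:00:00Z",
+     #     }
+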
+     def _get_reviews(self, owner: str, repo: str, pr_number: int) -> list[dict[str, Any]]:
+         """Fetch PR reviews with caching.
+
+         Args:
+             owner: Repository owner.
+             repo: Repository name.
+             pr_number: PR number.
+
+         Returns:
+             List of review dictionaries.
+         """
+         cache_key = build_cache_key("pr", owner, repo, str(pr_number), "reviews")
+
+         cached = self._container.cache.get(cache_key)
+         if cached:
+             return cast(list[dict[str, Any]], json.loads(cached))
+
+         reviews = self._container.github.get_pr_reviews(owner, repo, pr_number)
+         self._container.cache.set(cache_key, json.dumps(reviews), CACHE_TTL_META)
+
+         return reviews
+
+     def _get_threads(self, owner: str, repo: str, pr_number: int) -> list[dict[str, Any]]:
+         """Fetch PR threads with two-tier caching.
+
+         Uses two caching strategies:
+         1. PR-level cache: Caches the full thread list for a short TTL (5 min)
+         2. Granular cache: For each thread, checks if we have a cached stable
+            version (resolved state). Uses cached raw data for resolved threads.
+
+         Args:
+             owner: Repository owner.
+             repo: Repository name.
+             pr_number: PR number.
+
+         Returns:
+             List of thread dictionaries.
+         """
+         # First, try PR-level cache for the thread list
+         cache_key = build_cache_key("pr", owner, repo, str(pr_number), "threads")
+         cached = self._container.cache.get(cache_key)
+         if cached:
+             return cast(list[dict[str, Any]], json.loads(cached))
+
+         # Fetch fresh threads from GitHub
+         fresh_threads = self._container.github.get_pr_threads(owner, repo, pr_number)
+
+         # Apply granular caching: use cached raw data for resolved threads
+         result: list[dict[str, Any]] = []
+         for thread in fresh_threads:
+             thread_id = str(thread.get("id", ""))
+             if not thread_id:
+                 result.append(thread)
+                 continue
+
+             granular_cache_key = build_cache_key("thread", owner, repo, thread_id)
+             granular_cached = self._container.cache.get(granular_cache_key)
+
+             if granular_cached:
+                 cached_data = json.loads(granular_cached)
+                 # Check if thread was modified since caching
+                 fresh_timestamp = thread.get("updated_at") or thread.get("created_at", "")
+                 cached_timestamp = cached_data.get("cached_at", "")
+                 if cached_data.get("is_resolved", False) and fresh_timestamp == cached_timestamp:
+                     # Use cached data - it's stable AND unchanged
+                     result.append(cached_data["raw"])
+                     continue
+                 # Timestamp mismatch means thread was modified (possibly re-opened)
+
+             # Use fresh data
+             result.append(thread)
+
+         # Cache the result at PR level
+         self._container.cache.set(cache_key, json.dumps(result), CACHE_TTL_META)
+
+         return result
+
+     def _cache_resolved_thread(
+         self,
+         owner: str,
+         repo: str,
+         thread_data: dict[str, Any],
+     ) -> None:
+         """Cache a thread if it is resolved (a stable but not immutable state).
+
+         Resolved threads are typically stable but can be re-opened or modified.
+         We cache them with a timestamp to detect staleness - if the thread's
+         updated_at changes, we'll use fresh data instead of the cache.
+
+         Args:
+             owner: Repository owner.
+             repo: Repository name.
+             thread_data: Thread dictionary from the GitHub API.
+         """
+         if not thread_data.get("is_resolved", False):
+             return  # Don't cache unresolved threads - volatile
+
+         thread_id = str(thread_data.get("id", ""))
+         if not thread_id:
+             return
+
+         cache_key = build_cache_key("thread", owner, repo, thread_id)
+         cache_value = json.dumps(
+             {
+                 "raw": thread_data,
+                 "is_resolved": True,
+                 "cached_at": thread_data.get("updated_at") or thread_data.get("created_at", ""),
+             }
+         )
+         self._container.cache.set(cache_key, cache_value, CACHE_TTL_STABLE_THREAD)
+
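+     # A resolved-thread cache entry mirrors the comment entry above
+     # (values illustrative):
+     #
+     #     {"raw": {...thread payload...}, "is_resolved": true,
+     #      "cached_at": "2024-01-01T00:00:00Z"}
+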
+     def _get_ci_status(self, owner: str, repo: str, ref: str) -> dict[str, Any]:
+         """Fetch CI status with caching.
+
+         Uses a different TTL depending on whether CI is complete or still pending.
+
+         Args:
+             owner: Repository owner.
+             repo: Repository name.
+             ref: Git reference (commit SHA).
+
+         Returns:
+             Dictionary containing CI status.
+         """
+         cache_key = build_cache_key("pr", owner, repo, ref, "ci", "status")
+
+         cached = self._container.cache.get(cache_key)
+         if cached:
+             return cast(dict[str, Any], json.loads(cached))
+
+         ci_data = self._container.github.get_ci_status(owner, repo, ref)
+
+         # Use a longer TTL if CI is complete
+         state = ci_data.get("state", "pending")
+         ttl = CACHE_TTL_CI_COMPLETE if state in ("success", "failure") else CACHE_TTL_CI_PENDING
+
+         self._container.cache.set(cache_key, json.dumps(ci_data), ttl)
+
+         return ci_data
+
+     def _identify_reviewer_type(self, author: str, body: str) -> ReviewerType:
+         """Identify the reviewer type for a comment.
+
+         Iterates through parsers in priority order to find one that
+         can handle the comment.
+
+         Args:
+             author: Comment author username.
+             body: Comment body text.
+
+         Returns:
+             ReviewerType for the identified reviewer.
+         """
+         # Check parsers in order (CODERABBIT, GREPTILE, CLAUDE, CURSOR, then fallback)
+         parser_order = [
+             ReviewerType.CODERABBIT,
+             ReviewerType.GREPTILE,
+             ReviewerType.CLAUDE,
+             ReviewerType.CURSOR,
+         ]
+
+         for reviewer_type in parser_order:
+             if reviewer_type in self._container.parsers:
+                 parser = self._container.parsers[reviewer_type]
+                 if parser.can_parse(author, body):
+                     return reviewer_type
+
+         # Fall back to HUMAN for comments no bot parser claims
+         return ReviewerType.HUMAN
+
+     def _process_comments(
+         self,
+         owner: str,
+         repo: str,
+         comments_data: list[dict[str, Any]],
+         reviews_data: list[dict[str, Any]],
+         threads_data: list[dict[str, Any]],
+     ) -> list[Comment]:
+         """Process all comments and classify them.
+
+         Combines comments from inline comments, reviews, and threads,
+         identifies the reviewer type, and classifies each comment.
+         Caches NON_ACTIONABLE comments for future runs.
+
+         Args:
+             owner: Repository owner.
+             repo: Repository name.
+             comments_data: List of inline comments from GitHub.
+             reviews_data: List of reviews from GitHub.
+             threads_data: List of threads from GitHub.
+
+         Returns:
+             List of Comment objects with classifications.
+         """
+         all_comments: list[Comment] = []
+
+         # Build thread resolution map
+         thread_resolution: dict[str, bool] = {}
+         thread_outdated: dict[str, bool] = {}
+         for thread in threads_data:
+             thread_id = thread.get("id", "")
+             thread_resolution[thread_id] = thread.get("is_resolved", False)
+             thread_outdated[thread_id] = thread.get("is_outdated", False)
+
+         # Process inline comments
+         for comment_data in comments_data:
+             comment = self._classify_comment(comment_data, thread_resolution, thread_outdated)
+             all_comments.append(comment)
+             # Cache stable (NON_ACTIONABLE) comments
+             self._cache_stable_comment(owner, repo, comment_data, comment.classification)
+
+         # Process review body comments
+         for review_data in reviews_data:
+             # Skip reviews with empty bodies
+             body = review_data.get("body", "")
+             if not body or not body.strip():
+                 continue
+
+             # Create a comment-like dict from review
+             review_comment = {
+                 "id": f"review_{review_data.get('id', '')}",
+                 "user": review_data.get("user", {}),
+                 "body": body,
+                 "created_at": review_data.get("submitted_at", ""),
+                 "path": None,
+                 "line": None,
+             }
+             comment = self._classify_comment(review_comment, thread_resolution, thread_outdated)
+             all_comments.append(comment)
+             # Cache stable (NON_ACTIONABLE) review comments
+             self._cache_stable_comment(owner, repo, review_comment, comment.classification)
+
+         return all_comments
+
+     def _classify_comment(
+         self,
+         comment_data: dict[str, Any],
+         thread_resolution: dict[str, bool],
+         thread_outdated: dict[str, bool],
+     ) -> Comment:
+         """Classify a single comment using the appropriate parser.
+
+         If agent_state is configured and the comment was previously dismissed,
+         returns NON_ACTIONABLE without running the parser.
+
+         Args:
+             comment_data: Dictionary containing comment data.
+             thread_resolution: Map of thread ID to resolution status.
+             thread_outdated: Map of thread ID to outdated status.
+
+         Returns:
+             Classified Comment object.
+         """
+         author = comment_data.get("user", {}).get("login", "")
+         body = comment_data.get("body", "")
+         comment_id = str(comment_data.get("id", ""))
+         thread_id = comment_data.get("in_reply_to_id")
+         if thread_id:
+             thread_id = str(thread_id)
+
+         # Determine thread status
+         is_resolved = thread_resolution.get(thread_id, False) if thread_id else False
+         is_outdated = thread_outdated.get(thread_id, False) if thread_id else False
+
+         # Identify reviewer type
+         reviewer_type = self._identify_reviewer_type(author, body)
+
+         # Check if comment was previously dismissed
+         if self._agent_state is not None and self._pr_key is not None:
+             if self._agent_state.is_comment_dismissed(self._pr_key, comment_id):
+                 # Return NON_ACTIONABLE without running parser
+                 return Comment(
+                     id=comment_id,
+                     author=author,
+                     reviewer_type=reviewer_type,
+                     body=body,
+                     classification=CommentClassification.NON_ACTIONABLE,
+                     priority=Priority.UNKNOWN,
+                     requires_investigation=False,
+                     thread_id=thread_id,
+                     is_resolved=is_resolved,
+                     is_outdated=is_outdated,
+                     file_path=comment_data.get("path"),
+                     line_number=comment_data.get("line"),
+                     created_at=comment_data.get("created_at", ""),
+                     addressed_in_commit=None,
+                     url=comment_data.get("html_url"),
+                 )
+
+         # Get the appropriate parser (fallback to HUMAN parser if not found)
+         parser = self._container.parsers.get(reviewer_type)
+         if parser is None:
+             parser = self._container.parsers.get(ReviewerType.HUMAN)
+         if parser is None:
+             # Last resort fallback - use the first available parser
+             parser = next(iter(self._container.parsers.values()))
+
+         # Add resolution status to comment data for parser use
+         comment_with_status = {
+             **comment_data,
+             "is_resolved": is_resolved,
+             "is_outdated": is_outdated,
+         }
+
+         # Parse the comment
+         classification, priority, requires_investigation = parser.parse(comment_with_status)
+
+         return Comment(
+             id=comment_id,
+             author=author,
+             reviewer_type=reviewer_type,
+             body=body,
+             classification=classification,
+             priority=priority,
+             requires_investigation=requires_investigation,
+             thread_id=thread_id,
+             is_resolved=is_resolved,
+             is_outdated=is_outdated,
+             file_path=comment_data.get("path"),
+             line_number=comment_data.get("line"),
+             created_at=comment_data.get("created_at", ""),
+             addressed_in_commit=None,
+             url=comment_data.get("html_url"),
+         )
+
+     def dismiss_comment(self, comment_id: str, reason: Optional[str] = None) -> None:
+         """Mark a comment as dismissed (non-actionable).
+
+         Persists the dismissal decision so future runs skip re-evaluation
+         of this comment.
+
+         Args:
+             comment_id: ID of the comment to dismiss.
+             reason: Optional explanation for why the comment was dismissed.
+
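+         Example (comment ID and reason are illustrative):
+
+             analyzer.dismiss_comment("123456", reason="stale suggestion")
+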
+         Raises:
+             ValueError: If pr_key is not set on the analyzer.
+             RuntimeError: If agent_state is not configured.
+         """
+         if self._pr_key is None:
+             raise ValueError(
+                 "pr_key must be set on the analyzer to dismiss comments. "
+                 "Pass pr_key when creating PRAnalyzer."
+             )
+         if self._agent_state is None:
+             raise RuntimeError(
+                 "agent_state must be configured to dismiss comments. "
+                 "Pass agent_state when creating PRAnalyzer."
+             )
+         self._agent_state.dismiss_comment(self._pr_key, comment_id, reason)
+
+     def _build_ci_status(self, ci_data: dict[str, Any], exclude_checks: set[str]) -> CIStatus:
+         """Build CIStatus model from GitHub API response.
+
+         Args:
+             ci_data: Dictionary from GitHub CI status API.
+             exclude_checks: Set of check names to exclude from evaluation.
+
+         Returns:
+             CIStatus model with aggregated check information.
+         """
+         statuses = ci_data.get("statuses", [])
+         check_runs = ci_data.get("check_runs", [])
+
+         checks: list[CICheck] = []
+
+         # Process status checks
+         for status in statuses:
+             name = status.get("context", "unknown")
+             if name in exclude_checks:
+                 continue
+             checks.append(
+                 CICheck(
+                     name=name,
+                     status=status.get("state", "pending"),
+                     conclusion=status.get("state"),
+                     url=status.get("target_url"),
+                 )
+             )
+
+         # Process check runs (GitHub Actions, etc.)
+         for run in check_runs:
+             name = run.get("name", "unknown")
+             if name in exclude_checks:
+                 continue
+
+             run_status = run.get("status", "queued")
+             run_conclusion = run.get("conclusion")
+
+             # Map GitHub status to our status
+             if run_status == "completed":
+                 status_value = run_conclusion or "unknown"
+             elif run_status in ("queued", "in_progress"):
+                 status_value = "pending"
+             else:
+                 status_value = run_status
+
+             checks.append(
+                 CICheck(
+                     name=name,
+                     status=status_value,
+                     conclusion=run_conclusion,
+                     url=run.get("html_url"),
+                 )
+             )
+
+         # Calculate counts
+         total = len(checks)
+         passed = sum(1 for c in checks if c.status == "success")
+         failed = sum(1 for c in checks if c.status in ("failure", "error"))
+         pending = sum(1 for c in checks if c.status == "pending")
+
+         # Recalculate state based on filtered checks
+         # (can't use GitHub's state since we may have excluded checks)
+         if failed > 0:
+             computed_state = "failure"
+         elif pending > 0:
+             computed_state = "pending"
+         else:
+             # All checks passed; no checks at all also counts as success
+             computed_state = "success"
+
+         return CIStatus(
+             state=computed_state,
+             total_checks=total,
+             passed=passed,
+             failed=failed,
+             pending=pending,
+             checks=checks,
+         )
+
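+     # Worked example (illustrative data): one successful check run plus one
+     # queued run gives passed=1, pending=1, failed=0, so the computed state
+     # is "pending":
+     #
+     #     check_runs = [
+     #         {"name": "tests", "status": "completed", "conclusion": "success"},
+     #         {"name": "lint", "status": "queued", "conclusion": None},
+     #     ]
+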
+     def _build_thread_summary(self, threads_data: list[dict[str, Any]]) -> ThreadSummary:
+         """Build ThreadSummary from thread data.
+
+         Args:
+             threads_data: List of thread dictionaries.
+
+         Returns:
+             ThreadSummary with resolution counts and unresolved thread details.
+         """
+         total = len(threads_data)
+         resolved = sum(1 for t in threads_data if t.get("is_resolved", False))
+         outdated = sum(1 for t in threads_data if t.get("is_outdated", False))
+         unresolved = total - resolved
+
+         # Build list of unresolved thread details for agent workflows
+         unresolved_threads: list[UnresolvedThread] = []
+         for thread in threads_data:
+             if not thread.get("is_resolved", False):
+                 # Get first comment info
+                 comments = thread.get("comments", [])
+                 first_comment = comments[0] if comments else {}
+                 author = first_comment.get("author", "unknown")
+                 body = first_comment.get("body", "")
+                 body_preview = body[:200] if body else ""
+
+                 # URL is optional - may be added by GraphQL query in future
+                 url = thread.get("url")
+
+                 unresolved_threads.append(
+                     UnresolvedThread(
+                         id=thread.get("id", ""),
+                         url=url,
+                         path=thread.get("path", ""),
+                         line=thread.get("line"),
+                         author=author,
+                         body_preview=body_preview,
+                     )
+                 )
+
+         return ThreadSummary(
+             total=total,
+             resolved=resolved,
+             unresolved=unresolved,
+             outdated=outdated,
+             unresolved_threads=unresolved_threads,
+         )
+
+     def _generate_action_items(
+         self,
+         actionable_comments: list[Comment],
+         ambiguous_comments: list[Comment],
+         threads_data: list[dict[str, Any]],
+         ci_status: CIStatus,
+     ) -> list[str]:
+         """Generate human-readable action items.
+
+         Args:
+             actionable_comments: List of actionable comments.
+             ambiguous_comments: List of ambiguous comments.
+             threads_data: List of thread data.
+             ci_status: Filtered CI status (with excluded checks removed).
+
+         Returns:
+             List of human-readable action item strings.
+         """
+         action_items: list[str] = []
+
+         # CI status items (using filtered state that respects --exclude-checks)
+         if ci_status.state == "pending":
+             action_items.append("CI checks are still running - wait for completion")
+         elif ci_status.state == "failure":
+             action_items.append("CI checks are failing - fix build/test errors")
+
+         # Thread items
+         unresolved = sum(1 for t in threads_data if not t.get("is_resolved", False))
+         if unresolved > 0:
+             action_items.append(
+                 f"{unresolved} unresolved review thread{'s' if unresolved != 1 else ''}"
+             )
+
+         # Actionable comment items
+         if actionable_comments:
+             # Group by priority
+             critical = sum(1 for c in actionable_comments if c.priority == Priority.CRITICAL)
+             major = sum(1 for c in actionable_comments if c.priority == Priority.MAJOR)
+             minor = sum(1 for c in actionable_comments if c.priority == Priority.MINOR)
+             other = len(actionable_comments) - critical - major - minor
+
+             if critical > 0:
+                 issues = "issue" if critical == 1 else "issues"
+                 needs = "needs" if critical == 1 else "need"
+                 action_items.append(f"{critical} critical {issues} {needs} immediate attention")
+             if major > 0:
+                 issues = "issue" if major == 1 else "issues"
+                 action_items.append(f"{major} major {issues} must be fixed before merge")
+             if minor > 0:
+                 issues = "issue" if minor == 1 else "issues"
+                 action_items.append(f"{minor} minor {issues} should be addressed")
+             if other > 0:
+                 comments = "comment" if other == 1 else "comments"
+                 needs = "needs" if other == 1 else "need"
+                 action_items.append(f"{other} actionable {comments} {needs} addressing")
+
+         # Ambiguous comment items
+         if ambiguous_comments:
+             action_items.append(
+                 f"{len(ambiguous_comments)} comment{'s' if len(ambiguous_comments) != 1 else ''} "
+                 f"require{'s' if len(ambiguous_comments) == 1 else ''} investigation (ambiguous)"
+             )
+
+         return action_items
+
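+     # Sample output for a PR with failing CI, two open threads, and one
+     # critical comment (illustrative):
+     #
+     #     ["CI checks are failing - fix build/test errors",
+     #      "2 unresolved review threads",
+     #      "1 critical issue needs immediate attention"]
+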
+     def _determine_status(
+         self,
+         ci_status: CIStatus,
+         threads: ThreadSummary,
+         actionable_comments: list[Comment],
+         ambiguous_comments: list[Comment],
+     ) -> PRStatus:
+         """Determine final PR status using the decision tree.
+
+         Decision tree (in order):
+         1. CI pending/failing -> CI_FAILING
+         2. Unresolved threads -> UNRESOLVED_THREADS
+         3. Actionable/ambiguous comments -> ACTION_REQUIRED
+         4. All clear -> READY
+
+         Args:
+             ci_status: CI check status.
+             threads: Thread summary.
+             actionable_comments: List of actionable comments.
+             ambiguous_comments: List of ambiguous comments.
+
+         Returns:
+             Final PRStatus enum value.
+         """
+         # Check CI status first
+         if ci_status.state in ("pending", "failure", "error"):
+             return PRStatus.CI_FAILING
+
+         # Check for unresolved threads
+         if threads.unresolved > 0:
+             return PRStatus.UNRESOLVED_THREADS
+
+         # Check for actionable or ambiguous comments
+         if actionable_comments or ambiguous_comments:
+             return PRStatus.ACTION_REQUIRED
+
+         # All clear!
+         return PRStatus.READY
+
+     def _cache_stable_comment(
+         self,
+         owner: str,
+         repo: str,
+         comment_data: dict[str, Any],
+         classification: CommentClassification,
+     ) -> None:
+         """Cache a comment if its classification is stable (NON_ACTIONABLE).
+
+         Only NON_ACTIONABLE comments are cached because their classification
+         is unlikely to change. ACTIONABLE and AMBIGUOUS comments are volatile
+         and should be re-evaluated on each run.
+
+         Args:
+             owner: Repository owner.
+             repo: Repository name.
+             comment_data: Dictionary containing comment data.
+             classification: The classification result for this comment.
+         """
+         if classification != CommentClassification.NON_ACTIONABLE:
+             return  # Don't cache ACTIONABLE or AMBIGUOUS - volatile
+
+         comment_id = str(comment_data.get("id", ""))
+         if not comment_id:
+             return
+
+         cache_key = build_cache_key("comment", owner, repo, comment_id)
+         cache_value = json.dumps(
+             {
+                 "raw": comment_data,
+                 "classification": classification.value,
+                 "cached_at": comment_data.get("updated_at") or comment_data.get("created_at", ""),
+             }
+         )
+         self._container.cache.set(cache_key, cache_value, CACHE_TTL_STABLE_COMMENT)