gtg 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,312 @@
1
+ """Data models for GoodToMerge PR analysis.
2
+
3
+ This module defines all the Pydantic models and enums used throughout
4
+ the GoodToMerge library for representing PR analysis results, comments,
5
+ CI status, and thread information.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from enum import Enum
11
+ from typing import Optional
12
+
13
+ from pydantic import BaseModel
14
+
15
+
16
class PRStatus(str, Enum):
    """Overall verdict for a pull request.

    Members are string-valued, so they compare equal to their plain-text
    value and serialize as that text.

    Documented exit-code association (the translation to a process exit
    code is not performed in this class):

        READY               0   nothing outstanding, ready to merge
        ACTION_REQUIRED     1   actionable comments exist
        UNRESOLVED_THREADS  2   unresolved review threads exist
        CI_FAILING          3   CI/CD checks are failing
        ERROR               4   data could not be fetched
    """

    READY = "READY"
    ACTION_REQUIRED = "ACTION_REQUIRED"
    # NOTE: the serialized value is "UNRESOLVED", not the member name.
    UNRESOLVED_THREADS = "UNRESOLVED"
    CI_FAILING = "CI_FAILING"
    ERROR = "ERROR"
32
+
33
+
34
class CommentClassification(str, Enum):
    """Outcome of classifying a single review comment.

    Applies to comments from automated reviewers and from humans, and
    records whether the comment calls for any action.
    """

    ACTIONABLE = "ACTIONABLE"
    """The comment has to be dealt with before the PR is merged."""

    NON_ACTIONABLE = "NON_ACTIONABLE"
    """The comment is safe to ignore (informational, nitpick, and so on)."""

    AMBIGUOUS = "AMBIGUOUS"
    """No classification could be determined; an agent must investigate."""
49
+
50
+
51
class Priority(str, Enum):
    """Severity attached to a comment.

    Intended for ordering actionable comments; the levels correspond to
    the severity markers emitted by the various automated reviewers.
    """

    CRITICAL = "CRITICAL"
    """Blocking problem that needs an immediate fix."""

    MAJOR = "MAJOR"
    """Significant problem that has to be fixed before merging."""

    MINOR = "MINOR"
    """Worth fixing, but does not block the merge."""

    TRIVIAL = "TRIVIAL"
    """Small improvement that would be nice to have."""

    UNKNOWN = "UNKNOWN"
    """No priority could be determined."""
72
+
73
+
74
class ReviewerType(str, Enum):
    """Identifies who (or which tool) wrote a comment.

    The value is used to pick the parser that knows how to interpret
    comments from that particular code review tool.
    """

    CODERABBIT = "coderabbit"
    """The CodeRabbit AI reviewer."""

    GREPTILE = "greptile"
    """The Greptile reviewer."""

    CLAUDE = "claude"
    """The Claude Code reviewer."""

    CURSOR = "cursor"
    """The Cursor/Bugbot reviewer."""

    HUMAN = "human"
    """A person rather than an automated tool."""

    UNKNOWN = "unknown"
    """The reviewer could not be identified."""
98
+
99
+
100
class Comment(BaseModel):
    """One PR review comment together with its classification.

    Bundles the raw comment data with the classification outcome and
    the metadata needed to act on it. Only ``url`` declares a default.
    """

    id: str
    """Identifier that uniquely names this comment."""

    author: str
    """Login of the account that wrote the comment."""

    reviewer_type: ReviewerType
    """Kind of reviewer the comment came from."""

    body: str
    """Complete text of the comment."""

    classification: CommentClassification
    """One of ACTIONABLE, NON_ACTIONABLE or AMBIGUOUS."""

    priority: Priority
    """Severity level; meaningful for actionable comments."""

    requires_investigation: bool
    """Set when the comment is AMBIGUOUS and an agent must look into it."""

    thread_id: Optional[str]
    """Review thread the comment is part of, or None."""

    is_resolved: bool
    """True when the enclosing thread has been resolved."""

    is_outdated: bool
    """True when the commented code has since changed."""

    file_path: Optional[str]
    """File the comment points at, or None."""

    line_number: Optional[int]
    """Line within that file the comment points at, or None."""

    created_at: str
    """Creation time as an ISO 8601 timestamp string."""

    addressed_in_commit: Optional[str]
    """SHA of the commit known to address the comment, or None."""

    url: Optional[str] = None
    """GitHub link to the comment, provided for agent workflows."""
151
+
152
+
153
class CICheck(BaseModel):
    """State of one CI/CD check run, such as a build, test or lint job."""

    name: str
    """Display name of the check."""

    status: str
    """Present status, documented as 'success', 'failure' or 'pending'."""

    conclusion: Optional[str]
    """Final outcome once the check has completed, otherwise None."""

    url: Optional[str]
    """Link to the check's details or logs, or None."""
170
+
171
+
172
class CIStatus(BaseModel):
    """Roll-up of every CI/CD check on a PR.

    Carries the summary counters alongside the individual check records.
    """

    state: str
    """Combined state, documented as 'success', 'failure' or 'pending'."""

    total_checks: int
    """How many CI checks there are in total."""

    passed: int
    """How many checks passed."""

    failed: int
    """How many checks failed."""

    pending: int
    """How many checks are still pending or running."""

    checks: list[CICheck]
    """The individual check results."""
196
+
197
+
198
class UnresolvedThread(BaseModel):
    """Details of a review thread that has not been resolved.

    Meant to give an agent everything required to resolve the thread
    without further API calls, including the GraphQL node ID used by
    the resolution mutation.
    """

    id: str
    """GraphQL node ID to pass to the resolution mutation."""

    url: Optional[str]
    """GitHub UI link to the thread, when one is available."""

    path: str
    """Path of the file the thread is anchored to."""

    line: Optional[int]
    """Line number within the diff, or None."""

    author: str
    """Login of whoever wrote the thread's first comment."""

    body_preview: str
    """Leading 200 characters of the first comment's body."""
223
+
224
+
225
class ThreadSummary(BaseModel):
    """Counts describing how far review threads have been resolved.

    Used to decide whether any discussion is still open on the PR.
    """

    total: int
    """Count of all review threads."""

    resolved: int
    """Count of threads marked resolved."""

    unresolved: int
    """Count of threads still unresolved."""

    outdated: int
    """Count of threads whose code changed after the comment was made."""

    unresolved_threads: list[UnresolvedThread]
    """Per-thread details of every unresolved thread, for agent workflows."""
246
+
247
+
248
class CacheStats(BaseModel):
    """Cache effectiveness figures, reported for performance tuning."""

    hits: int
    """How many lookups were served from the cache."""

    misses: int
    """How many lookups missed the cache."""

    hit_rate: float
    """Hit rate expressed as a fraction between 0.0 and 1.0."""
262
+
263
+
264
class PRAnalysisResult(BaseModel):
    """Full outcome of analyzing a PR; the library's main output.

    Documented as the model returned by PRAnalyzer.analyze(). It holds
    everything an AI agent needs to choose the next step for the PR.
    """

    status: PRStatus
    """Overall verdict on whether the PR can be merged."""

    pr_number: int
    """Number of the analyzed PR."""

    repo_owner: str
    """Owner of the repository (a user or an organization)."""

    repo_name: str
    """Name of the repository."""

    latest_commit_sha: str
    """SHA of the most recent commit on the PR branch."""

    latest_commit_timestamp: str
    """ISO 8601 timestamp string of that most recent commit."""

    ci_status: CIStatus
    """Roll-up of the CI/CD checks."""

    threads: ThreadSummary
    """Review thread resolution counts and details."""

    comments: list[Comment]
    """Every comment on the PR, each with its classification."""

    actionable_comments: list[Comment]
    """The subset of comments that call for action."""

    ambiguous_comments: list[Comment]
    """The subset of comments that call for investigation."""

    action_items: list[str]
    """Required actions phrased for a human reader."""

    needs_action: bool
    """True when something must still be done before merging."""

    cache_stats: Optional[CacheStats]
    """Cache effectiveness figures when caching is enabled, else None."""
@@ -0,0 +1,144 @@
1
+ """Input validation utilities for GoodToMerge.
2
+
3
+ This module provides validation functions for all external inputs to ensure
4
+ security and correctness. All GitHub identifiers, PR numbers, and cache keys
5
+ are validated before use.
6
+
7
+ Security: These functions implement defense-in-depth validation. Even if
8
+ callers have pre-validated inputs, these functions perform additional checks
9
+ to prevent injection attacks and ensure data integrity.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import re
15
+
16
# GitHub identifier pattern: alphanumeric, dots, hyphens, underscores.
# Must start and end with an alphanumeric character.
# Maximum length is 39 characters (1 leading + up to 37 middle + 1 trailing).
# Anchored with \Z rather than $: in Python, "$" also matches just before a
# trailing newline, which would let "name\n" slip through validation.
GITHUB_ID_PATTERN = re.compile(r"^[a-zA-Z0-9]([a-zA-Z0-9._-]{0,37}[a-zA-Z0-9])?\Z")


def validate_github_identifier(value: str, field_name: str) -> str:
    """Validate GitHub owner/repo name format.

    The identifier must:
    - Be non-empty
    - Be at most 39 characters long
    - Contain only ASCII alphanumeric characters, dots, hyphens, and
      underscores
    - Start and end with an alphanumeric character

    The whole string must match; in particular a trailing newline is
    rejected.

    NOTE(review): the same rules are applied to every field. Names that
    start with a dot or underscore, or are longer than 39 characters, are
    rejected here - confirm this is intended for repository names.

    Args:
        value: The identifier value to validate.
        field_name: The name of the field being validated (for error messages).

    Returns:
        The validated identifier (unchanged if valid).

    Raises:
        ValueError: If validation fails with a descriptive error message.

    Examples:
        >>> validate_github_identifier("my-org", "owner")
        'my-org'
        >>> validate_github_identifier("my_repo.name", "repo")
        'my_repo.name'
        >>> validate_github_identifier("", "owner")  # doctest: +IGNORE_EXCEPTION_DETAIL
        Traceback (most recent call last):
            ...
        ValueError: owner cannot be empty
    """
    if not value:
        raise ValueError(f"{field_name} cannot be empty")

    # Checked separately from the pattern so over-long input gets its own message.
    if len(value) > 39:  # GitHub max length
        raise ValueError(f"{field_name} exceeds maximum length (39 chars)")

    # fullmatch guarantees that no character is left unvalidated.
    if GITHUB_ID_PATTERN.fullmatch(value) is None:
        raise ValueError(
            f"{field_name} contains invalid characters. "
            "Must be alphanumeric with ._- allowed, "
            "starting and ending with alphanumeric."
        )

    return value
65
+
66
+
67
def validate_pr_number(value: int) -> int:
    """Validate PR number is a positive integer within bounds.

    PR numbers must be:
    - An ``int`` (``bool``, floats and strings are rejected)
    - Greater than zero (positive)
    - At most 2147483647 (max int32 value)

    Args:
        value: The PR number to validate.

    Returns:
        The validated PR number (unchanged if valid).

    Raises:
        ValueError: If the PR number is not an integer, is not positive,
            or exceeds the maximum value.

    Examples:
        >>> validate_pr_number(123)
        123
        >>> validate_pr_number(0)  # doctest: +IGNORE_EXCEPTION_DETAIL
        Traceback (most recent call last):
            ...
        ValueError: PR number must be positive
    """
    # bool is a subclass of int, so without this guard True would validate
    # as PR 1; floats would pass the range checks and strings would raise
    # TypeError instead of the documented ValueError.
    if isinstance(value, bool) or not isinstance(value, int):
        raise ValueError("PR number must be an integer")
    if value <= 0:
        raise ValueError("PR number must be positive")
    if value > 2147483647:  # Max int32
        raise ValueError("PR number exceeds maximum value")
    return value
96
+
97
+
98
def build_cache_key(*parts: object) -> str:
    """Build a cache key from validated parts.

    Converts every part to a string exactly once, validates the converted
    text, and joins those same strings with colons. Validating and joining
    the identical strings guarantees that what was checked is what ends up
    in the key.

    Rejected characters:
    - Colon (:) - Used as the key delimiter
    - Asterisk (*) - Glob wildcard that could cause unintended pattern matches
    - Question mark (?) - Glob single-character wildcard

    Parts are expected to be pre-validated via validate_github_identifier()
    or validate_pr_number(); the checks here are a defense-in-depth measure.

    Args:
        *parts: Parts to join into a cache key. Non-string parts (for
            example an int PR number) are converted with ``str()``.

    Returns:
        A colon-delimited cache key string. With no parts at all the
        result is the empty string.

    Raises:
        ValueError: If any part contains invalid characters (colon, asterisk,
            question mark) or is empty.

    Examples:
        >>> build_cache_key("pr", "myorg", "myrepo", "123", "meta")
        'pr:myorg:myrepo:123:meta'
        >>> build_cache_key("pr", "org", "repo", 456, "ci")
        'pr:org:repo:456:ci'
        >>> build_cache_key("pr", "my:org", "repo", "123")  # doctest: +IGNORE_EXCEPTION_DETAIL
        Traceback (most recent call last):
            ...
        ValueError: Invalid character in cache key part: my:org
    """
    # Convert once so the text that is validated is the text that is joined.
    text_parts = [str(part) for part in parts]

    for text in text_parts:
        if not text:
            raise ValueError("Cache key parts cannot be empty")
        if any(forbidden in text for forbidden in (":", "*", "?")):
            raise ValueError(f"Invalid character in cache key part: {text}")

    return ":".join(text_parts)
File without changes
@@ -0,0 +1,188 @@
1
+ """Claude Code parser for GoodToMerge.
2
+
3
+ This module implements the ReviewerParser interface for parsing comments
4
+ from Claude Code (Anthropic's AI coding assistant). It classifies comments
5
+ based on pattern matching to determine actionability and priority.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import re
11
+ from typing import TYPE_CHECKING
12
+
13
+ from goodtogo.core.interfaces import ReviewerParser
14
+ from goodtogo.core.models import CommentClassification, Priority, ReviewerType
15
+
16
+ if TYPE_CHECKING:
17
+ pass
18
+
19
+
20
class ClaudeCodeParser(ReviewerParser):
    """Parser for Claude Code automated reviewer comments.

    Identifies and classifies comments from Claude Code based on author
    patterns and body content. Uses keyword-based heuristics to determine
    comment classification and priority.

    Author patterns (compared case-insensitively, exact match):
        - claude[bot]
        - claude-code[bot]
        - anthropic-claude[bot]

    Body signature fallback:
        - Body contains "Claude Code" (any case)

    Classification rules, applied in this order (first match wins):
        1. NON_ACTIONABLE: task-completion / review-summary markers
        2. ACTIONABLE: contains "must", "should fix", "error", "bug"
        3. NON_ACTIONABLE: LGTM / approval keywords
        4. AMBIGUOUS: contains "consider", "suggestion", "might"
        5. AMBIGUOUS: anything that matched none of the above
    """

    # Author patterns that identify Claude Code comments
    _AUTHOR_PATTERNS: tuple[str, ...] = (
        "claude[bot]",
        "claude-code[bot]",
        "anthropic-claude[bot]",
    )

    # Body patterns that identify Claude Code comments (fallback).
    # The second pattern also matches everything the first one matches.
    _BODY_SIGNATURE_PATTERNS: tuple[re.Pattern[str], ...] = (
        re.compile(r"Generated with Claude Code", re.IGNORECASE),
        re.compile(r"Claude Code", re.IGNORECASE),
    )

    # Patterns indicating actionable comments (case-insensitive)
    _ACTIONABLE_PATTERNS: tuple[re.Pattern[str], ...] = (
        re.compile(r"\bmust\b", re.IGNORECASE),
        re.compile(r"\bshould\s+fix\b", re.IGNORECASE),
        re.compile(r"\berror\b", re.IGNORECASE),
        re.compile(r"\bbug\b", re.IGNORECASE),
    )

    # Patterns indicating non-actionable approval comments (case-insensitive)
    _APPROVAL_PATTERNS: tuple[re.Pattern[str], ...] = (
        re.compile(r"\bLGTM\b", re.IGNORECASE),
        re.compile(r"\blooks\s+good\b", re.IGNORECASE),
        re.compile(r"\bapproved?\b", re.IGNORECASE),
        re.compile(r"\bship\s+it\b", re.IGNORECASE),
    )

    # Patterns indicating task completion summaries (non-actionable)
    # These are automated review summaries, not actionable comments
    _SUMMARY_PATTERNS: tuple[re.Pattern[str], ...] = (
        # "**Claude finished @username's task**" header (username can have hyphens)
        re.compile(r"\*\*Claude finished @[\w-]+'s task\*\*", re.IGNORECASE),
        # "Claude finished reviewing" pattern
        re.compile(r"Claude finished reviewing", re.IGNORECASE),
        # Review summary headers
        re.compile(r"^###?\s*(?:PR\s+)?Review(?:\s+Summary)?:", re.MULTILINE | re.IGNORECASE),
        # Recommendation line at end of reviews
        re.compile(r"^##?\s*Recommendation\s*$", re.MULTILINE | re.IGNORECASE),
        # "Overall Assessment" sections
        re.compile(r"^##?\s*Overall Assessment\s*$", re.MULTILINE | re.IGNORECASE),
    )

    # Patterns indicating ambiguous/suggestion comments (case-insensitive)
    _SUGGESTION_PATTERNS: tuple[re.Pattern[str], ...] = (
        re.compile(r"\bconsider\b", re.IGNORECASE),
        re.compile(r"\bsuggestion\b", re.IGNORECASE),
        re.compile(r"\bmight\b", re.IGNORECASE),
    )

    @property
    def reviewer_type(self) -> ReviewerType:
        """Return the reviewer type this parser handles.

        Returns:
            ReviewerType.CLAUDE for Claude Code comments.
        """
        return ReviewerType.CLAUDE

    @staticmethod
    def _matches_any(patterns: tuple[re.Pattern[str], ...], text: str) -> bool:
        """Return True if at least one of ``patterns`` is found in ``text``."""
        return any(pattern.search(text) for pattern in patterns)

    def can_parse(self, author: str, body: str) -> bool:
        """Check if this parser can handle the comment.

        Identifies Claude Code comments by:
        1. Matching author name against known bot patterns
        2. Checking body for Claude Code signature patterns (fallback)

        Args:
            author: Comment author's username/login. A None value is
                treated as an empty string.
            body: Comment body text. A None value is treated as an empty
                string.

        Returns:
            True if the comment appears to be from Claude Code.
        """
        # Check author patterns first (most reliable)
        author_lower = (author or "").lower()
        if any(known.lower() == author_lower for known in self._AUTHOR_PATTERNS):
            return True

        # Fallback: check body for Claude signature
        return self._matches_any(self._BODY_SIGNATURE_PATTERNS, body or "")

    def parse(self, comment: dict) -> tuple[CommentClassification, Priority, bool]:
        """Parse comment and return classification.

        Rules are evaluated in a fixed order and the first match decides:
        summary markers, then actionable keywords, then approval keywords,
        then suggestion keywords, then the ambiguous default.

        Args:
            comment: Dictionary containing comment data with 'body' key.
                A missing or None body is treated as an empty string.

        Returns:
            Tuple of (classification, priority, requires_investigation):
                - classification: ACTIONABLE, NON_ACTIONABLE, or AMBIGUOUS
                - priority: MINOR for actionable, UNKNOWN otherwise
                - requires_investigation: True for AMBIGUOUS classification
        """
        # `.get("body", "")` would still return None when the key is present
        # with a null value, and Pattern.search(None) raises TypeError.
        body = comment.get("body") or ""

        # Check for task completion summaries first (these are informational)
        if self._matches_any(self._SUMMARY_PATTERNS, body):
            return (CommentClassification.NON_ACTIONABLE, Priority.UNKNOWN, False)

        # Check for actionable patterns (highest priority for non-summary comments)
        if self._matches_any(self._ACTIONABLE_PATTERNS, body):
            return (CommentClassification.ACTIONABLE, Priority.MINOR, False)

        # Check for approval/LGTM patterns (non-actionable)
        if self._matches_any(self._APPROVAL_PATTERNS, body):
            return (CommentClassification.NON_ACTIONABLE, Priority.UNKNOWN, False)

        # Check for suggestion patterns (ambiguous, needs investigation)
        if self._matches_any(self._SUGGESTION_PATTERNS, body):
            return (CommentClassification.AMBIGUOUS, Priority.UNKNOWN, True)

        # Default: ambiguous, requires investigation
        # Per design spec: "Never silently skip ambiguous comments"
        return (CommentClassification.AMBIGUOUS, Priority.UNKNOWN, True)