gtg 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- goodtogo/__init__.py +66 -0
- goodtogo/adapters/__init__.py +22 -0
- goodtogo/adapters/agent_state.py +490 -0
- goodtogo/adapters/cache_memory.py +208 -0
- goodtogo/adapters/cache_sqlite.py +305 -0
- goodtogo/adapters/github.py +523 -0
- goodtogo/adapters/time_provider.py +123 -0
- goodtogo/cli.py +311 -0
- goodtogo/container.py +313 -0
- goodtogo/core/__init__.py +0 -0
- goodtogo/core/analyzer.py +982 -0
- goodtogo/core/errors.py +100 -0
- goodtogo/core/interfaces.py +388 -0
- goodtogo/core/models.py +312 -0
- goodtogo/core/validation.py +144 -0
- goodtogo/parsers/__init__.py +0 -0
- goodtogo/parsers/claude.py +188 -0
- goodtogo/parsers/coderabbit.py +352 -0
- goodtogo/parsers/cursor.py +135 -0
- goodtogo/parsers/generic.py +192 -0
- goodtogo/parsers/greptile.py +249 -0
- gtg-0.4.0.dist-info/METADATA +278 -0
- gtg-0.4.0.dist-info/RECORD +27 -0
- gtg-0.4.0.dist-info/WHEEL +5 -0
- gtg-0.4.0.dist-info/entry_points.txt +2 -0
- gtg-0.4.0.dist-info/licenses/LICENSE +21 -0
- gtg-0.4.0.dist-info/top_level.txt +1 -0
goodtogo/core/models.py
ADDED
|
@@ -0,0 +1,312 @@
|
|
|
1
|
+
"""Data models for GoodToMerge PR analysis.
|
|
2
|
+
|
|
3
|
+
This module defines all the Pydantic models and enums used throughout
|
|
4
|
+
the GoodToMerge library for representing PR analysis results, comments,
|
|
5
|
+
CI status, and thread information.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from enum import Enum
|
|
11
|
+
from typing import Optional
|
|
12
|
+
|
|
13
|
+
from pydantic import BaseModel
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class PRStatus(str, Enum):
    """Overall verdict for a pull request; each member maps to an exit code.

    Member -> exit code:
        READY              -> 0  (all clear, ready to merge)
        ACTION_REQUIRED    -> 1  (actionable comments exist)
        UNRESOLVED_THREADS -> 2  (unresolved threads exist)
        CI_FAILING         -> 3  (CI/CD checks failing)
        ERROR              -> 4  (error fetching data)
    """

    READY = "READY"
    ACTION_REQUIRED = "ACTION_REQUIRED"
    # The serialized value is the short form "UNRESOLVED", not the member name.
    UNRESOLVED_THREADS = "UNRESOLVED"
    CI_FAILING = "CI_FAILING"
    ERROR = "ERROR"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class CommentClassification(str, Enum):
    """Outcome of classifying a review comment.

    Applied to comments from both automated reviewers and humans to decide
    what action, if any, the comment calls for.
    """

    ACTIONABLE = "ACTIONABLE"
    """The comment has to be addressed before the PR can merge."""

    NON_ACTIONABLE = "NON_ACTIONABLE"
    """The comment is safe to ignore (informational, nitpick, etc.)."""

    AMBIGUOUS = "AMBIGUOUS"
    """Classification could not be determined; an agent must investigate."""
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class Priority(str, Enum):
    """Priority level attached to a comment.

    Lets actionable comments be sorted and prioritized; corresponds to the
    severity indicators emitted by the various automated reviewers.
    """

    CRITICAL = "CRITICAL"
    """Blocking issue that must be fixed immediately."""

    MAJOR = "MAJOR"
    """Significant issue that must be fixed before merge."""

    MINOR = "MINOR"
    """Notable, non-blocking issue that should be fixed."""

    TRIVIAL = "TRIVIAL"
    """Minor improvement that would be nice to have."""

    UNKNOWN = "UNKNOWN"
    """Priority could not be determined."""
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class ReviewerType(str, Enum):
    """Identifies which reviewer produced a comment.

    The value is used to pick the parser responsible for comments from a
    given automated code review tool.
    """

    CODERABBIT = "coderabbit"
    """The CodeRabbit AI code reviewer."""

    GREPTILE = "greptile"
    """The Greptile code reviewer."""

    CLAUDE = "claude"
    """The Claude Code reviewer."""

    CURSOR = "cursor"
    """The Cursor/Bugbot code reviewer."""

    HUMAN = "human"
    """A human (non-automated) reviewer."""

    UNKNOWN = "unknown"
    """A reviewer whose type is not known."""
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
class Comment(BaseModel):
    """One PR review comment together with its classification.

    Holds the comment's content, the classification assigned to it, and the
    metadata needed to locate and address it.
    """

    id: str
    """Identifier that uniquely names this comment."""

    author: str
    """Username of whoever wrote the comment."""

    reviewer_type: ReviewerType
    """Kind of reviewer the comment came from."""

    body: str
    """Complete text of the comment."""

    classification: CommentClassification
    """Assigned classification: ACTIONABLE, NON_ACTIONABLE, or AMBIGUOUS."""

    priority: Priority
    """Priority assigned to the comment (relevant for actionable ones)."""

    requires_investigation: bool
    """True when the comment is AMBIGUOUS and an agent must investigate."""

    thread_id: Optional[str]
    """Review thread the comment belongs to, or None when there is none."""

    is_resolved: bool
    """Whether the enclosing thread has been resolved."""

    is_outdated: bool
    """Whether the comment is outdated because the code has changed."""

    file_path: Optional[str]
    """File the comment refers to, or None when there is none."""

    line_number: Optional[int]
    """Line in that file the comment refers to, or None when there is none."""

    created_at: str
    """Creation time of the comment as an ISO 8601 timestamp."""

    addressed_in_commit: Optional[str]
    """SHA of the commit that addressed the comment, when known."""

    url: Optional[str] = None
    """GitHub URL for viewing the comment, provided for agent workflows."""
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
class CICheck(BaseModel):
    """Status of one CI/CD check run, such as a build, test, or lint job."""

    name: str
    """The check's name."""

    status: str
    """Where the check stands now: 'success', 'failure', or 'pending'."""

    conclusion: Optional[str]
    """The check's final conclusion once it has completed."""

    url: Optional[str]
    """Link to the check's details or logs."""
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
class CIStatus(BaseModel):
    """Roll-up of every CI/CD check on a PR.

    Combines summary counts with the per-check details.
    """

    state: str
    """Combined state: 'success', 'failure', or 'pending'."""

    total_checks: int
    """How many CI checks there are in total."""

    passed: int
    """How many checks passed."""

    failed: int
    """How many checks failed."""

    pending: int
    """How many checks are still running or pending."""

    checks: list[CICheck]
    """The individual CI check results."""
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
class UnresolvedThread(BaseModel):
    """Details of a single review thread that is still unresolved.

    Bundles everything an agent needs to resolve the thread without making
    further API calls, including the GraphQL node ID required by the
    resolution mutation.
    """

    id: str
    """GraphQL node ID to pass to the resolution mutation."""

    url: Optional[str]
    """Link to the thread in the GitHub UI, when available."""

    path: str
    """Path of the file the thread is attached to."""

    line: Optional[int]
    """Line number within the diff."""

    author: str
    """Username of whoever wrote the thread's first comment."""

    body_preview: str
    """The first 200 characters of the first comment's body."""
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
class ThreadSummary(BaseModel):
    """Summary of review-thread resolution on a PR.

    The counts show whether any discussions are still unresolved.
    """

    total: int
    """How many review threads exist in total."""

    resolved: int
    """How many threads are resolved."""

    unresolved: int
    """How many threads are unresolved."""

    outdated: int
    """How many threads are outdated (code changed since the comment)."""

    unresolved_threads: list[UnresolvedThread]
    """Per-thread details for every unresolved thread, for agent workflows."""
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
class CacheStats(BaseModel):
    """Metrics describing how the cache performed.

    Reported so cache effectiveness can be assessed when tuning performance.
    """

    hits: int
    """How many lookups were cache hits."""

    misses: int
    """How many lookups were cache misses."""

    hit_rate: float
    """Hit rate expressed as a decimal between 0.0 and 1.0."""
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
class PRAnalysisResult(BaseModel):
    """Full result of analyzing a PR - the library's main output.

    PRAnalyzer.analyze() returns this model. It carries everything an AI
    agent needs in order to decide the next action for the PR.
    """

    status: PRStatus
    """Final verdict on whether the PR is ready to merge."""

    pr_number: int
    """Number of the analyzed PR."""

    repo_owner: str
    """Owner of the repository (an organization or a user)."""

    repo_name: str
    """Name of the repository."""

    latest_commit_sha: str
    """SHA of the most recent commit on the PR branch."""

    latest_commit_timestamp: str
    """Time of the most recent commit as an ISO 8601 timestamp."""

    ci_status: CIStatus
    """Aggregated status of the CI/CD checks."""

    threads: ThreadSummary
    """Summary of how many review threads are resolved or not."""

    comments: list[Comment]
    """Every comment on the PR, each with its classification."""

    actionable_comments: list[Comment]
    """The subset of comments that require action."""

    ambiguous_comments: list[Comment]
    """The subset of comments that require investigation."""

    action_items: list[str]
    """Actions still needed, phrased for human readers."""

    needs_action: bool
    """True when anything has to be done before the PR can merge."""

    cache_stats: Optional[CacheStats]
    """Cache performance metrics when caching is enabled."""
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
"""Input validation utilities for GoodToMerge.
|
|
2
|
+
|
|
3
|
+
This module provides validation functions for all external inputs to ensure
|
|
4
|
+
security and correctness. All GitHub identifiers, PR numbers, and cache keys
|
|
5
|
+
are validated before use.
|
|
6
|
+
|
|
7
|
+
Security: These functions implement defense-in-depth validation. Even if
|
|
8
|
+
callers have pre-validated inputs, these functions perform additional checks
|
|
9
|
+
to prevent injection attacks and ensure data integrity.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import re
|
|
15
|
+
|
|
16
|
+
# GitHub identifier pattern: alphanumeric, dots, hyphens, underscores.
# Must start and end with an alphanumeric character.
# Maximum length is 39 characters (1 leading + up to 37 interior + 1 trailing).
# Anchored with \Z rather than $: in Python, $ also matches just before a
# trailing newline, which would let a value such as "my-org\n" through.
GITHUB_ID_PATTERN = re.compile(r"^[a-zA-Z0-9]([a-zA-Z0-9._-]{0,37}[a-zA-Z0-9])?\Z")


def validate_github_identifier(value: str, field_name: str) -> str:
    """Validate GitHub owner/repo name format.

    GitHub identifiers (usernames, organization names, repository names) must:
    - Be non-empty
    - Be at most 39 characters long
    - Contain only alphanumeric characters, dots, hyphens, and underscores
    - Start and end with an alphanumeric character

    The whole string must match; a trailing newline or any other trailing
    character causes rejection.

    Args:
        value: The identifier value to validate.
        field_name: The name of the field being validated (for error messages).

    Returns:
        The validated identifier (unchanged if valid).

    Raises:
        ValueError: If validation fails with a descriptive error message.

    Examples:
        >>> validate_github_identifier("my-org", "owner")
        'my-org'
        >>> validate_github_identifier("my_repo.name", "repo")
        'my_repo.name'
        >>> validate_github_identifier("", "owner")  # doctest: +IGNORE_EXCEPTION_DETAIL
        Traceback (most recent call last):
            ...
        ValueError: owner cannot be empty
    """
    if not value:
        raise ValueError(f"{field_name} cannot be empty")

    # Checked separately from the regex so over-long values get a specific
    # error message instead of the generic "invalid characters" one.
    if len(value) > 39:  # GitHub max length
        raise ValueError(f"{field_name} exceeds maximum length (39 chars)")

    # fullmatch() requires the entire string to match, so nothing can trail
    # the final alphanumeric character.
    if GITHUB_ID_PATTERN.fullmatch(value) is None:
        raise ValueError(
            f"{field_name} contains invalid characters. "
            "Must be alphanumeric with ._- allowed, "
            "starting and ending with alphanumeric."
        )

    return value
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def validate_pr_number(value: int) -> int:
    """Validate PR number is a positive integer within bounds.

    PR numbers must be:
    - An integer (booleans and non-integer numbers are rejected)
    - Greater than zero (positive)
    - At most 2147483647 (max int32 value)

    Args:
        value: The PR number to validate.

    Returns:
        The validated PR number (unchanged if valid).

    Raises:
        ValueError: If the PR number is not an integer, is not positive,
            or exceeds the maximum value.

    Examples:
        >>> validate_pr_number(123)
        123
        >>> validate_pr_number(0)  # doctest: +IGNORE_EXCEPTION_DETAIL
        Traceback (most recent call last):
            ...
        ValueError: PR number must be positive
    """
    # bool is a subclass of int, so True would otherwise be accepted as PR #1;
    # floats such as 1.5 would also pass the range checks below.
    if isinstance(value, bool) or not isinstance(value, int):
        raise ValueError("PR number must be an integer")
    if value <= 0:
        raise ValueError("PR number must be positive")
    if value > 2147483647:  # Max int32
        raise ValueError("PR number exceeds maximum value")
    return value
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def build_cache_key(*parts: object) -> str:
    """Build a cache key from validated parts.

    Constructs a colon-delimited cache key from the provided parts.
    This function performs defense-in-depth validation to ensure that
    special characters that could cause cache key collisions or glob
    pattern issues are rejected.

    Rejected characters:
    - Colon (:) - Used as the key delimiter
    - Asterisk (*) - Glob wildcard that could cause unintended pattern matches
    - Question mark (?) - Glob single-character wildcard

    All parts must be pre-validated via validate_github_identifier()
    or validate_pr_number() before calling this function, but this
    function still performs validation as a defense-in-depth measure.

    Args:
        *parts: Variable number of parts to join into a cache key.
            Each part is converted with str() before validation, so
            non-string values such as integer PR numbers are accepted.

    Returns:
        A colon-delimited cache key string.

    Raises:
        ValueError: If any part contains invalid characters (colon, asterisk,
            question mark) or is empty.

    Examples:
        >>> build_cache_key("pr", "myorg", "myrepo", "123", "meta")
        'pr:myorg:myrepo:123:meta'
        >>> build_cache_key("pr", "org", "repo", 456, "ci")
        'pr:org:repo:456:ci'
        >>> build_cache_key("pr", "my:org", "repo", "123")  # doctest: +IGNORE_EXCEPTION_DETAIL
        Traceback (most recent call last):
            ...
        ValueError: Invalid character in cache key part: my:org
    """
    forbidden = (":", "*", "?")
    validated: list[str] = []

    # Convert each part exactly once and reuse the converted text for the
    # join, so the validated value and the emitted value cannot diverge.
    for part in parts:
        text = str(part)
        if not text:
            raise ValueError("Cache key parts cannot be empty")
        if any(char in text for char in forbidden):
            raise ValueError(f"Invalid character in cache key part: {part}")
        validated.append(text)

    return ":".join(validated)
|
|
File without changes
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
"""Claude Code parser for GoodToMerge.
|
|
2
|
+
|
|
3
|
+
This module implements the ReviewerParser interface for parsing comments
|
|
4
|
+
from Claude Code (Anthropic's AI coding assistant). It classifies comments
|
|
5
|
+
based on pattern matching to determine actionability and priority.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import re
|
|
11
|
+
from typing import TYPE_CHECKING
|
|
12
|
+
|
|
13
|
+
from goodtogo.core.interfaces import ReviewerParser
|
|
14
|
+
from goodtogo.core.models import CommentClassification, Priority, ReviewerType
|
|
15
|
+
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
pass
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class ClaudeCodeParser(ReviewerParser):
    """Parser for Claude Code automated reviewer comments.

    Identifies and classifies comments from Claude Code based on author
    patterns and body content. Uses keyword-based heuristics to determine
    comment classification and priority.

    Author patterns (exact match, compared case-insensitively):
        - claude[bot]
        - claude-code[bot]
        - anthropic-claude[bot]

    Body signature fallback:
        - Body contains "Claude Code" (case-insensitive)

    Classification rules, evaluated in this order (first match wins):
        1. NON_ACTIONABLE: task-completion / review-summary markers
        2. ACTIONABLE: contains "must", "should fix", "error", "bug"
        3. NON_ACTIONABLE: LGTM / approval keywords
        4. AMBIGUOUS: contains "consider", "suggestion", "might"
        5. AMBIGUOUS: anything unclassified
    """

    # Author patterns that identify Claude Code comments
    _AUTHOR_PATTERNS: tuple[str, ...] = (
        "claude[bot]",
        "claude-code[bot]",
        "anthropic-claude[bot]",
    )

    # Body patterns that identify Claude Code comments (fallback).
    # The second pattern also matches everything the first one does; both
    # are kept so the tuple's contents stay unchanged.
    _BODY_SIGNATURE_PATTERNS: tuple[re.Pattern[str], ...] = (
        re.compile(r"Generated with Claude Code", re.IGNORECASE),
        re.compile(r"Claude Code", re.IGNORECASE),
    )

    # Patterns indicating actionable comments (case-insensitive)
    _ACTIONABLE_PATTERNS: tuple[re.Pattern[str], ...] = (
        re.compile(r"\bmust\b", re.IGNORECASE),
        re.compile(r"\bshould\s+fix\b", re.IGNORECASE),
        re.compile(r"\berror\b", re.IGNORECASE),
        re.compile(r"\bbug\b", re.IGNORECASE),
    )

    # Patterns indicating non-actionable approval comments (case-insensitive)
    _APPROVAL_PATTERNS: tuple[re.Pattern[str], ...] = (
        re.compile(r"\bLGTM\b", re.IGNORECASE),
        re.compile(r"\blooks\s+good\b", re.IGNORECASE),
        re.compile(r"\bapproved?\b", re.IGNORECASE),
        re.compile(r"\bship\s+it\b", re.IGNORECASE),
    )

    # Patterns indicating task completion summaries (non-actionable)
    # These are automated review summaries, not actionable comments
    _SUMMARY_PATTERNS: tuple[re.Pattern[str], ...] = (
        # "**Claude finished @username's task**" header (username can have hyphens)
        re.compile(r"\*\*Claude finished @[\w-]+'s task\*\*", re.IGNORECASE),
        # "Claude finished reviewing" pattern
        re.compile(r"Claude finished reviewing", re.IGNORECASE),
        # Review summary headers
        re.compile(r"^###?\s*(?:PR\s+)?Review(?:\s+Summary)?:", re.MULTILINE | re.IGNORECASE),
        # Recommendation line at end of reviews
        re.compile(r"^##?\s*Recommendation\s*$", re.MULTILINE | re.IGNORECASE),
        # "Overall Assessment" sections
        re.compile(r"^##?\s*Overall Assessment\s*$", re.MULTILINE | re.IGNORECASE),
    )

    # Patterns indicating ambiguous/suggestion comments (case-insensitive)
    _SUGGESTION_PATTERNS: tuple[re.Pattern[str], ...] = (
        re.compile(r"\bconsider\b", re.IGNORECASE),
        re.compile(r"\bsuggestion\b", re.IGNORECASE),
        re.compile(r"\bmight\b", re.IGNORECASE),
    )

    @staticmethod
    def _matches_any(patterns: tuple[re.Pattern[str], ...], text: str) -> bool:
        """Return True if at least one of *patterns* is found in *text*."""
        return any(pattern.search(text) for pattern in patterns)

    @property
    def reviewer_type(self) -> ReviewerType:
        """Return the reviewer type this parser handles.

        Returns:
            ReviewerType.CLAUDE for Claude Code comments.
        """
        return ReviewerType.CLAUDE

    def can_parse(self, author: str, body: str) -> bool:
        """Check if this parser can handle the comment.

        Identifies Claude Code comments by:
        1. Matching author name against known bot patterns
        2. Checking body for Claude Code signature patterns (fallback)

        A missing (None) author or body is treated as an empty string
        rather than raising.

        Args:
            author: Comment author's username/login.
            body: Comment body text.

        Returns:
            True if the comment appears to be from Claude Code.
        """
        # Check author patterns first (most reliable)
        author_lower = (author or "").lower()
        if any(known.lower() == author_lower for known in self._AUTHOR_PATTERNS):
            return True

        # Fallback: check body for Claude signature
        return self._matches_any(self._BODY_SIGNATURE_PATTERNS, body or "")

    def parse(self, comment: dict) -> tuple[CommentClassification, Priority, bool]:
        """Parse comment and return classification.

        Classifies Claude Code comments based on keyword patterns, checked
        in this order (first match wins):
        - Non-actionable: task-completion / review-summary markers
        - Actionable: Contains "must", "should fix", "error", "bug"
        - Non-actionable: LGTM / approval keywords
        - Ambiguous: Contains "consider", "suggestion", "might" or unclassified

        Args:
            comment: Dictionary containing comment data with 'body' key.
                A missing or None body is treated as an empty string.

        Returns:
            Tuple of (classification, priority, requires_investigation):
            - classification: ACTIONABLE, NON_ACTIONABLE, or AMBIGUOUS
            - priority: MINOR for actionable, UNKNOWN otherwise
            - requires_investigation: True for AMBIGUOUS classification
        """
        # `.get("body", "")` would still yield None when the key is present
        # with a null value, and regex search on None raises TypeError.
        body = comment.get("body") or ""

        non_actionable = (CommentClassification.NON_ACTIONABLE, Priority.UNKNOWN, False)
        actionable = (CommentClassification.ACTIONABLE, Priority.MINOR, False)
        ambiguous = (CommentClassification.AMBIGUOUS, Priority.UNKNOWN, True)

        # Order matters: summaries are informational and win over everything;
        # actionable keywords then outrank approval and suggestion wording.
        rules = (
            (self._SUMMARY_PATTERNS, non_actionable),
            (self._ACTIONABLE_PATTERNS, actionable),
            (self._APPROVAL_PATTERNS, non_actionable),
            (self._SUGGESTION_PATTERNS, ambiguous),
        )
        for patterns, verdict in rules:
            if self._matches_any(patterns, body):
                return verdict

        # Default: ambiguous, requires investigation
        # Per design spec: "Never silently skip ambiguous comments"
        return ambiguous
|