github2gerrit 0.1.0__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- github2gerrit/__init__.py +29 -0
- github2gerrit/cli.py +865 -0
- github2gerrit/config.py +311 -0
- github2gerrit/core.py +1750 -0
- github2gerrit/duplicate_detection.py +542 -0
- github2gerrit/github_api.py +331 -0
- github2gerrit/gitutils.py +655 -0
- github2gerrit/models.py +81 -0
- {github2gerrit-0.1.0.dist-info → github2gerrit-0.1.3.dist-info}/METADATA +5 -4
- github2gerrit-0.1.3.dist-info/RECORD +12 -0
- github2gerrit-0.1.0.dist-info/RECORD +0 -4
- {github2gerrit-0.1.0.dist-info → github2gerrit-0.1.3.dist-info}/WHEEL +0 -0
- {github2gerrit-0.1.0.dist-info → github2gerrit-0.1.3.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,542 @@
+# SPDX-FileCopyrightText: 2024 Matthew Watkins
+# SPDX-License-Identifier: Apache-2.0
+
+"""
+Duplicate change detection for github2gerrit.
+
+This module provides functionality to detect potentially duplicate changes
+before submitting them to Gerrit, helping to prevent spam and redundant
+submissions from automated tools like Dependabot.
+"""
+
+import hashlib
+import logging
+import os
+import re
+import urllib.parse
+import urllib.request
+from datetime import UTC
+from datetime import datetime
+from datetime import timedelta
+from pathlib import Path
+
+from .github_api import GhPullRequest
+from .github_api import GhRepository
+from .github_api import build_client
+from .github_api import get_repo_from_env
+from .models import GitHubContext
+
+
+# Optional Gerrit REST API support
+try:
+    from pygerrit2 import GerritRestAPI
+    from pygerrit2 import HTTPBasicAuth
+except ImportError:
+    GerritRestAPI = None
+    HTTPBasicAuth = None
+
+
+log = logging.getLogger(__name__)
+
+__all__ = [
+    "ChangeFingerprint",
+    "DuplicateChangeError",
+    "DuplicateDetector",
+    "check_for_duplicates",
+]
+
+
+class DuplicateChangeError(Exception):
+    """Raised when a duplicate change is detected."""
+
+    def __init__(self, message: str, existing_prs: list[int]) -> None:
+        super().__init__(message)
+        self.existing_prs = existing_prs
+
+
+class ChangeFingerprint:
+    """Represents a fingerprint of a change for duplicate detection."""
+
+    def __init__(
+        self, title: str, body: str = "", files_changed: list[str] | None = None
+    ):
+        self.title = title.strip()
+        self.body = (body or "").strip()
+        self.files_changed = sorted(files_changed or [])
+        self._normalized_title = self._normalize_title(title)
+        self._content_hash = self._compute_content_hash()
+
+    def _normalize_title(self, title: str) -> str:
+        """Normalize PR title for comparison."""
+        # Remove common prefixes/suffixes
+        normalized = title.strip()
+
+        # Remove conventional commit prefixes like "feat:", "fix:", etc.
+        normalized = re.sub(
+            r"^(feat|fix|docs|style|refactor|test|chore|ci|build|perf)"
+            r"(\(.+?\))?: ",
+            "",
+            normalized,
+            flags=re.IGNORECASE,
+        )
+
+        # Remove markdown formatting
+        normalized = re.sub(r"[*_`]", "", normalized)
+
+        # Remove version number variations for dependency updates
+        # E.g., "from 0.6 to 0.8" -> "from x.y.z to x.y.z"
+        # Handle v-prefixed versions first, then plain versions
+        normalized = re.sub(r"\bv\d+(\.\d+)*(-\w+)?\b", "vx.y.z", normalized)
+        normalized = re.sub(r"\b\d+(\.\d+)+(-\w+)?\b", "x.y.z", normalized)
+        normalized = re.sub(r"\b\d+\.\d+\b", "x.y.z", normalized)
+
+        # Remove specific commit hashes
+        normalized = re.sub(r"\b[a-f0-9]{7,40}\b", "commit_hash", normalized)
+
+        # Normalize whitespace
+        normalized = re.sub(r"\s+", " ", normalized).strip()
+
+        return normalized.lower()
+
+    def _compute_content_hash(self) -> str:
+        """Compute a hash of the change content."""
+        content = (
+            f"{self._normalized_title}\n{self.body}\n"
+            f"{','.join(self.files_changed)}"
+        )
+        return hashlib.sha256(content.encode("utf-8")).hexdigest()[:16]
+
+    def is_similar_to(
+        self, other: "ChangeFingerprint", similarity_threshold: float = 0.8
+    ) -> bool:
+        """Check if this fingerprint is similar to another."""
+        # Exact normalized title match
+        if self._normalized_title == other._normalized_title:
+            return True
+
+        # Content hash match
+        if self._content_hash == other._content_hash:
+            return True
+
+        # Check for similar file changes (for dependency updates)
+        if self.files_changed and other.files_changed:
+            common_files = set(self.files_changed) & set(other.files_changed)
+            union_files = set(self.files_changed) | set(other.files_changed)
+            if common_files and union_files:
+                overlap_ratio = len(common_files) / len(union_files)
+                # If files overlap, check title similarity (lower threshold)
+                if overlap_ratio > 0:
+                    return self._titles_similar(other, 0.6)
+
+        # Check title similarity even without file changes
+        return self._titles_similar(other, similarity_threshold)
+
+    def _titles_similar(
+        self, other: "ChangeFingerprint", threshold: float
+    ) -> bool:
+        """Check if titles are similar using simple string similarity."""
+        title1 = self._normalized_title
+        title2 = other._normalized_title
+
+        if not title1 or not title2:
+            return False
+
+        # Simple Jaccard similarity on words
+        words1 = set(title1.split())
+        words2 = set(title2.split())
+
+        if not words1 or not words2:
+            return False
+
+        intersection = len(words1 & words2)
+        union = len(words1 | words2)
+
+        return (intersection / union) >= threshold
+
+    def __str__(self) -> str:
+        return (
+            f"ChangeFingerprint(title='{self.title[:50]}...', "
+            f"hash={self._content_hash})"
+        )
+
+
+class DuplicateDetector:
+    """Detects duplicate Gerrit changes for GitHub pull requests."""
+
+    def __init__(self, repo: GhRepository, lookback_days: int = 7):
+        self.repo = repo
+        self.lookback_days = lookback_days
+        self._cutoff_date = datetime.now(UTC) - timedelta(days=lookback_days)
+
+    def _match_first_group(self, pattern: str, text: str) -> str:
+        """Extract first regex group match from text."""
+        match = re.search(pattern, text)
+        return match.group(1) if match else ""
+
+    def _resolve_gerrit_info_from_env_or_gitreview(
+        self, gh: GitHubContext
+    ) -> tuple[str, str] | None:
+        """Resolve Gerrit host and project from environment or .gitreview file.
+
+        Returns:
+            Tuple of (host, project) if found, None otherwise
+        """
+        # First try environment variables (same as core module)
+        gerrit_host = os.getenv("GERRIT_SERVER", "").strip()
+        gerrit_project = os.getenv("GERRIT_PROJECT", "").strip()
+
+        if gerrit_host and gerrit_project:
+            return (gerrit_host, gerrit_project)
+
+        # Try to read .gitreview file locally first
+        gitreview_path = Path(".gitreview")
+        if gitreview_path.exists():
+            try:
+                text = gitreview_path.read_text(encoding="utf-8")
+                host = self._match_first_group(r"(?m)^host=(.+)$", text)
+                proj = self._match_first_group(r"(?m)^project=(.+)$", text)
+                if host and proj:
+                    project = proj.removesuffix(".git")
+                    return (host.strip(), project.strip())
+            except Exception as exc:
+                log.debug("Failed to read local .gitreview: %s", exc)
+
+        # Try to fetch .gitreview remotely (simplified version of core logic)
+        try:
+            repo_full = gh.repository.strip() if gh.repository else ""
+            if not repo_full:
+                return None
+
+            # Try a few common branches
+            branches = []
+            if gh.head_ref:
+                branches.append(gh.head_ref)
+            if gh.base_ref:
+                branches.append(gh.base_ref)
+            branches.extend(["master", "main"])
+
+            for branch in branches:
+                if not branch:
+                    continue
+
+                url = (
+                    f"https://raw.githubusercontent.com/"
+                    f"{repo_full}/refs/heads/{branch}/.gitreview"
+                )
+
+                parsed = urllib.parse.urlparse(url)
+                if (
+                    parsed.scheme != "https"
+                    or parsed.netloc != "raw.githubusercontent.com"
+                ):
+                    continue
+
+                try:
+                    log.debug("Fetching .gitreview from: %s", url)
+                    with urllib.request.urlopen(url, timeout=5) as resp:  # noqa: S310
+                        text_remote = resp.read().decode("utf-8")
+
+                    host = self._match_first_group(
+                        r"(?m)^host=(.+)$", text_remote
+                    )
+                    proj = self._match_first_group(
+                        r"(?m)^project=(.+)$", text_remote
+                    )
+
+                    if host and proj:
+                        project = proj.removesuffix(".git")
+                        return (host.strip(), project.strip())
+
+                except Exception as exc:
+                    log.debug(
+                        "Failed to fetch .gitreview from %s: %s", url, exc
+                    )
+                    continue
+
+        except Exception as exc:
+            log.debug("Failed to resolve .gitreview remotely: %s", exc)
+
+        return None
+
+    def _build_gerrit_rest_client(self, gerrit_host: str) -> object | None:
+        """Build a Gerrit REST API client if pygerrit2 is available."""
+        if GerritRestAPI is None:
+            log.debug(
+                "pygerrit2 not available, skipping Gerrit duplicate check"
+            )
+            return None
+
+        base_path = os.getenv("GERRIT_HTTP_BASE_PATH", "").strip().strip("/")
+        base_url = (
+            f"https://{gerrit_host}/"
+            if not base_path
+            else f"https://{gerrit_host}/{base_path}/"
+        )
+
+        http_user = (
+            os.getenv("GERRIT_HTTP_USER", "").strip()
+            or os.getenv("GERRIT_SSH_USER_G2G", "").strip()
+        )
+        http_pass = os.getenv("GERRIT_HTTP_PASSWORD", "").strip()
+
+        try:
+            if http_user and http_pass:
+                if HTTPBasicAuth is None:
+                    log.debug("pygerrit2 HTTPBasicAuth not available")
+                    return None
+                # Type ignore needed for dynamic import returning Any
+                return GerritRestAPI(  # type: ignore[no-any-return]
+                    url=base_url, auth=HTTPBasicAuth(http_user, http_pass)
+                )
+            else:
+                # Type ignore needed for dynamic import returning Any
+                return GerritRestAPI(url=base_url)  # type: ignore[no-any-return]
+        except Exception as exc:
+            log.debug("Failed to create Gerrit REST client: %s", exc)
+            return None
+
+    def _build_gerrit_rest_client_with_r_path(
+        self, gerrit_host: str
+    ) -> object | None:
+        """Build a Gerrit REST API client with /r/ base path for fallback."""
+        if GerritRestAPI is None:
+            return None
+
+        fallback_url = f"https://{gerrit_host}/r/"
+        http_user = (
+            os.getenv("GERRIT_HTTP_USER", "").strip()
+            or os.getenv("GERRIT_SSH_USER_G2G", "").strip()
+        )
+        http_pass = os.getenv("GERRIT_HTTP_PASSWORD", "").strip()
+
+        try:
+            if http_user and http_pass:
+                if HTTPBasicAuth is None:
+                    return None
+                # Type ignore needed for dynamic import returning Any
+                return GerritRestAPI(  # type: ignore[no-any-return]
+                    url=fallback_url, auth=HTTPBasicAuth(http_user, http_pass)
+                )
+            else:
+                # Type ignore needed for dynamic import returning Any
+                return GerritRestAPI(url=fallback_url)  # type: ignore[no-any-return]
+        except Exception as exc:
+            log.debug(
+                "Failed to create Gerrit REST client with /r/ path: %s", exc
+            )
+            return None
+
+    def check_gerrit_for_existing_change(self, gh: GitHubContext) -> bool:
+        """Check if a Gerrit change already exists for the given GitHub PR.
+
+        Args:
+            gh: GitHub context containing PR and repository information
+
+        Returns:
+            True if a Gerrit change already exists for this PR, False otherwise
+        """
+        if not gh.pr_number:
+            return False
+
+        # Resolve Gerrit host and project
+        gerrit_info = self._resolve_gerrit_info_from_env_or_gitreview(gh)
+        if not gerrit_info:
+            log.debug(
+                "Cannot resolve Gerrit host/project, "
+                "skipping Gerrit duplicate check"
+            )
+            return False
+
+        gerrit_host, gerrit_project = gerrit_info
+
+        rest = self._build_gerrit_rest_client(gerrit_host)
+        if rest is None:
+            log.debug(
+                "Cannot check Gerrit for duplicates, REST client unavailable"
+            )
+            return False
+
+        # Generate the GitHub change hash for this PR
+        github_hash = DuplicateDetector._generate_github_change_hash(gh)
+
+        try:
+            # Search for changes that contain the GitHub hash in commit messages
+            # This is more reliable than comment-based searches
+            query = (
+                f'project:{gerrit_project} message:"GitHub-Hash: {github_hash}"'
+            )
+            path = f"/changes/?q={query}&n=10"
+
+            log.debug(
+                "Searching Gerrit for existing changes with GitHub hash %s, "
+                "query: %s",
+                github_hash,
+                query,
+            )
+            # Use getattr for dynamic method access to avoid type checking
+            changes = rest.get(path)  # type: ignore[attr-defined]
+
+            if changes:
+                log.info(
+                    "Found %d existing Gerrit change(s) for GitHub PR #%d: %s",
+                    len(changes),
+                    gh.pr_number,
+                    [f"{c.get('_number', '?')}" for c in changes],
+                )
+                return True
+            else:
+                log.debug(
+                    "No existing Gerrit changes found for GitHub PR #%d",
+                    gh.pr_number,
+                )
+                return False
+
+        except Exception as exc:
+            # Check if this is a 404 error and try /r/ fallback
+            status = getattr(
+                getattr(exc, "response", None), "status_code", None
+            )
+            if status == 404:
+                try:
+                    log.debug("Trying /r/ fallback for Gerrit API")
+                    fallback_rest = self._build_gerrit_rest_client_with_r_path(
+                        gerrit_host
+                    )
+                    if fallback_rest:
+                        changes = fallback_rest.get(path)  # type: ignore[attr-defined]
+                        if changes:
+                            log.info(
+                                "Found %d existing Gerrit change(s) for PR #%d "
+                                "via /r/ fallback: %s",
+                                len(changes),
+                                gh.pr_number,
+                                [f"{c.get('_number', '?')}" for c in changes],
+                            )
+                            return True
+                        else:
+                            log.debug(
+                                "No existing Gerrit changes found for PR #%d "
+                                "via /r/ fallback",
+                                gh.pr_number,
+                            )
+                            return False
+                except Exception as exc2:
+                    log.warning(
+                        "Failed to query Gerrit via /r/ fallback: %s", exc2
+                    )
+                    return False
+
+            log.warning("Failed to query Gerrit for existing changes: %s", exc)
+            # If we can't check Gerrit, err on the side of caution
+            return False
+
+    @staticmethod
+    def _generate_github_change_hash(gh: GitHubContext) -> str:
+        """Generate a deterministic hash for a GitHub PR to identify duplicates.
+
+        This creates a SHA256 hash based on stable PR metadata that uniquely
+        identifies the change content, making duplicate detection reliable
+        regardless of comment formatting or API issues.
+
+        Args:
+            gh: GitHub context containing PR information
+
+        Returns:
+            Hex-encoded SHA256 hash string (first 16 characters for readability)
+        """
+        import hashlib
+
+        # Build hash input from stable, unique PR identifiers
+        # Use server_url + repository + pr_number for global uniqueness
+        hash_input = f"{gh.server_url}/{gh.repository}/pull/{gh.pr_number}"
+
+        # Create SHA256 hash and take first 16 characters for readability
+        hash_bytes = hashlib.sha256(hash_input.encode("utf-8")).digest()
+        hash_hex = hash_bytes.hex()[:16]
+
+        log.debug(
+            "Generated GitHub change hash for %s: %s", hash_input, hash_hex
+        )
+        return hash_hex
+
+    def check_for_duplicates(
+        self,
+        target_pr: GhPullRequest,
+        allow_duplicates: bool = False,
+        gh: GitHubContext | None = None,
+    ) -> None:
+        """Check if the target PR is a duplicate in Gerrit.
+
+        Args:
+            target_pr: The PR to check for duplicates
+            allow_duplicates: If True, only log warnings; if False, raise error
+            gh: GitHub context for Gerrit duplicate checking
+
+        Raises:
+            DuplicateChangeError: If duplicates found and allow_duplicates=False
+        """
+        pr_number = getattr(target_pr, "number", 0)
+
+        log.debug("Checking PR #%d for Gerrit duplicates", pr_number)
+
+        # Check if this PR already has a corresponding Gerrit change
+        if gh and self.check_gerrit_for_existing_change(gh):
+            full_message = (
+                f"PR #{pr_number} already has an existing Gerrit change. "
+                f"Skipping duplicate submission. "
+                f"Target PR title: '{getattr(target_pr, 'title', '')[:100]}'"
+            )
+
+            if allow_duplicates:
+                log.warning(
+                    "GERRIT DUPLICATE DETECTED (allowed): %s", full_message
+                )
+                return
+            else:
+                raise DuplicateChangeError(full_message, [])
+
+        log.debug("No existing Gerrit change found for PR #%d", pr_number)
+
+
+def check_for_duplicates(
+    gh: GitHubContext,
+    allow_duplicates: bool = False,
+    lookback_days: int = 7,
+) -> None:
+    """Convenience function to check for duplicates.
+
+    Args:
+        gh: GitHub context containing PR information
+        allow_duplicates: If True, only log warnings; if False, raise exception
+        lookback_days: Number of days to look back for similar PRs
+
+    Raises:
+        DuplicateChangeError: If duplicates found and allow_duplicates=False
+    """
+    if not gh.pr_number:
+        log.debug("No PR number provided, skipping duplicate check")
+        return
+
+    try:
+        client = build_client()
+        repo = get_repo_from_env(client)
+
+        # Get the target PR
+        target_pr = repo.get_pull(gh.pr_number)
+
+        # Create detector and check
+        detector = DuplicateDetector(repo, lookback_days=lookback_days)
+        detector.check_for_duplicates(
+            target_pr, allow_duplicates=allow_duplicates, gh=gh
+        )

+        log.info("Duplicate check completed for PR #%d", gh.pr_number)
+
+    except DuplicateChangeError:
+        # Re-raise duplicate errors
+        raise
+    except Exception as exc:
+        log.warning(
+            "Duplicate detection failed for PR #%d: %s", gh.pr_number, exc
+        )
+        # Don't fail the entire process if duplicate detection has issues