github2gerrit 0.1.10__py3-none-any.whl → 0.1.12__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only.
@@ -10,7 +10,6 @@ submissions from automated tools like Dependabot.
 """

 import hashlib
-import json
 import logging
 import os
 import re
@@ -21,7 +20,7 @@ from collections.abc import Iterable
 from datetime import UTC
 from datetime import datetime
 from datetime import timedelta
-from pathlib import Path
+from typing import Any

 from .gerrit_urls import create_gerrit_url_builder
 from .github_api import GhPullRequest
@@ -29,6 +28,7 @@ from .github_api import GhRepository
 from .github_api import build_client
 from .github_api import get_repo_from_env
 from .models import GitHubContext
+from .trailers import extract_github_metadata


 # Optional Gerrit REST API support
@@ -53,15 +53,23 @@ __all__ = [
 class DuplicateChangeError(Exception):
     """Raised when a duplicate change is detected."""

-    def __init__(self, message: str, existing_prs: list[int]) -> None:
+    def __init__(
+        self,
+        message: str,
+        existing_prs: list[int],
+        urls: list[str] | None = None,
+    ) -> None:
         super().__init__(message)
         self.existing_prs = existing_prs
+        self.urls = urls or []


 class ChangeFingerprint:
     """Represents a fingerprint of a change for duplicate detection."""

-    def __init__(self, title: str, body: str = "", files_changed: list[str] | None = None):
+    def __init__(
+        self, title: str, body: str = "", files_changed: list[str] | None = None
+    ):
         self.title = title.strip()
         self.body = (body or "").strip()
         self.files_changed = sorted(files_changed or [])
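
The widened `DuplicateChangeError` constructor threads Gerrit change URLs through to callers. A minimal sketch of how the new field surfaces, using the class as defined above (host and change number are invented for illustration):

```python
# `urls` defaults to an empty list, so pre-0.1.12 call sites that pass only
# a message and PR numbers keep working unchanged.
try:
    raise DuplicateChangeError(
        "subject matches existing Gerrit change(s)",
        existing_prs=[12345],
        urls=["https://gerrit.example.org/c/releng/builder/+/12345"],
    )
except DuplicateChangeError as exc:
    for url in exc.urls:
        print(f"Existing change: {url}")
```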
@@ -102,10 +110,15 @@ class ChangeFingerprint:

     def _compute_content_hash(self) -> str:
         """Compute a hash of the change content."""
-        content = f"{self._normalized_title}\n{self.body}\n{','.join(self.files_changed)}"
+        content = (
+            f"{self._normalized_title}\n{self.body}\n"
+            f"{','.join(self.files_changed)}"
+        )
         return hashlib.sha256(content.encode("utf-8")).hexdigest()[:16]

-    def is_similar_to(self, other: "ChangeFingerprint", similarity_threshold: float = 0.8) -> bool:
+    def is_similar_to(
+        self, other: "ChangeFingerprint", similarity_threshold: float = 0.8
+    ) -> bool:
         """Check if this fingerprint is similar to another."""
         # Exact normalized title match
         if self._normalized_title == other._normalized_title:
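
For reference, the content hash is a truncated SHA-256 over title, body, and the sorted file list. A standalone approximation (it feeds the raw title rather than the class's normalized one; sample values invented):

```python
import hashlib

title = "Bump urllib3 from 2.0.7 to 2.2.2"
body = "Dependabot update"
files = sorted(["requirements.txt"])

# Same recipe as _compute_content_hash: newline-joined fields, first 16 hex
# characters of the SHA-256 digest.
content = f"{title}\n{body}\n{','.join(files)}"
print(hashlib.sha256(content.encode("utf-8")).hexdigest()[:16])
```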
@@ -128,7 +141,9 @@ class ChangeFingerprint:
         # Check title similarity even without file changes
         return self._titles_similar(other, similarity_threshold)

-    def _titles_similar(self, other: "ChangeFingerprint", threshold: float) -> bool:
+    def _titles_similar(
+        self, other: "ChangeFingerprint", threshold: float
+    ) -> bool:
         """Check if titles are similar using simple string similarity."""
         title1 = self._normalized_title
         title2 = other._normalized_title
@@ -149,7 +164,10 @@ class ChangeFingerprint:
         return (intersection / union) >= threshold

     def __str__(self) -> str:
-        return f"ChangeFingerprint(title='{self.title[:50]}...', hash={self._content_hash})"
+        return (
+            f"ChangeFingerprint(title='{self.title[:50]}...', "
+            f"hash={self._content_hash})"
+        )


 class DuplicateDetector:
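
The reflow leaves the `ChangeFingerprint` API unchanged; a quick usage sketch with invented titles and file lists:

```python
# Identical titles normalize identically, so is_similar_to() matches on the
# exact-normalized-title check before any file or body comparison runs.
fp_a = ChangeFingerprint(
    "Bump urllib3 to v2.2.2", files_changed=["requirements.txt"]
)
fp_b = ChangeFingerprint(
    "Bump urllib3 to v2.2.2", files_changed=["requirements.txt"]
)
print(fp_a.is_similar_to(fp_b))  # True
print(fp_a)  # ChangeFingerprint(title='Bump urllib3 to v2.2.2...', hash=...)
```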
@@ -171,7 +189,9 @@ class DuplicateDetector:
         match = re.search(pattern, text)
         return match.group(1) if match else ""

-    def _resolve_gerrit_info_from_env_or_gitreview(self, gh: GitHubContext) -> tuple[str, str] | None:
+    def _resolve_gerrit_info_from_env_or_gitreview(
+        self, gh: GitHubContext
+    ) -> tuple[str, str] | None:
         """Resolve Gerrit host and project from environment or .gitreview file.

         Returns:
@@ -184,20 +204,11 @@ class DuplicateDetector:
         if gerrit_host and gerrit_project:
             return (gerrit_host, gerrit_project)

-        # Try to read .gitreview file locally first
-        gitreview_path = Path(".gitreview")
-        if gitreview_path.exists():
-            try:
-                text = gitreview_path.read_text(encoding="utf-8")
-                host = self._match_first_group(r"(?m)^host=(.+)$", text)
-                proj = self._match_first_group(r"(?m)^project=(.+)$", text)
-                if host and proj:
-                    project = proj.removesuffix(".git")
-                    return (host.strip(), project.strip())
-                if host and not proj:
-                    return (host.strip(), "")
-            except Exception as exc:
-                log.debug("Failed to read local .gitreview: %s", exc)
+        # Skip local .gitreview check in composite action context
+        # The duplicate detection runs before workspace setup, so there's no
+        # reliable local .gitreview file to check. Instead, rely on environment
+        # variables or remote fetching.
+        log.debug("Skipping local .gitreview check (composite action context)")

         # Try to fetch .gitreview remotely (simplified version of core logic)
         try:
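
The remote path keeps the same `host=`/`project=` extraction the removed local branch used. The parsing in isolation (file contents invented):

```python
import re

# A typical .gitreview payload as fetched from raw.githubusercontent.com.
gitreview = """[gerrit]
host=gerrit.example.org
port=29418
project=releng/builder.git
"""

host = re.search(r"(?m)^host=(.+)$", gitreview).group(1).strip()
proj = re.search(r"(?m)^project=(.+)$", gitreview).group(1)
print(host, proj.removesuffix(".git").strip())
# -> gerrit.example.org releng/builder
```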
@@ -220,7 +231,10 @@ class DuplicateDetector:
                 url = f"https://raw.githubusercontent.com/{repo_full}/{branch}/.gitreview"

                 parsed = urllib.parse.urlparse(url)
-                if parsed.scheme != "https" or parsed.netloc != "raw.githubusercontent.com":
+                if (
+                    parsed.scheme != "https"
+                    or parsed.netloc != "raw.githubusercontent.com"
+                ):
                     continue

                 try:
@@ -228,8 +242,12 @@ class DuplicateDetector:
                     with urllib.request.urlopen(url, timeout=5) as resp:
                         text_remote = resp.read().decode("utf-8")

-                    host = self._match_first_group(r"(?m)^host=(.+)$", text_remote)
-                    proj = self._match_first_group(r"(?m)^project=(.+)$", text_remote)
+                    host = self._match_first_group(
+                        r"(?m)^host=(.+)$", text_remote
+                    )
+                    proj = self._match_first_group(
+                        r"(?m)^project=(.+)$", text_remote
+                    )

                     if host and proj:
                         project = proj.removesuffix(".git")
@@ -238,7 +256,9 @@ class DuplicateDetector:
                         return (host.strip(), "")

                 except Exception as exc:
-                    log.debug("Failed to fetch .gitreview from %s: %s", url, exc)
+                    log.debug(
+                        "Failed to fetch .gitreview from %s: %s", url, exc
+                    )
                     continue

         except Exception as exc:
@@ -246,31 +266,24 @@ class DuplicateDetector:

         return None

-    def _build_gerrit_rest_client(self, gerrit_host: str) -> object | None:
-        """Build a Gerrit REST API client if pygerrit2 is available."""
-        if GerritRestAPI is None:
-            log.debug("pygerrit2 not available, skipping Gerrit duplicate check")
-            return None
-
-        # Create centralized URL builder
-        url_builder = create_gerrit_url_builder(gerrit_host)
-        base_url = url_builder.api_url()
+    def _build_gerrit_rest_client(self, gerrit_host: str) -> Any | None:
+        """Build a Gerrit REST API client using centralized framework."""
+        from .gerrit_rest import build_client_for_host

-        http_user = os.getenv("GERRIT_HTTP_USER", "").strip() or os.getenv("GERRIT_SSH_USER_G2G", "").strip()
+        http_user = (
+            os.getenv("GERRIT_HTTP_USER", "").strip()
+            or os.getenv("GERRIT_SSH_USER_G2G", "").strip()
+        )
         http_pass = os.getenv("GERRIT_HTTP_PASSWORD", "").strip()

         try:
-            if http_user and http_pass:
-                if HTTPBasicAuth is None:
-                    log.debug("pygerrit2 HTTPBasicAuth not available")
-                    return None
-                # Type ignore needed for dynamic import returning Any
-                return GerritRestAPI(  # type: ignore[no-any-return]
-                    url=base_url, auth=HTTPBasicAuth(http_user, http_pass)
-                )
-            else:
-                # Type ignore needed for dynamic import returning Any
-                return GerritRestAPI(url=base_url)  # type: ignore[no-any-return]
+            return build_client_for_host(
+                gerrit_host,
+                timeout=8.0,
+                max_attempts=3,
+                http_user=http_user or None,
+                http_password=http_pass or None,
+            )
         except Exception as exc:
             log.debug("Failed to create Gerrit REST client: %s", exc)
             return None
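
A hedged sketch of the new client path. `build_client_for_host` lives in the package's internal `gerrit_rest` module; the keyword arguments are exactly those the diff passes, while the host and the absolute import path are assumptions:

```python
import os

from github2gerrit.gerrit_rest import build_client_for_host  # assumed path

client = build_client_for_host(
    "gerrit.example.org",  # hypothetical host
    timeout=8.0,
    max_attempts=3,
    http_user=os.getenv("GERRIT_HTTP_USER") or None,
    http_password=os.getenv("GERRIT_HTTP_PASSWORD") or None,
)
# Per the query code later in this diff, .get() returns parsed JSON (a list
# for /changes/ queries), with base path and auth handled inside the client.
changes = client.get("/changes/?q=status:open&n=5")
```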
@@ -299,7 +312,9 @@ class DuplicateDetector:
         hash_bytes = hashlib.sha256(hash_input.encode("utf-8")).digest()
         hash_hex = hash_bytes.hex()[:16]

-        log.debug("Generated GitHub change hash for %s: %s", hash_input, hash_hex)
+        log.debug(
+            "Generated GitHub change hash for %s: %s", hash_input, hash_hex
+        )
         return hash_hex

     def check_for_duplicates(
@@ -307,25 +322,41 @@ class DuplicateDetector:
         target_pr: GhPullRequest,
         allow_duplicates: bool = False,
         gh: GitHubContext | None = None,
+        expected_github_hash: str | None = None,
     ) -> None:
-        """Check if the target PR is a duplicate via subject equality against Gerrit.
+        """Check if the target PR is a duplicate via trailer-aware and subject
+        equality against Gerrit.

-        Implements a robust, dependency-free subject-first duplicate check:
+        Implements a robust, dependency-free duplicate check with trailer
+        awareness:
+        - First check for existing changes with matching GitHub-Hash trailer
+          (short-circuit)
         - Resolve Gerrit host/project from env or .gitreview
-        - Query Gerrit changes updated within the lookback window (excluding abandoned)
+        - Query Gerrit changes updated within the lookback window (excluding
+          abandoned)
         - Compare normalized subjects (first line) for exact equality
         - If any match, treat as duplicate and either warn or raise
+
+        Args:
+            target_pr: The GitHub PR to check for duplicates
+            allow_duplicates: If True, log warnings instead of raising errors
+            gh: GitHub context for resolving Gerrit configuration
+            expected_github_hash: The GitHub-Hash trailer value expected for
+                this PR
         """
         pr_number = getattr(target_pr, "number", 0)
         pr_title = (getattr(target_pr, "title", "") or "").strip()

         log.debug(
-            "Checking PR #%d for duplicates via subject equality against Gerrit",
+            "Checking PR #%d for duplicates via subject equality against "
+            "Gerrit",
             pr_number,
         )

         if not pr_title:
-            log.debug("PR #%d has empty title; skipping duplicate check", pr_number)
+            log.debug(
+                "PR #%d has empty title; skipping duplicate check", pr_number
+            )
             return
         if gh is None:
             log.debug("No GitHub context provided; skipping duplicate check")
@@ -334,7 +365,10 @@ class DuplicateDetector:
         # Resolve Gerrit target (host/project)
         gerrit_info = self._resolve_gerrit_info_from_env_or_gitreview(gh)
         if not gerrit_info:
-            log.debug("Unable to resolve Gerrit host/project; skipping duplicate check")
+            log.debug(
+                "Unable to resolve Gerrit host/project; skipping duplicate "
+                "check"
+            )
             return
         gerrit_host, gerrit_project = gerrit_info

@@ -342,16 +376,21 @@ class DuplicateDetector:
         def _normalize_subject(title: str) -> str:
             normalized = title.strip()
             normalized = re.sub(
-                r"^(feat|fix|docs|style|refactor|test|chore|ci|build|perf)(\(.+?\))?: ",
+                r"^(feat|fix|docs|style|refactor|test|chore|ci|build|perf)"
+                r"(\(.+?\))?: ",
                 "",
                 normalized,
                 flags=re.IGNORECASE,
             )
             normalized = re.sub(r"[*_`]", "", normalized)
-            normalized = re.sub(r"\bv\d+(\.\d+)*(-\w+)?\b", "vx.y.z", normalized)
+            normalized = re.sub(
+                r"\bv\d+(\.\d+)*(-\w+)?\b", "vx.y.z", normalized
+            )
             normalized = re.sub(r"\b\d+(\.\d+)+(-\w+)?\b", "x.y.z", normalized)
             normalized = re.sub(r"\b\d+\.\d+\b", "x.y.z", normalized)
-            normalized = re.sub(r"\b[a-f0-9]{7,40}\b", "commit_hash", normalized)
+            normalized = re.sub(
+                r"\b[a-f0-9]{7,40}\b", "commit_hash", normalized
+            )
             normalized = re.sub(r"\s+", " ", normalized).strip()
             return normalized.lower()

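A worked example of what `_normalize_subject` does to a typical Dependabot title (input invented; the regexes are copied from the function above, applying only the steps this input triggers):

```python
import re

title = "chore(deps): Bump urllib3 from 2.0.7 to 2.2.2"
s = re.sub(
    r"^(feat|fix|docs|style|refactor|test|chore|ci|build|perf)"
    r"(\(.+?\))?: ",
    "",
    title.strip(),
    flags=re.IGNORECASE,
)
s = re.sub(r"\b\d+(\.\d+)+(-\w+)?\b", "x.y.z", s)  # mask version numbers
s = re.sub(r"\s+", " ", s).strip().lower()
print(s)  # -> "bump urllib3 from x.y.z to x.y.z"
```
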
@@ -363,7 +402,6 @@ class DuplicateDetector:

         # Build Gerrit REST URL using centralized URL builder
         url_builder = create_gerrit_url_builder(gerrit_host)
-        api_base = url_builder.api_url().rstrip("/")

         # Track which base path actually works for constructing display URLs
         successful_base_path = url_builder.base_path
@@ -393,64 +431,99 @@ class DuplicateDetector:
         query = " ".join(q_parts)
         encoded_q = urllib.parse.quote(query, safe="")

-        # Request current commit metadata so we get 'subject'
-        # Use a modest page size
-        url = f"{api_base}/changes/?q={encoded_q}&n=50&o=CURRENT_COMMIT&o=CURRENT_FILES"
-
-        def _load_gerrit_json(url_: str) -> list[dict[str, object]]:
+        def _load_gerrit_json(query_path: str) -> list[dict[str, object]]:
             try:
-                log.debug("Querying Gerrit for duplicates: %s", url_)
-                # Ensure we only fetch over HTTPS to avoid unsafe schemes
-                parsed = urllib.parse.urlparse(url_)
-                if parsed.scheme != "https":
-                    log.debug("Skipping non-HTTPS URL for Gerrit query: %s", url_)
+                # Use centralized client that handles base path and auth
+                client = self._build_gerrit_rest_client(gerrit_host)
+                if client is None:
+                    log.debug(
+                        "Gerrit client not available; skipping duplicate check"
+                    )
                     return []
-                headers: dict[str, str] = {}
-                http_user = os.getenv("GERRIT_HTTP_USER", "").strip()
-                http_pass = os.getenv("GERRIT_HTTP_PASSWORD", "").strip()
-                if http_user and http_pass:
-                    import base64 as _b64  # localized import to avoid global import edit
-
-                    basic = _b64.b64encode(f"{http_user}:{http_pass}".encode()).decode("ascii")
-                    headers["Authorization"] = f"Basic {basic}"
-                req = urllib.request.Request(url_, headers=headers)
-                with urllib.request.urlopen(req, timeout=8) as resp:
-                    raw = resp.read().decode("utf-8", errors="replace")
-                # Strip Gerrit's XSSI prefix if present
-                if raw.startswith(")]}'"):
-                    raw = raw.split("\n", 1)[1] if "\n" in raw else ""
-                data = json.loads(raw or "[]")
+
+                log.debug("Querying Gerrit for duplicates: %s", query_path)
+                data = client.get(query_path)
                 if isinstance(data, list):
                     return data
                 else:
                     return []
-            except urllib.error.HTTPError as exc:
-                log.debug("Gerrit query failed for %s: %s", url_, exc)
-                return []
             except Exception as exc:
-                log.debug("Gerrit query failed for %s: %s", url_, exc)
+                log.debug("Gerrit query failed for %s: %s", query_path, exc)
                 return []

+        # Build query path for centralized client
+        query_path = (
+            f"/changes/?q={encoded_q}&n=50&o=CURRENT_COMMIT&o=CURRENT_FILES"
+        )
+
         log.debug(
-            "Gerrit duplicate query: host=%s project=%s filter=%s cutoff=%s url=%s",
+            "Gerrit duplicate query: host=%s project=%s filter=%s cutoff=%s "
+            "path=%s",
             gerrit_host,
             gerrit_project or "(any)",
             dup_filter,
             cutoff_date,
-            url,
+            query_path,
         )
-        changes = _load_gerrit_json(url)
+        changes = _load_gerrit_json(query_path)
         log.debug(
-            "Gerrit query returned %d change(s) for project=%s filter=%s after=%s",
+            "Gerrit query returned %d change(s) for project=%s filter=%s "
+            "after=%s",
             len(changes),
             gerrit_project or "(any)",
             dup_filter,
             cutoff_date,
         )
         if changes:
-            sample_subjects = ", ".join(str(c.get("subject") or "")[:60] for c in changes[:5])
+            sample_subjects = ", ".join(
+                str(c.get("subject") or "")[:60] for c in changes[:5]
+            )
             log.debug("Sample subjects: %s", sample_subjects)

+        # First pass: Check for trailer-based matches (GitHub-Hash)
+        if expected_github_hash:
+            log.debug(
+                "Checking for GitHub-Hash trailer matches: %s",
+                expected_github_hash,
+            )
+            trailer_matches: list[tuple[int, str]] = []
+
+            for c in changes:
+                # Extract commit message and check for GitHub trailers
+                rev = str(c.get("current_revision") or "")
+                revs_obj = c.get("revisions")
+                revs = revs_obj if isinstance(revs_obj, dict) else {}
+                cur_obj = revs.get(rev)
+                cur = cur_obj if isinstance(cur_obj, dict) else {}
+                commit = cur.get("commit") or {}
+                msg = str(commit.get("message") or "")
+
+                if msg:
+                    github_metadata = extract_github_metadata(msg)
+                    change_github_hash = github_metadata.get("GitHub-Hash", "")
+
+                    if change_github_hash == expected_github_hash:
+                        num = c.get("_number")
+                        proj = str(c.get("project") or gerrit_project or "")
+                        if isinstance(num, int):
+                            trailer_matches.append((num, proj))
+                            log.debug(
+                                "Found GitHub-Hash trailer match: change %d, "
+                                "hash %s",
+                                num,
+                                change_github_hash,
+                            )
+
+            if trailer_matches:
+                log.debug(
+                    "Found %d change(s) with matching GitHub-Hash trailer - "
+                    "treating as update targets",
+                    len(trailer_matches),
+                )
+                # These are update targets, not duplicates - allow them to
+                # proceed
+                return
+
         # Compare normalized subjects for exact equality
         matched: list[tuple[int, str]] = []
         for c in changes:
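
The short-circuit depends on a `GitHub-Hash` trailer in the Gerrit commit message. A rough stand-in for `extract_github_metadata` (the real parsing rules live in the package's `trailers` module; the message below is invented):

```python
message = """Bump urllib3 from 2.0.7 to 2.2.2

GitHub-PR: https://github.com/org/repo/pull/123
GitHub-Hash: 1a2b3c4d5e6f7a8b
"""

# Simplified trailer scan; extract_github_metadata(message) would return a
# similar mapping of GitHub-* trailers.
trailers = dict(
    line.split(": ", 1) for line in message.splitlines() if ": " in line
)
expected_github_hash = "1a2b3c4d5e6f7a8b"
print(trailers.get("GitHub-Hash", "") == expected_github_hash)
# True -> the change is an update target, not a duplicate
```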
@@ -464,8 +537,11 @@ class DuplicateDetector:
                     matched.append((num, proj))

         if not matched:
-            # No exact subject match; proceed with similarity scoring across candidates
-            log.debug("No exact-subject matches found; entering similarity scoring")
+            # No exact subject match; proceed with similarity scoring across
+            # candidates
+            log.debug(
+                "No exact-subject matches found; entering similarity scoring"
+            )
            from .similarity import ScoringConfig
            from .similarity import aggregate_scores
            from .similarity import remove_commit_trailers
@@ -488,7 +564,8 @@ class DuplicateDetector:
                        if fname:
                            src_files.append(str(fname))
            except Exception as exc:
-                # Best-effort; if files cannot be retrieved, proceed without them
+                # Best-effort; if files cannot be retrieved, proceed without
+                # them
                log.debug("Failed to retrieve PR files for scoring: %s", exc)

            best_score = 0.0
@@ -499,7 +576,8 @@ class DuplicateDetector:
                subj = str(c.get("subject") or "").strip()
                if not subj:
                    continue
-                # Extract commit message and files from revisions (CURRENT_COMMIT, CURRENT_FILES)
+                # Extract commit message and files from revisions
+                # (CURRENT_COMMIT, CURRENT_FILES)
                rev = str(c.get("current_revision") or "")
                revs_obj = c.get("revisions")
                revs = revs_obj if isinstance(revs_obj, dict) else {}
@@ -512,7 +590,11 @@ class DuplicateDetector:
                    cand_body_raw = msg.split("\n", 1)[1]
                    cand_body = remove_commit_trailers(cand_body_raw)
                files_dict = cur.get("files") or {}
-                cand_files = [p for p in files_dict if isinstance(p, str) and not p.startswith("/")]
+                cand_files = [
+                    p
+                    for p in files_dict
+                    if isinstance(p, str) and not p.startswith("/")
+                ]

                # Compute component scores
                s_res = score_subjects(src_subjects, subj)
@@ -524,7 +606,9 @@ class DuplicateDetector:
                b_res = score_bodies(src_body, cand_body)

                # Aggregate
-                agg = aggregate_scores(s_res.score, f_res.score, b_res.score, config=config)
+                agg = aggregate_scores(
+                    s_res.score, f_res.score, b_res.score, config=config
+                )
                log.debug(
                    "Aggregate score computed: %.2f (s=%.2f f=%.2f b=%.2f)",
                    agg,
@@ -533,20 +617,24 @@ class DuplicateDetector:
                    b_res.score,
                )

-                # Build candidate reference and number using successful base path
+                # Build candidate reference and number using successful base
+                # path
                num_obj = c.get("_number")
                num = int(num_obj) if isinstance(num_obj, int) else None
                proj = str(c.get("project") or gerrit_project or "")

                # Use the base path that actually worked for API calls
-                display_url_builder = create_gerrit_url_builder(gerrit_host, successful_base_path)
+                display_url_builder = create_gerrit_url_builder(
+                    gerrit_host, successful_base_path
+                )
                ref = (
                    display_url_builder.change_url(proj, num)
                    if proj and isinstance(num, int)
                    else (f"change {num}" if isinstance(num, int) else "")
                )
                log.debug(
-                    "Scoring candidate: ref=%s agg=%.2f (s=%.2f f=%.2f b=%.2f) subj='%s'",
+                    "Scoring candidate: ref=%s agg=%.2f (s=%.2f f=%.2f b=%.2f) "
+                    "subj='%s'",
                    ref or "(none)",
                    agg,
                    s_res.score,
@@ -559,7 +647,11 @@ class DuplicateDetector:
                if agg > best_score:
                    best_score = agg
                    # Deduplicate reasons preserving order
-                    best_reasons = list(dict.fromkeys(s_res.reasons + f_res.reasons + b_res.reasons))
+                    best_reasons = list(
+                        dict.fromkeys(
+                            s_res.reasons + f_res.reasons + b_res.reasons
+                        )
+                    )

                # Collect all candidates above threshold
                if agg >= config.similarity_threshold and ref:
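
For orientation (this is not the package's `aggregate_scores` implementation, which lives in the internal `similarity` module): the decision shape is three component scores folded into one aggregate, which is then compared against `config.similarity_threshold`. A generic weighted-average stand-in:

```python
def aggregate(subject: float, files: float, body: float,
              weights: tuple[float, float, float] = (0.5, 0.3, 0.2)) -> float:
    # Hypothetical weights; the real config and weighting are internal.
    return sum(s * w for s, w in zip((subject, files, body), weights))

agg = aggregate(0.9, 0.7, 0.4)
print(round(agg, 2), agg >= 0.8)  # 0.74 False -> below threshold, not flagged
```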
@@ -579,7 +671,10 @@ class DuplicateDetector:
            for s, u, _ in hits_sorted:
                if u:
                    log.info("Score: %.2f URL: %s", s, u)
-            msg = f"Similar Gerrit change(s) detected [≥ {config.similarity_threshold:.2f}]"
+            msg = (
+                f"Similar Gerrit change(s) detected "
+                f"[≥ {config.similarity_threshold:.2f}]"
+            )
            if best_reasons:
                msg += f" (Reasons: {', '.join(best_reasons)})"
            if allow_duplicates:
@@ -590,34 +685,45 @@ class DuplicateDetector:
         # Construct human-friendly references for logs
         matching_numbers: list[int] = []
         match_lines: list[str] = []
+        duplicate_urls: list[str] = []
         for n, proj in matched:
             if proj:
                 # Use the base path that actually worked for API calls
-                display_url_builder = create_gerrit_url_builder(gerrit_host, successful_base_path)
+                display_url_builder = create_gerrit_url_builder(
+                    gerrit_host, successful_base_path
+                )
                 url = display_url_builder.change_url(proj, n)
                 match_lines.append(f"Score: 1.0 URL: {url}")
-                log.info("Score: 1.0 URL: %s", url)
+                duplicate_urls.append(url)
+                log.debug("Score: 1.0 URL: %s", url)
             else:
                 match_lines.append(f"Score: 1.0 URL: change {n}")
-                log.info("Score: 1.0 URL: change %s", n)
+                duplicate_urls.append(f"change {n}")
+                log.debug("Score: 1.0 URL: change %s", n)
             matching_numbers.append(n)

         if not matched:
-            log.debug("No exact subject matches and no similarity matches; duplicate check passes")
+            log.debug(
+                "No exact subject matches and no similarity matches; "
+                "duplicate check passes"
+            )
             return

         # Remove PR number from message since cli.py already includes it
         full_message = "subject matches existing Gerrit change(s)"
         if allow_duplicates:
-            log.warning("GERRIT DUPLICATE DETECTED (allowed): %s", full_message)
+            log.debug("GERRIT DUPLICATE DETECTED (allowed): %s", full_message)
             return
-        raise DuplicateChangeError(full_message, matching_numbers)
+        raise DuplicateChangeError(
+            full_message, matching_numbers, duplicate_urls
+        )


 def check_for_duplicates(
     gh: GitHubContext,
     allow_duplicates: bool = False,
     lookback_days: int = 7,
+    expected_github_hash: str | None = None,
 ) -> None:
     """Convenience function to check for duplicates.

@@ -625,6 +731,7 @@ def check_for_duplicates(
         gh: GitHub context containing PR information
         allow_duplicates: If True, only log warnings; if False, raise exception
         lookback_days: Number of days to look back for similar PRs
+        expected_github_hash: The GitHub-Hash trailer value expected for this PR

     Raises:
         DuplicateChangeError: If duplicates found and allow_duplicates=False
@@ -646,13 +753,20 @@ def check_for_duplicates(
             lookback_days=lookback_days,
             duplicates_filter=os.getenv("DUPLICATE_TYPES", "open"),
         )
-        detector.check_for_duplicates(target_pr, allow_duplicates=allow_duplicates, gh=gh)
+        detector.check_for_duplicates(
+            target_pr,
+            allow_duplicates=allow_duplicates,
+            gh=gh,
+            expected_github_hash=expected_github_hash,
+        )

-        log.info("Duplicate check completed for PR #%d", gh.pr_number)
+        log.debug("Duplicate check completed for PR #%d", gh.pr_number)

     except DuplicateChangeError:
         # Re-raise duplicate errors
         raise
     except Exception as exc:
-        log.warning("Duplicate detection failed for PR #%d: %s", gh.pr_number, exc)
+        log.warning(
+            "Duplicate detection failed for PR #%d: %s", gh.pr_number, exc
+        )
         # Don't fail the entire process if duplicate detection has issues
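
End to end, callers now pass the expected trailer value through the convenience wrapper and can surface the URLs carried by the exception. A sketch, assuming the module path `github2gerrit.duplicate_detection` and with `ctx` standing in for a real `GitHubContext`:

```python
from github2gerrit.duplicate_detection import (  # assumed module path
    DuplicateChangeError,
    check_for_duplicates,
)

ctx = ...  # placeholder: a GitHubContext built from the workflow environment

try:
    check_for_duplicates(
        ctx,
        allow_duplicates=False,
        lookback_days=7,
        expected_github_hash="1a2b3c4d5e6f7a8b",  # value invented here
    )
except DuplicateChangeError as exc:
    for url in exc.urls:  # new in 0.1.12
        print(f"Existing change: {url}")
```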