github2gerrit 0.1.10__py3-none-any.whl → 0.1.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,7 +10,6 @@ submissions from automated tools like Dependabot.
10
10
  """
11
11
 
12
12
  import hashlib
13
- import json
14
13
  import logging
15
14
  import os
16
15
  import re
@@ -22,6 +21,7 @@ from datetime import UTC
22
21
  from datetime import datetime
23
22
  from datetime import timedelta
24
23
  from pathlib import Path
24
+ from typing import Any
25
25
 
26
26
  from .gerrit_urls import create_gerrit_url_builder
27
27
  from .github_api import GhPullRequest
@@ -29,6 +29,7 @@ from .github_api import GhRepository
29
29
  from .github_api import build_client
30
30
  from .github_api import get_repo_from_env
31
31
  from .models import GitHubContext
32
+ from .trailers import extract_github_metadata
32
33
 
33
34
 
34
35
  # Optional Gerrit REST API support
@@ -53,15 +54,23 @@ __all__ = [
53
54
  class DuplicateChangeError(Exception):
54
55
  """Raised when a duplicate change is detected."""
55
56
 
56
- def __init__(self, message: str, existing_prs: list[int]) -> None:
57
+ def __init__(
58
+ self,
59
+ message: str,
60
+ existing_prs: list[int],
61
+ urls: list[str] | None = None,
62
+ ) -> None:
57
63
  super().__init__(message)
58
64
  self.existing_prs = existing_prs
65
+ self.urls = urls or []
59
66
 
60
67
 
61
68
  class ChangeFingerprint:
62
69
  """Represents a fingerprint of a change for duplicate detection."""
63
70
 
64
- def __init__(self, title: str, body: str = "", files_changed: list[str] | None = None):
71
+ def __init__(
72
+ self, title: str, body: str = "", files_changed: list[str] | None = None
73
+ ):
65
74
  self.title = title.strip()
66
75
  self.body = (body or "").strip()
67
76
  self.files_changed = sorted(files_changed or [])
@@ -102,10 +111,15 @@ class ChangeFingerprint:
102
111
 
103
112
  def _compute_content_hash(self) -> str:
104
113
  """Compute a hash of the change content."""
105
- content = f"{self._normalized_title}\n{self.body}\n{','.join(self.files_changed)}"
114
+ content = (
115
+ f"{self._normalized_title}\n{self.body}\n"
116
+ f"{','.join(self.files_changed)}"
117
+ )
106
118
  return hashlib.sha256(content.encode("utf-8")).hexdigest()[:16]
107
119
 
108
- def is_similar_to(self, other: "ChangeFingerprint", similarity_threshold: float = 0.8) -> bool:
120
+ def is_similar_to(
121
+ self, other: "ChangeFingerprint", similarity_threshold: float = 0.8
122
+ ) -> bool:
109
123
  """Check if this fingerprint is similar to another."""
110
124
  # Exact normalized title match
111
125
  if self._normalized_title == other._normalized_title:
@@ -128,7 +142,9 @@ class ChangeFingerprint:
128
142
  # Check title similarity even without file changes
129
143
  return self._titles_similar(other, similarity_threshold)
130
144
 
131
- def _titles_similar(self, other: "ChangeFingerprint", threshold: float) -> bool:
145
+ def _titles_similar(
146
+ self, other: "ChangeFingerprint", threshold: float
147
+ ) -> bool:
132
148
  """Check if titles are similar using simple string similarity."""
133
149
  title1 = self._normalized_title
134
150
  title2 = other._normalized_title
@@ -149,7 +165,10 @@ class ChangeFingerprint:
149
165
  return (intersection / union) >= threshold
150
166
 
151
167
  def __str__(self) -> str:
152
- return f"ChangeFingerprint(title='{self.title[:50]}...', hash={self._content_hash})"
168
+ return (
169
+ f"ChangeFingerprint(title='{self.title[:50]}...', "
170
+ f"hash={self._content_hash})"
171
+ )
153
172
 
154
173
 
155
174
  class DuplicateDetector:
@@ -171,7 +190,9 @@ class DuplicateDetector:
171
190
  match = re.search(pattern, text)
172
191
  return match.group(1) if match else ""
173
192
 
174
- def _resolve_gerrit_info_from_env_or_gitreview(self, gh: GitHubContext) -> tuple[str, str] | None:
193
+ def _resolve_gerrit_info_from_env_or_gitreview(
194
+ self, gh: GitHubContext
195
+ ) -> tuple[str, str] | None:
175
196
  """Resolve Gerrit host and project from environment or .gitreview file.
176
197
 
177
198
  Returns:
@@ -220,7 +241,10 @@ class DuplicateDetector:
220
241
  url = f"https://raw.githubusercontent.com/{repo_full}/{branch}/.gitreview"
221
242
 
222
243
  parsed = urllib.parse.urlparse(url)
223
- if parsed.scheme != "https" or parsed.netloc != "raw.githubusercontent.com":
244
+ if (
245
+ parsed.scheme != "https"
246
+ or parsed.netloc != "raw.githubusercontent.com"
247
+ ):
224
248
  continue
225
249
 
226
250
  try:
@@ -228,8 +252,12 @@ class DuplicateDetector:
228
252
  with urllib.request.urlopen(url, timeout=5) as resp:
229
253
  text_remote = resp.read().decode("utf-8")
230
254
 
231
- host = self._match_first_group(r"(?m)^host=(.+)$", text_remote)
232
- proj = self._match_first_group(r"(?m)^project=(.+)$", text_remote)
255
+ host = self._match_first_group(
256
+ r"(?m)^host=(.+)$", text_remote
257
+ )
258
+ proj = self._match_first_group(
259
+ r"(?m)^project=(.+)$", text_remote
260
+ )
233
261
 
234
262
  if host and proj:
235
263
  project = proj.removesuffix(".git")
@@ -238,7 +266,9 @@ class DuplicateDetector:
238
266
  return (host.strip(), "")
239
267
 
240
268
  except Exception as exc:
241
- log.debug("Failed to fetch .gitreview from %s: %s", url, exc)
269
+ log.debug(
270
+ "Failed to fetch .gitreview from %s: %s", url, exc
271
+ )
242
272
  continue
243
273
 
244
274
  except Exception as exc:
@@ -246,31 +276,24 @@ class DuplicateDetector:
246
276
 
247
277
  return None
248
278
 
249
- def _build_gerrit_rest_client(self, gerrit_host: str) -> object | None:
250
- """Build a Gerrit REST API client if pygerrit2 is available."""
251
- if GerritRestAPI is None:
252
- log.debug("pygerrit2 not available, skipping Gerrit duplicate check")
253
- return None
279
+ def _build_gerrit_rest_client(self, gerrit_host: str) -> Any | None:
280
+ """Build a Gerrit REST API client using centralized framework."""
281
+ from .gerrit_rest import build_client_for_host
254
282
 
255
- # Create centralized URL builder
256
- url_builder = create_gerrit_url_builder(gerrit_host)
257
- base_url = url_builder.api_url()
258
-
259
- http_user = os.getenv("GERRIT_HTTP_USER", "").strip() or os.getenv("GERRIT_SSH_USER_G2G", "").strip()
283
+ http_user = (
284
+ os.getenv("GERRIT_HTTP_USER", "").strip()
285
+ or os.getenv("GERRIT_SSH_USER_G2G", "").strip()
286
+ )
260
287
  http_pass = os.getenv("GERRIT_HTTP_PASSWORD", "").strip()
261
288
 
262
289
  try:
263
- if http_user and http_pass:
264
- if HTTPBasicAuth is None:
265
- log.debug("pygerrit2 HTTPBasicAuth not available")
266
- return None
267
- # Type ignore needed for dynamic import returning Any
268
- return GerritRestAPI( # type: ignore[no-any-return]
269
- url=base_url, auth=HTTPBasicAuth(http_user, http_pass)
270
- )
271
- else:
272
- # Type ignore needed for dynamic import returning Any
273
- return GerritRestAPI(url=base_url) # type: ignore[no-any-return]
290
+ return build_client_for_host(
291
+ gerrit_host,
292
+ timeout=8.0,
293
+ max_attempts=3,
294
+ http_user=http_user or None,
295
+ http_password=http_pass or None,
296
+ )
274
297
  except Exception as exc:
275
298
  log.debug("Failed to create Gerrit REST client: %s", exc)
276
299
  return None
@@ -299,7 +322,9 @@ class DuplicateDetector:
299
322
  hash_bytes = hashlib.sha256(hash_input.encode("utf-8")).digest()
300
323
  hash_hex = hash_bytes.hex()[:16]
301
324
 
302
- log.debug("Generated GitHub change hash for %s: %s", hash_input, hash_hex)
325
+ log.debug(
326
+ "Generated GitHub change hash for %s: %s", hash_input, hash_hex
327
+ )
303
328
  return hash_hex
304
329
 
305
330
  def check_for_duplicates(
@@ -307,25 +332,41 @@ class DuplicateDetector:
307
332
  target_pr: GhPullRequest,
308
333
  allow_duplicates: bool = False,
309
334
  gh: GitHubContext | None = None,
335
+ expected_github_hash: str | None = None,
310
336
  ) -> None:
311
- """Check if the target PR is a duplicate via subject equality against Gerrit.
337
+ """Check if the target PR is a duplicate via trailer-aware and subject
338
+ equality against Gerrit.
312
339
 
313
- Implements a robust, dependency-free subject-first duplicate check:
340
+ Implements a robust, dependency-free duplicate check with trailer
341
+ awareness:
342
+ - First check for existing changes with matching GitHub-Hash trailer
343
+ (short-circuit)
314
344
  - Resolve Gerrit host/project from env or .gitreview
315
- - Query Gerrit changes updated within the lookback window (excluding abandoned)
345
+ - Query Gerrit changes updated within the lookback window (excluding
346
+ abandoned)
316
347
  - Compare normalized subjects (first line) for exact equality
317
348
  - If any match, treat as duplicate and either warn or raise
349
+
350
+ Args:
351
+ target_pr: The GitHub PR to check for duplicates
352
+ allow_duplicates: If True, log warnings instead of raising errors
353
+ gh: GitHub context for resolving Gerrit configuration
354
+ expected_github_hash: The GitHub-Hash trailer value expected for
355
+ this PR
318
356
  """
319
357
  pr_number = getattr(target_pr, "number", 0)
320
358
  pr_title = (getattr(target_pr, "title", "") or "").strip()
321
359
 
322
360
  log.debug(
323
- "Checking PR #%d for duplicates via subject equality against Gerrit",
361
+ "Checking PR #%d for duplicates via subject equality against "
362
+ "Gerrit",
324
363
  pr_number,
325
364
  )
326
365
 
327
366
  if not pr_title:
328
- log.debug("PR #%d has empty title; skipping duplicate check", pr_number)
367
+ log.debug(
368
+ "PR #%d has empty title; skipping duplicate check", pr_number
369
+ )
329
370
  return
330
371
  if gh is None:
331
372
  log.debug("No GitHub context provided; skipping duplicate check")
@@ -334,7 +375,10 @@ class DuplicateDetector:
334
375
  # Resolve Gerrit target (host/project)
335
376
  gerrit_info = self._resolve_gerrit_info_from_env_or_gitreview(gh)
336
377
  if not gerrit_info:
337
- log.debug("Unable to resolve Gerrit host/project; skipping duplicate check")
378
+ log.debug(
379
+ "Unable to resolve Gerrit host/project; skipping duplicate "
380
+ "check"
381
+ )
338
382
  return
339
383
  gerrit_host, gerrit_project = gerrit_info
340
384
 
@@ -342,16 +386,21 @@ class DuplicateDetector:
342
386
  def _normalize_subject(title: str) -> str:
343
387
  normalized = title.strip()
344
388
  normalized = re.sub(
345
- r"^(feat|fix|docs|style|refactor|test|chore|ci|build|perf)(\(.+?\))?: ",
389
+ r"^(feat|fix|docs|style|refactor|test|chore|ci|build|perf)"
390
+ r"(\(.+?\))?: ",
346
391
  "",
347
392
  normalized,
348
393
  flags=re.IGNORECASE,
349
394
  )
350
395
  normalized = re.sub(r"[*_`]", "", normalized)
351
- normalized = re.sub(r"\bv\d+(\.\d+)*(-\w+)?\b", "vx.y.z", normalized)
396
+ normalized = re.sub(
397
+ r"\bv\d+(\.\d+)*(-\w+)?\b", "vx.y.z", normalized
398
+ )
352
399
  normalized = re.sub(r"\b\d+(\.\d+)+(-\w+)?\b", "x.y.z", normalized)
353
400
  normalized = re.sub(r"\b\d+\.\d+\b", "x.y.z", normalized)
354
- normalized = re.sub(r"\b[a-f0-9]{7,40}\b", "commit_hash", normalized)
401
+ normalized = re.sub(
402
+ r"\b[a-f0-9]{7,40}\b", "commit_hash", normalized
403
+ )
355
404
  normalized = re.sub(r"\s+", " ", normalized).strip()
356
405
  return normalized.lower()
357
406
 
@@ -363,7 +412,6 @@ class DuplicateDetector:
363
412
 
364
413
  # Build Gerrit REST URL using centralized URL builder
365
414
  url_builder = create_gerrit_url_builder(gerrit_host)
366
- api_base = url_builder.api_url().rstrip("/")
367
415
 
368
416
  # Track which base path actually works for constructing display URLs
369
417
  successful_base_path = url_builder.base_path
@@ -393,64 +441,99 @@ class DuplicateDetector:
393
441
  query = " ".join(q_parts)
394
442
  encoded_q = urllib.parse.quote(query, safe="")
395
443
 
396
- # Request current commit metadata so we get 'subject'
397
- # Use a modest page size
398
- url = f"{api_base}/changes/?q={encoded_q}&n=50&o=CURRENT_COMMIT&o=CURRENT_FILES"
399
-
400
- def _load_gerrit_json(url_: str) -> list[dict[str, object]]:
444
+ def _load_gerrit_json(query_path: str) -> list[dict[str, object]]:
401
445
  try:
402
- log.debug("Querying Gerrit for duplicates: %s", url_)
403
- # Ensure we only fetch over HTTPS to avoid unsafe schemes
404
- parsed = urllib.parse.urlparse(url_)
405
- if parsed.scheme != "https":
406
- log.debug("Skipping non-HTTPS URL for Gerrit query: %s", url_)
446
+ # Use centralized client that handles base path and auth
447
+ client = self._build_gerrit_rest_client(gerrit_host)
448
+ if client is None:
449
+ log.debug(
450
+ "Gerrit client not available; skipping duplicate check"
451
+ )
407
452
  return []
408
- headers: dict[str, str] = {}
409
- http_user = os.getenv("GERRIT_HTTP_USER", "").strip()
410
- http_pass = os.getenv("GERRIT_HTTP_PASSWORD", "").strip()
411
- if http_user and http_pass:
412
- import base64 as _b64 # localized import to avoid global import edit
413
-
414
- basic = _b64.b64encode(f"{http_user}:{http_pass}".encode()).decode("ascii")
415
- headers["Authorization"] = f"Basic {basic}"
416
- req = urllib.request.Request(url_, headers=headers)
417
- with urllib.request.urlopen(req, timeout=8) as resp:
418
- raw = resp.read().decode("utf-8", errors="replace")
419
- # Strip Gerrit's XSSI prefix if present
420
- if raw.startswith(")]}'"):
421
- raw = raw.split("\n", 1)[1] if "\n" in raw else ""
422
- data = json.loads(raw or "[]")
453
+
454
+ log.debug("Querying Gerrit for duplicates: %s", query_path)
455
+ data = client.get(query_path)
423
456
  if isinstance(data, list):
424
457
  return data
425
458
  else:
426
459
  return []
427
- except urllib.error.HTTPError as exc:
428
- log.debug("Gerrit query failed for %s: %s", url_, exc)
429
- return []
430
460
  except Exception as exc:
431
- log.debug("Gerrit query failed for %s: %s", url_, exc)
461
+ log.debug("Gerrit query failed for %s: %s", query_path, exc)
432
462
  return []
433
463
 
464
+ # Build query path for centralized client
465
+ query_path = (
466
+ f"/changes/?q={encoded_q}&n=50&o=CURRENT_COMMIT&o=CURRENT_FILES"
467
+ )
468
+
434
469
  log.debug(
435
- "Gerrit duplicate query: host=%s project=%s filter=%s cutoff=%s url=%s",
470
+ "Gerrit duplicate query: host=%s project=%s filter=%s cutoff=%s "
471
+ "path=%s",
436
472
  gerrit_host,
437
473
  gerrit_project or "(any)",
438
474
  dup_filter,
439
475
  cutoff_date,
440
- url,
476
+ query_path,
441
477
  )
442
- changes = _load_gerrit_json(url)
478
+ changes = _load_gerrit_json(query_path)
443
479
  log.debug(
444
- "Gerrit query returned %d change(s) for project=%s filter=%s after=%s",
480
+ "Gerrit query returned %d change(s) for project=%s filter=%s "
481
+ "after=%s",
445
482
  len(changes),
446
483
  gerrit_project or "(any)",
447
484
  dup_filter,
448
485
  cutoff_date,
449
486
  )
450
487
  if changes:
451
- sample_subjects = ", ".join(str(c.get("subject") or "")[:60] for c in changes[:5])
488
+ sample_subjects = ", ".join(
489
+ str(c.get("subject") or "")[:60] for c in changes[:5]
490
+ )
452
491
  log.debug("Sample subjects: %s", sample_subjects)
453
492
 
493
+ # First pass: Check for trailer-based matches (GitHub-Hash)
494
+ if expected_github_hash:
495
+ log.debug(
496
+ "Checking for GitHub-Hash trailer matches: %s",
497
+ expected_github_hash,
498
+ )
499
+ trailer_matches: list[tuple[int, str]] = []
500
+
501
+ for c in changes:
502
+ # Extract commit message and check for GitHub trailers
503
+ rev = str(c.get("current_revision") or "")
504
+ revs_obj = c.get("revisions")
505
+ revs = revs_obj if isinstance(revs_obj, dict) else {}
506
+ cur_obj = revs.get(rev)
507
+ cur = cur_obj if isinstance(cur_obj, dict) else {}
508
+ commit = cur.get("commit") or {}
509
+ msg = str(commit.get("message") or "")
510
+
511
+ if msg:
512
+ github_metadata = extract_github_metadata(msg)
513
+ change_github_hash = github_metadata.get("GitHub-Hash", "")
514
+
515
+ if change_github_hash == expected_github_hash:
516
+ num = c.get("_number")
517
+ proj = str(c.get("project") or gerrit_project or "")
518
+ if isinstance(num, int):
519
+ trailer_matches.append((num, proj))
520
+ log.debug(
521
+ "Found GitHub-Hash trailer match: change %d, "
522
+ "hash %s",
523
+ num,
524
+ change_github_hash,
525
+ )
526
+
527
+ if trailer_matches:
528
+ log.debug(
529
+ "Found %d change(s) with matching GitHub-Hash trailer - "
530
+ "treating as update targets",
531
+ len(trailer_matches),
532
+ )
533
+ # These are update targets, not duplicates - allow them to
534
+ # proceed
535
+ return
536
+
454
537
  # Compare normalized subjects for exact equality
455
538
  matched: list[tuple[int, str]] = []
456
539
  for c in changes:
@@ -464,8 +547,11 @@ class DuplicateDetector:
464
547
  matched.append((num, proj))
465
548
 
466
549
  if not matched:
467
- # No exact subject match; proceed with similarity scoring across candidates
468
- log.debug("No exact-subject matches found; entering similarity scoring")
550
+ # No exact subject match; proceed with similarity scoring across
551
+ # candidates
552
+ log.debug(
553
+ "No exact-subject matches found; entering similarity scoring"
554
+ )
469
555
  from .similarity import ScoringConfig
470
556
  from .similarity import aggregate_scores
471
557
  from .similarity import remove_commit_trailers
@@ -488,7 +574,8 @@ class DuplicateDetector:
488
574
  if fname:
489
575
  src_files.append(str(fname))
490
576
  except Exception as exc:
491
- # Best-effort; if files cannot be retrieved, proceed without them
577
+ # Best-effort; if files cannot be retrieved, proceed without
578
+ # them
492
579
  log.debug("Failed to retrieve PR files for scoring: %s", exc)
493
580
 
494
581
  best_score = 0.0
@@ -499,7 +586,8 @@ class DuplicateDetector:
499
586
  subj = str(c.get("subject") or "").strip()
500
587
  if not subj:
501
588
  continue
502
- # Extract commit message and files from revisions (CURRENT_COMMIT, CURRENT_FILES)
589
+ # Extract commit message and files from revisions
590
+ # (CURRENT_COMMIT, CURRENT_FILES)
503
591
  rev = str(c.get("current_revision") or "")
504
592
  revs_obj = c.get("revisions")
505
593
  revs = revs_obj if isinstance(revs_obj, dict) else {}
@@ -512,7 +600,11 @@ class DuplicateDetector:
512
600
  cand_body_raw = msg.split("\n", 1)[1]
513
601
  cand_body = remove_commit_trailers(cand_body_raw)
514
602
  files_dict = cur.get("files") or {}
515
- cand_files = [p for p in files_dict if isinstance(p, str) and not p.startswith("/")]
603
+ cand_files = [
604
+ p
605
+ for p in files_dict
606
+ if isinstance(p, str) and not p.startswith("/")
607
+ ]
516
608
 
517
609
  # Compute component scores
518
610
  s_res = score_subjects(src_subjects, subj)
@@ -524,7 +616,9 @@ class DuplicateDetector:
524
616
  b_res = score_bodies(src_body, cand_body)
525
617
 
526
618
  # Aggregate
527
- agg = aggregate_scores(s_res.score, f_res.score, b_res.score, config=config)
619
+ agg = aggregate_scores(
620
+ s_res.score, f_res.score, b_res.score, config=config
621
+ )
528
622
  log.debug(
529
623
  "Aggregate score computed: %.2f (s=%.2f f=%.2f b=%.2f)",
530
624
  agg,
@@ -533,20 +627,24 @@ class DuplicateDetector:
533
627
  b_res.score,
534
628
  )
535
629
 
536
- # Build candidate reference and number using successful base path
630
+ # Build candidate reference and number using successful base
631
+ # path
537
632
  num_obj = c.get("_number")
538
633
  num = int(num_obj) if isinstance(num_obj, int) else None
539
634
  proj = str(c.get("project") or gerrit_project or "")
540
635
 
541
636
  # Use the base path that actually worked for API calls
542
- display_url_builder = create_gerrit_url_builder(gerrit_host, successful_base_path)
637
+ display_url_builder = create_gerrit_url_builder(
638
+ gerrit_host, successful_base_path
639
+ )
543
640
  ref = (
544
641
  display_url_builder.change_url(proj, num)
545
642
  if proj and isinstance(num, int)
546
643
  else (f"change {num}" if isinstance(num, int) else "")
547
644
  )
548
645
  log.debug(
549
- "Scoring candidate: ref=%s agg=%.2f (s=%.2f f=%.2f b=%.2f) subj='%s'",
646
+ "Scoring candidate: ref=%s agg=%.2f (s=%.2f f=%.2f b=%.2f) "
647
+ "subj='%s'",
550
648
  ref or "(none)",
551
649
  agg,
552
650
  s_res.score,
@@ -559,7 +657,11 @@ class DuplicateDetector:
559
657
  if agg > best_score:
560
658
  best_score = agg
561
659
  # Deduplicate reasons preserving order
562
- best_reasons = list(dict.fromkeys(s_res.reasons + f_res.reasons + b_res.reasons))
660
+ best_reasons = list(
661
+ dict.fromkeys(
662
+ s_res.reasons + f_res.reasons + b_res.reasons
663
+ )
664
+ )
563
665
 
564
666
  # Collect all candidates above threshold
565
667
  if agg >= config.similarity_threshold and ref:
@@ -579,7 +681,10 @@ class DuplicateDetector:
579
681
  for s, u, _ in hits_sorted:
580
682
  if u:
581
683
  log.info("Score: %.2f URL: %s", s, u)
582
- msg = f"Similar Gerrit change(s) detected [≥ {config.similarity_threshold:.2f}]"
684
+ msg = (
685
+ f"Similar Gerrit change(s) detected "
686
+ f"[≥ {config.similarity_threshold:.2f}]"
687
+ )
583
688
  if best_reasons:
584
689
  msg += f" (Reasons: {', '.join(best_reasons)})"
585
690
  if allow_duplicates:
@@ -590,34 +695,45 @@ class DuplicateDetector:
590
695
  # Construct human-friendly references for logs
591
696
  matching_numbers: list[int] = []
592
697
  match_lines: list[str] = []
698
+ duplicate_urls: list[str] = []
593
699
  for n, proj in matched:
594
700
  if proj:
595
701
  # Use the base path that actually worked for API calls
596
- display_url_builder = create_gerrit_url_builder(gerrit_host, successful_base_path)
702
+ display_url_builder = create_gerrit_url_builder(
703
+ gerrit_host, successful_base_path
704
+ )
597
705
  url = display_url_builder.change_url(proj, n)
598
706
  match_lines.append(f"Score: 1.0 URL: {url}")
599
- log.info("Score: 1.0 URL: %s", url)
707
+ duplicate_urls.append(url)
708
+ log.debug("Score: 1.0 URL: %s", url)
600
709
  else:
601
710
  match_lines.append(f"Score: 1.0 URL: change {n}")
602
- log.info("Score: 1.0 URL: change %s", n)
711
+ duplicate_urls.append(f"change {n}")
712
+ log.debug("Score: 1.0 URL: change %s", n)
603
713
  matching_numbers.append(n)
604
714
 
605
715
  if not matched:
606
- log.debug("No exact subject matches and no similarity matches; duplicate check passes")
716
+ log.debug(
717
+ "No exact subject matches and no similarity matches; "
718
+ "duplicate check passes"
719
+ )
607
720
  return
608
721
 
609
722
  # Remove PR number from message since cli.py already includes it
610
723
  full_message = "subject matches existing Gerrit change(s)"
611
724
  if allow_duplicates:
612
- log.warning("GERRIT DUPLICATE DETECTED (allowed): %s", full_message)
725
+ log.debug("GERRIT DUPLICATE DETECTED (allowed): %s", full_message)
613
726
  return
614
- raise DuplicateChangeError(full_message, matching_numbers)
727
+ raise DuplicateChangeError(
728
+ full_message, matching_numbers, duplicate_urls
729
+ )
615
730
 
616
731
 
617
732
  def check_for_duplicates(
618
733
  gh: GitHubContext,
619
734
  allow_duplicates: bool = False,
620
735
  lookback_days: int = 7,
736
+ expected_github_hash: str | None = None,
621
737
  ) -> None:
622
738
  """Convenience function to check for duplicates.
623
739
 
@@ -625,6 +741,7 @@ def check_for_duplicates(
625
741
  gh: GitHub context containing PR information
626
742
  allow_duplicates: If True, only log warnings; if False, raise exception
627
743
  lookback_days: Number of days to look back for similar PRs
744
+ expected_github_hash: The GitHub-Hash trailer value expected for this PR
628
745
 
629
746
  Raises:
630
747
  DuplicateChangeError: If duplicates found and allow_duplicates=False
@@ -646,13 +763,20 @@ def check_for_duplicates(
646
763
  lookback_days=lookback_days,
647
764
  duplicates_filter=os.getenv("DUPLICATE_TYPES", "open"),
648
765
  )
649
- detector.check_for_duplicates(target_pr, allow_duplicates=allow_duplicates, gh=gh)
766
+ detector.check_for_duplicates(
767
+ target_pr,
768
+ allow_duplicates=allow_duplicates,
769
+ gh=gh,
770
+ expected_github_hash=expected_github_hash,
771
+ )
650
772
 
651
- log.info("Duplicate check completed for PR #%d", gh.pr_number)
773
+ log.debug("Duplicate check completed for PR #%d", gh.pr_number)
652
774
 
653
775
  except DuplicateChangeError:
654
776
  # Re-raise duplicate errors
655
777
  raise
656
778
  except Exception as exc:
657
- log.warning("Duplicate detection failed for PR #%d: %s", gh.pr_number, exc)
779
+ log.warning(
780
+ "Duplicate detection failed for PR #%d: %s", gh.pr_number, exc
781
+ )
658
782
  # Don't fail the entire process if duplicate detection has issues