github2gerrit 0.1.10__py3-none-any.whl → 0.1.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,589 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # SPDX-FileCopyrightText: 2025 The Linux Foundation
3
+ """
4
+ Phase 2: Reconciliation plan + orphan policy integration.
5
+
6
+ Enhancements over Phase 1:
7
+ - Introduces a first-class `ReconciliationPlan` data model
8
+ - Computes deterministic digest (sha256 first 12 hex chars)
9
+ - Applies the configured orphan policy (comment / abandon / ignore)
10
+ - Emits enriched JSON summary including digest
11
+ - Maintains backward compatibility (still returns list[str] to caller)
12
+
13
+ Current scope (Phase 2):
14
+ - Strategy handling (topic / comment / topic+comment)
15
+ - Multi-pass matching via `ReconciliationMatcher`
16
+ - Mapping reuse (legacy comment path) with extension
17
+ - Plan construction (reused / new / orphan / digest)
18
+ - Orphan policy handling (abandon / comment via Gerrit REST, or ignore)
19
+
20
+ Deferred (later phases):
21
+ - Real abandon/comment REST side-effects
22
+ - Verification phase digest comparison
23
+ - Idempotent backref retrieval via Gerrit REST
24
+ - Full pipeline state machine integration
25
+
26
+ Design notes:
27
+ - Silent fallback on missing optional inputs (robustness)
28
+ - All new helpers keep line length within 80 characters
29
+ """
30
+
31
+ from __future__ import annotations
32
+
33
+ import hashlib
34
+ import json
35
+ import logging
36
+ import time
37
+ from collections.abc import Sequence
38
+ from dataclasses import dataclass
39
+
40
+ from github2gerrit.core import GerritInfo
41
+ from github2gerrit.gerrit_query import GerritChange
42
+ from github2gerrit.gerrit_query import query_changes_by_topic
43
+ from github2gerrit.mapping_comment import parse_mapping_comments
44
+ from github2gerrit.mapping_comment import validate_mapping_consistency
45
+ from github2gerrit.reconcile_matcher import LocalCommit
46
+ from github2gerrit.reconcile_matcher import ReconciliationMatcher
47
+
48
+
49
+ log = logging.getLogger(__name__)
50
+
51
+
52
+ # ---------------------------------------------------------------------------
53
+ # Type hints (imported lazily to avoid circulars)
54
+ # ---------------------------------------------------------------------------
55
+
56
+ try: # pragma: no cover - typing only
57
+ from github2gerrit.models import GitHubContext
58
+ from github2gerrit.models import Inputs
59
+ except Exception: # pragma: no cover
60
+ GitHubContext = object # type: ignore[misc,assignment]
61
+ Inputs = object # type: ignore[misc,assignment]
62
+
63
+
64
+ # ---------------------------------------------------------------------------
65
+ # Public API
66
+ # ---------------------------------------------------------------------------
67
+
68
+
69
def perform_reconciliation(
    inputs: Inputs,
    gh: GitHubContext,
    gerrit: GerritInfo | None,
    local_commits: list[LocalCommit],
    *,
    expected_pr_url: str | None = None,
    expected_github_hash: str | None = None,
) -> list[str]:
    """
    Build and apply a reconciliation plan (Phase 2).

    Still returns only the ordered Change-Id list for backward
    compatibility with the legacy orchestrator. A richer plan object
    is constructed internally (digest, orphan ids, classification).

    Resolution order:
      1. Topic discovery, when the strategy string contains "topic".
      2. PR-comment mapping reuse, when the strategy contains "comment"
         and topic discovery produced no changes.
      3. Matcher path over the discovered Gerrit changes, followed by
         the orphan policy.
      4. Otherwise every local commit receives a fresh Change-Id.

    Args:
        inputs: Configuration / feature switches. Read via ``getattr``
            with defaults: reuse_strategy, allow_orphan_changes,
            similarity_subject, orphan_policy, log_reconcile_json.
        gh: GitHub context information.
        gerrit: Gerrit connection info (may be None).
        local_commits: Ordered local commits (topological).
        expected_pr_url: Optional authoritative PR URL. Derived from
            ``gh`` when omitted.
        expected_github_hash: Optional expected GitHub-Hash trailer.

    Returns:
        Ordered list of Change-Ids (plan.change_ids). Empty when there
        are no local commits or the strategy is "none".
    """
    if not local_commits:
        return []

    strategy = (
        getattr(inputs, "reuse_strategy", "topic+comment") or ""
    ).lower()
    if strategy == "none":
        log.info("Reconciliation disabled (reuse_strategy=none)")
        return []

    if expected_pr_url is None:
        expected_pr_url = (
            f"{gh.server_url}/{gh.repository}/pull/{gh.pr_number}"
        )

    commit_count = len(local_commits)
    # Read the switch once; every exit path below emits the summary.
    log_json = getattr(inputs, "log_reconcile_json", False)

    log.debug(
        "Recon strategy=%s commits=%d pr=%s",
        strategy,
        commit_count,
        expected_pr_url,
    )

    # 1. Topic discovery
    gerrit_changes: list[GerritChange] = []
    if "topic" in strategy:
        gerrit_changes = _query_and_validate_topic_changes(
            gerrit=gerrit,
            gh=gh,
            allow_orphans=getattr(inputs, "allow_orphan_changes", False),
            expected_pr_url=expected_pr_url,
            expected_github_hash=expected_github_hash,
        )

    # 2. Comment fallback (only if topic yielded nothing)
    if "comment" in strategy and not gerrit_changes:
        mapped_ids = _attempt_comment_based_reuse(
            gh=gh,
            expected_pr_url=expected_pr_url,
            expected_github_hash=expected_github_hash,
        )
        if mapped_ids:
            # A prior mapping may list more Change-Ids than the PR now
            # has commits. Truncate first so change_ids stays aligned
            # one-to-one with local_commits and reused + new add up.
            reused_ids = mapped_ids[:commit_count]
            surplus = len(mapped_ids) - len(reused_ids)
            if surplus > 0:
                log.debug(
                    "Ignoring %d surplus mapped Change-Ids "
                    "(mapping longer than local commit list)",
                    surplus,
                )
            ordered = _extend_or_generate(
                reused_ids, commit_count, local_commits
            )
            plan = ReconciliationPlan(
                change_ids=ordered,
                reused_ids=reused_ids,
                new_ids=ordered[len(reused_ids) :],
                orphan_change_ids=[],
                digest=_compute_plan_digest(ordered),
                strategy=strategy,
            )
            _maybe_emit_summary(plan, log_json=log_json)
            return plan.change_ids

    # 3. Matcher path
    if gerrit_changes:
        matcher = ReconciliationMatcher(
            similarity_threshold=getattr(inputs, "similarity_subject", 0.7),
            allow_duplicate_subjects=True,
        )
        result = matcher.reconcile(local_commits, gerrit_changes)
        ordered = result.change_ids
        # NOTE(review): this split assumes the matcher places reused
        # ids before new ones in change_ids; if reused and new ids can
        # interleave, the classification (not the order) is off.
        # Confirm against ReconciliationMatcher.
        reused_ids = ordered[: result.reused_count]
        new_ids = ordered[result.reused_count :]
        orphan_ids = [c.change_id for c in result.orphaned_changes]
        plan = ReconciliationPlan(
            change_ids=ordered,
            reused_ids=reused_ids,
            new_ids=new_ids,
            orphan_change_ids=orphan_ids,
            digest=_compute_plan_digest(ordered),
            strategy=strategy,
        )
        # Apply orphan policy (with REST side-effects)
        orphan_policy = getattr(inputs, "orphan_policy", "comment")
        actions = _apply_orphan_policy(
            orphan_ids, orphan_policy, gerrit=gerrit
        )
        if actions.has_actions():
            log.info(
                "ORPHAN_ACTIONS json=%s",
                json.dumps(actions.as_dict(), separators=(",", ":")),
            )
        _maybe_emit_summary(plan, log_json=log_json)
        return plan.change_ids

    # 4. All new path
    new_ids = [_generate_change_id(c.sha) for c in local_commits]
    plan = ReconciliationPlan(
        change_ids=new_ids,
        reused_ids=[],
        new_ids=new_ids,
        orphan_change_ids=[],
        digest=_compute_plan_digest(new_ids),
        strategy=strategy,
    )
    _maybe_emit_summary(plan, log_json=log_json)
    return plan.change_ids
197
+
198
+
199
+ # ---------------------------------------------------------------------------
200
+ # Internal helpers
201
+ # ---------------------------------------------------------------------------
202
+
203
+
204
def _query_and_validate_topic_changes(
    *,
    gerrit: GerritInfo | None,
    gh: GitHubContext,
    allow_orphans: bool,
    expected_pr_url: str,
    expected_github_hash: str | None,
) -> list[GerritChange]:
    """
    Query Gerrit changes by PR topic and keep those whose metadata
    matches the expected PR.

    The topic is ``GH-<owner>-<repo>-<pr_number>``. Discovery is
    best-effort: any failure is logged and an empty list is returned so
    the caller can fall back to other strategies.

    Args:
        gerrit: Gerrit connection info; None short-circuits to [].
        gh: GitHub context (repository_owner, repository, pr_number).
        allow_orphans: When True only "NEW" changes are queried;
            otherwise "NEW" and "MERGED".
        expected_pr_url: PR URL the commit message must reference.
        expected_github_hash: Optional GitHub-Hash trailer value.

    Returns:
        Validated Gerrit changes, or [] when none match or discovery
        failed.
    """
    # Nothing to query without connection info; bail out before any
    # other work.
    if gerrit is None:
        log.debug("No Gerrit info provided, returning empty changes list")
        return []

    topic = (
        f"GH-{gh.repository_owner}-{gh.repository.split('/')[-1]}-"
        f"{gh.pr_number}"
    )
    try:
        from github2gerrit.gerrit_rest import (
            build_client_for_host,  # lazy import
        )

        client = build_client_for_host(
            gerrit.host,
            timeout=8.0,
            max_attempts=3,
        )
        statuses = ["NEW"] if allow_orphans else ["NEW", "MERGED"]
        changes = query_changes_by_topic(client, topic, statuses=statuses)
        if not changes:
            log.debug(
                "Topic query returned 0 Gerrit changes for topic=%s", topic
            )
            return []

        validated = _filter_changes_by_pr_metadata(
            changes,
            expected_pr_url=expected_pr_url,
            expected_github_hash=expected_github_hash,
        )
        log.info(
            "Validated %d/%d Gerrit changes via topic metadata match",
            len(validated),
            len(changes),
        )
    except Exception as exc:
        # Still best-effort, but surfaced at WARNING: a failed lookup
        # makes the caller fall back (possibly to fresh Change-Ids), so
        # it must not be invisible at default log levels.
        log.warning("Topic-based discovery failed: %s", exc)
        return []
    return validated
254
+
255
+
256
def _contains_exact_token(text: str, token: str) -> bool:
    """
    Return True when ``token`` occurs in ``text`` and is not
    immediately followed by an alphanumeric character.

    This stops ``.../pull/1`` from matching ``.../pull/12`` and stops a
    hash from matching a longer hash that shares its prefix. An empty
    token always matches, as with the ``in`` operator.
    """
    if not token:
        return True
    pos = text.find(token)
    while pos != -1:
        end = pos + len(token)
        if end == len(text) or not text[end].isalnum():
            return True
        pos = text.find(token, pos + 1)
    return False


def _filter_changes_by_pr_metadata(
    changes: Sequence[GerritChange],
    *,
    expected_pr_url: str,
    expected_github_hash: str | None,
) -> list[GerritChange]:
    """
    Filter changes whose commit messages reference the expected PR
    (and GitHub-Hash when provided).

    Args:
        changes: Candidate Gerrit changes; ``commit_message`` may be
            None or empty.
        expected_pr_url: PR URL that must appear in the message as a
            complete token (not as a prefix of another PR's URL).
        expected_github_hash: When truthy, the message must also carry
            ``GitHub-Hash: <value>`` with exactly this value.

    Returns:
        Matching changes in their original order.
    """
    hash_token = (
        f"GitHub-Hash: {expected_github_hash}"
        if expected_github_hash
        else None
    )
    filtered: list[GerritChange] = []
    for ch in changes:
        msg = ch.commit_message or ""
        if not _contains_exact_token(msg, expected_pr_url):
            continue
        if hash_token is not None and not _contains_exact_token(
            msg, hash_token
        ):
            continue
        filtered.append(ch)
    return filtered
278
+
279
+
280
def _attempt_comment_based_reuse(
    *,
    gh: GitHubContext,
    expected_pr_url: str,
    expected_github_hash: str | None,
) -> list[str] | None:
    """
    Try to recover a Change-Id mapping from earlier PR comments
    (legacy path).

    With a known GitHub hash the mapping must pass
    ``validate_mapping_consistency``; without one only the PR URL is
    compared (best-effort reuse). Any exception is logged at debug
    level and reported as "no mapping".

    Returns:
        Ordered list of mapped Change-Ids, or None.
    """
    try:
        from github2gerrit.github_api import build_client
        from github2gerrit.github_api import get_pull
        from github2gerrit.github_api import get_repo_from_env

        repo = get_repo_from_env(build_client())
        pull = get_pull(repo, int(gh.pr_number or 0))
        bodies = [
            getattr(comment, "body", "") or ""
            for comment in pull.as_issue().get_comments()
        ]

        mapping = parse_mapping_comments(bodies)
        if not mapping:
            return None

        if expected_github_hash is not None:
            # Strict path: URL and hash must both be consistent.
            consistent = validate_mapping_consistency(
                mapping, expected_pr_url, expected_github_hash
            )
            if not consistent:
                return None
            log.debug(
                "Using comment-based mapping reuse (%d Change-Ids).",
                len(mapping.change_ids),
            )
            return mapping.change_ids

        # Hash unknown: skip strict validation, compare the PR URL only.
        if mapping.pr_url != expected_pr_url:
            log.warning(
                "Skipping mapping reuse: PR URL mismatch (%s != %s)",
                mapping.pr_url,
                expected_pr_url,
            )
            return None
        return mapping.change_ids
    except Exception as exc:
        log.debug("Comment-based reconciliation failed: %s", exc)
        return None
330
+
331
+
332
def _extend_or_generate(
    existing_ids: list[str],
    total_commits: int,
    local_commits: list[LocalCommit],
) -> list[str]:
    """
    Return a copy of ``existing_ids`` padded with freshly generated
    Change-Ids for commit positions ``len(existing_ids)`` up to
    ``total_commits`` (exclusive).

    The input list is never mutated. If it already holds
    ``total_commits`` or more entries it is returned unchanged as a
    copy.
    """
    # Each new id is seeded with the SHA of the commit at that position.
    return [
        *existing_ids,
        *(
            _generate_change_id(local_commits[position].sha)
            for position in range(len(existing_ids), total_commits)
        ),
    ]
346
+
347
+
348
def _generate_change_id(seed: str) -> str:
    """
    Produce a Gerrit-style Change-Id: the letter "I" followed by the
    first 40 hex characters of a SHA-256 over the current time and
    ``seed``.

    The time component means repeated calls with the same seed
    normally yield different ids.
    """
    material = f"{time.time()}_{seed}".encode("utf-8")
    hex_digest = hashlib.sha256(material).hexdigest()
    return "I" + hex_digest[:40]
354
+
355
+
356
def _emit_summary_json(
    *,
    total_local: int,
    reused: int,
    new: int,
    orphaned: int,
    strategy: str,
    digest: str,
) -> None:
    """
    Log the reconciliation counters as a single-line JSON document.

    The record is written at INFO level with the prefix
    ``RECONCILE_SUMMARY json=`` so downstream tooling can parse it.
    """
    payload = json.dumps(
        {
            "total_local": total_local,
            "reused": reused,
            "new": new,
            "orphaned": orphaned,
            "strategy": strategy,
            "digest": digest,
        }
    )
    log.info("RECONCILE_SUMMARY json=%s", payload)
380
+
381
+
382
+ # ---------------------------------------------------------------------------
383
+ # Minimal dataclass for future expansion (placeholder for Phase 2)
384
+ # ---------------------------------------------------------------------------
385
+
386
+
387
+ @dataclass(slots=True)
388
+ class ReconciliationPlan:
389
+ """
390
+ Concrete reconciliation plan (Phase 2).
391
+
392
+ Fields:
393
+ change_ids: Ordered Change-Ids aligned to local commits
394
+ reused_ids: Subset reused from existing Gerrit changes
395
+ new_ids: Newly generated Change-Ids
396
+ orphan_change_ids: Gerrit changes not matched by local commits
397
+ digest: Deterministic digest of ordered Change-Ids
398
+ strategy: Strategy string used
399
+ """
400
+
401
+ change_ids: list[str]
402
+ reused_ids: list[str]
403
+ new_ids: list[str]
404
+ orphan_change_ids: list[str]
405
+ digest: str
406
+ strategy: str
407
+
408
+
409
+ @dataclass(slots=True)
410
+ class OrphanActionLog:
411
+ """
412
+ Captures logged orphan handling outcomes (Phase 2 stub).
413
+ """
414
+
415
+ abandoned: list[str]
416
+ commented: list[str]
417
+ ignored: list[str]
418
+
419
+ def has_actions(self) -> bool:
420
+ return bool(self.abandoned or self.commented)
421
+
422
+ def as_dict(self) -> dict[str, list[str]]:
423
+ return {
424
+ "abandoned": self.abandoned,
425
+ "commented": self.commented,
426
+ "ignored": self.ignored,
427
+ }
428
+
429
+
430
def _compute_plan_digest(change_ids: list[str]) -> str:
    """
    Return the first 12 hex characters of the SHA-256 over the
    Change-Ids joined by newlines (order-sensitive).
    """
    joined = "\n".join(change_ids).encode("utf-8")
    return hashlib.sha256(joined).hexdigest()[:12]
433
+
434
+
435
+ def _apply_orphan_policy(
436
+ orphan_ids: list[str],
437
+ policy: str,
438
+ *,
439
+ gerrit: GerritInfo | None = None,
440
+ ) -> OrphanActionLog:
441
+ """
442
+ Apply orphan policy with REST side-effects.
443
+
444
+ Policies:
445
+ - abandon: invoke Gerrit REST abandon with reason
446
+ - comment: add explanatory comment via Gerrit REST
447
+ - ignore: no action taken
448
+ """
449
+ policy_lc = (policy or "comment").lower()
450
+ abandoned: list[str] = []
451
+ commented: list[str] = []
452
+ ignored: list[str] = []
453
+
454
+ if not orphan_ids:
455
+ return OrphanActionLog(abandoned=[], commented=[], ignored=[])
456
+
457
+ if policy_lc == "abandon":
458
+ abandoned = _abandon_orphan_changes(orphan_ids, gerrit)
459
+ log.info(
460
+ "Orphan policy 'abandon' completed for %d changes", len(abandoned)
461
+ )
462
+ elif policy_lc == "comment":
463
+ commented = _comment_orphan_changes(orphan_ids, gerrit)
464
+ log.info(
465
+ "Orphan policy 'comment' completed for %d changes", len(commented)
466
+ )
467
+ else:
468
+ ignored = orphan_ids[:]
469
+ log.info("Orphan policy 'ignore' selected - no action taken")
470
+
471
+ return OrphanActionLog(
472
+ abandoned=abandoned,
473
+ commented=commented,
474
+ ignored=ignored,
475
+ )
476
+
477
+
478
def _abandon_orphan_changes(
    orphan_ids: list[str], gerrit: GerritInfo | None
) -> list[str]:
    """
    POST an abandon request to Gerrit for every orphaned change.

    Best-effort: a failure for one change is logged as a warning and
    the remaining changes are still attempted. If the REST client
    cannot be created, nothing is abandoned.

    Returns:
        Change-Ids whose abandon request succeeded. Empty when there
        are no orphans or no Gerrit connection info.
    """
    if gerrit is None or not orphan_ids:
        return []

    from github2gerrit.gerrit_rest import GerritRestError
    from github2gerrit.gerrit_rest import build_client_for_host

    reason = "Abandoned due to GitHub PR update (orphaned change)"
    succeeded: list[str] = []

    try:
        client = build_client_for_host(
            gerrit.host, timeout=10.0, max_attempts=3
        )
    except Exception as exc:
        log.warning(
            "Failed to create Gerrit REST client for abandon operations: %s",
            exc,
        )
        return succeeded

    for orphan in orphan_ids:
        try:
            client.post(
                f"/changes/{orphan}/abandon", data={"message": reason}
            )
        except GerritRestError as exc:
            log.warning("Failed to abandon change %s: %s", orphan, exc)
        except Exception as exc:
            log.warning(
                "Unexpected error abandoning change %s: %s", orphan, exc
            )
        else:
            succeeded.append(orphan)
            log.debug("Successfully abandoned change %s", orphan)

    return succeeded
524
+
525
+
526
def _comment_orphan_changes(
    orphan_ids: list[str], gerrit: GerritInfo | None
) -> list[str]:
    """
    POST an explanatory review message to the current revision of
    every orphaned change.

    Best-effort: a failure for one change is logged as a warning and
    the remaining changes are still attempted. If the REST client
    cannot be created, no comments are posted.

    Returns:
        Change-Ids whose comment request succeeded. Empty when there
        are no orphans or no Gerrit connection info.
    """
    if gerrit is None or not orphan_ids:
        return []

    from github2gerrit.gerrit_rest import GerritRestError
    from github2gerrit.gerrit_rest import build_client_for_host

    notice = (
        "This change has been orphaned by a GitHub PR update. "
        "It is no longer part of the current PR commit set."
    )
    succeeded: list[str] = []

    try:
        client = build_client_for_host(
            gerrit.host, timeout=10.0, max_attempts=3
        )
    except Exception as exc:
        log.warning(
            "Failed to create Gerrit REST client for comment operations: %s",
            exc,
        )
        return succeeded

    for orphan in orphan_ids:
        try:
            client.post(
                f"/changes/{orphan}/revisions/current/review",
                data={"message": notice},
            )
        except GerritRestError as exc:
            log.warning("Failed to comment on change %s: %s", orphan, exc)
        except Exception as exc:
            log.warning(
                "Unexpected error commenting on change %s: %s",
                orphan,
                exc,
            )
        else:
            succeeded.append(orphan)
            log.debug("Successfully commented on change %s", orphan)

    return succeeded
577
+
578
+
579
def _maybe_emit_summary(plan: ReconciliationPlan, *, log_json: bool) -> None:
    """
    Emit the JSON summary for ``plan`` when ``log_json`` is truthy;
    otherwise do nothing.

    The counters are the lengths of the plan's id lists.
    """
    if log_json:
        _emit_summary_json(
            total_local=len(plan.change_ids),
            reused=len(plan.reused_ids),
            new=len(plan.new_ids),
            orphaned=len(plan.orphan_change_ids),
            strategy=plan.strategy,
            digest=plan.digest,
        )