github2gerrit 0.1.6__py3-none-any.whl → 0.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- github2gerrit/cli.py +458 -192
- github2gerrit/commit_normalization.py +471 -0
- github2gerrit/core.py +822 -252
- github2gerrit/duplicate_detection.py +1 -69
- github2gerrit/external_api.py +517 -0
- github2gerrit/gerrit_rest.py +298 -0
- github2gerrit/gerrit_urls.py +149 -52
- github2gerrit/github_api.py +12 -79
- github2gerrit/gitutils.py +208 -49
- github2gerrit/models.py +2 -0
- github2gerrit/pr_content_filter.py +476 -0
- github2gerrit/similarity.py +2 -2
- github2gerrit/ssh_agent_setup.py +351 -0
- github2gerrit/ssh_common.py +244 -0
- github2gerrit/ssh_discovery.py +4 -0
- github2gerrit/utils.py +113 -0
- github2gerrit-0.1.8.dist-info/METADATA +798 -0
- github2gerrit-0.1.8.dist-info/RECORD +24 -0
- github2gerrit-0.1.6.dist-info/METADATA +0 -552
- github2gerrit-0.1.6.dist-info/RECORD +0 -17
- {github2gerrit-0.1.6.dist-info → github2gerrit-0.1.8.dist-info}/WHEEL +0 -0
- {github2gerrit-0.1.6.dist-info → github2gerrit-0.1.8.dist-info}/entry_points.txt +0 -0
- {github2gerrit-0.1.6.dist-info → github2gerrit-0.1.8.dist-info}/licenses/LICENSE +0 -0
- {github2gerrit-0.1.6.dist-info → github2gerrit-0.1.8.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,471 @@
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
2
|
+
# SPDX-FileCopyrightText: 2025 The Linux Foundation
|
3
|
+
#
|
4
|
+
# Commit Normalization Module
|
5
|
+
#
|
6
|
+
# This module provides functionality to normalize commit messages to follow
|
7
|
+
# conventional commit standards. It analyzes repository configuration and
|
8
|
+
# history to determine the preferred commit message format and applies
|
9
|
+
# appropriate transformations to automated PR titles.
|
10
|
+
#
|
11
|
+
# Key features:
|
12
|
+
# - Detects conventional commit preferences from .pre-commit-config.yaml
|
13
|
+
# - Analyzes .github/release-drafter.yml for commit type patterns
|
14
|
+
# - Examines git history for existing conventional commit patterns
|
15
|
+
# - Transforms dependabot/automation PR titles to conventional format
|
16
|
+
# - Respects repository-specific capitalization preferences
|
17
|
+
|
18
|
+
from __future__ import annotations
|
19
|
+
|
20
|
+
import logging
|
21
|
+
import re
|
22
|
+
import shutil
|
23
|
+
import subprocess
|
24
|
+
from dataclasses import dataclass
|
25
|
+
from dataclasses import field
|
26
|
+
from pathlib import Path
|
27
|
+
|
28
|
+
import yaml
|
29
|
+
|
30
|
+
|
31
|
+
log = logging.getLogger("github2gerrit.commit_normalization")

# Recognised conventional commit types, listed in order of preference.
CONVENTIONAL_COMMIT_TYPES = [
    "feat",
    "fix",
    "docs",
    "style",
    "refactor",
    "perf",
    "test",
    "build",
    "ci",
    "chore",
    "revert",
]

# Regexes (searched against the lowercased title) that map a title to a
# commit type.  Both the key order and the order inside each list are
# significant: the first pattern that matches decides the type, so the
# specific groups must stay ahead of the catch-all "chore" group.
CHANGE_TYPE_PATTERNS = {
    "docs": [
        r"update.*documentation",
        r"add.*documentation",
        r"improve.*docs",
        r"update.*readme",
        r"add.*readme",
        r"add\s+new\s+documentation",
    ],
    "feat": [
        r"add\s+new(?!\s+documentation)",
        r"implement",
        r"introduce",
        r"create\s+new",
    ],
    "fix": [
        r"fix",
        r"resolve",
        r"correct",
        r"repair",
        r"patch",
    ],
    "build": [
        # "<verb> ... dependencies" first, then "<verb> ... dependency".
        *[
            rf"{verb}.*{noun}"
            for noun in ("dependencies", "dependency")
            for verb in ("update", "upgrade", "bump")
        ],
        # Updates that name a specific build or packaging tool.
        *[
            rf"update.*{tool}"
            for tool in ("gradle", "maven", "npm", "pip", "requirements")
        ],
    ],
    "ci": [
        rf"update.*{target}"
        for target in (
            "workflow",
            "action",
            "pipeline",
            "jenkins",
            "github.*action",
            "ci",
        )
    ],
    "chore": [
        r"bump",
        r"update",
        r"upgrade",
        r"maintain",
        r"housekeeping",
        r"cleanup",
        r"pre-commit.*autofix",
        r"pre-commit.*autoupdate",
    ],
}

# Shared fragments of the dependabot title patterns.
_PACKAGE = r"(?P<package>[^\s]+)"
_VERSIONS = r"from\s+(?P<old_version>[^\s]+)\s+to\s+(?P<new_version>[^\s]+)"

# Dependabot-style titles: "<verb> <package> [requirement] from <old> to <new>".
DEPENDABOT_PATTERNS = [
    rf"bump\s+{_PACKAGE}\s+{_VERSIONS}",
    rf"update\s+{_PACKAGE}\s+requirement\s+{_VERSIONS}",
    rf"upgrade\s+{_PACKAGE}\s+{_VERSIONS}",
]
|
110
|
+
|
111
|
+
|
112
|
+
@dataclass
class ConventionalCommitPreferences:
    """Conventional-commit formatting preferences for one repository.

    Attributes:
        capitalization: How the commit type is cased when a title is
            formatted; one of "lower", "title" or "upper".
        preferred_types: Commit types the repository appears to use, keyed
            by the lowercase type name.
        dependency_type: Commit type used for dependency updates.
        automation_type: Commit type used for other automated changes.
        use_scope: Whether a scope is added to dependency-update titles.
        dependency_scope: Scope text used when ``use_scope`` is enabled.
    """

    capitalization: str = "lower"
    preferred_types: dict[str, str] = field(default_factory=dict)
    dependency_type: str = "chore"
    automation_type: str = "chore"
    use_scope: bool = False
    dependency_scope: str = "deps"
|
133
|
+
|
134
|
+
|
135
|
+
class CommitNormalizer:
    """Rewrite automation PR titles as conventional commit subjects.

    Titles that are already conventional, or that do not look automated,
    are returned unchanged.  Repository preferences (commit-type
    capitalization, the type used for automation commits) are detected from
    the workspace the first time a title actually needs rewriting and are
    then reused by this instance.
    """

    def __init__(self, workspace: Path | None = None):
        """Create a normalizer.

        Args:
            workspace: Repository root to inspect; the current working
                directory is used when omitted.
        """
        self.workspace = workspace or Path.cwd()
        self.preferences = ConventionalCommitPreferences()
        self._detected_preferences = False

    def should_normalize(self, title: str, author: str) -> bool:
        """Return True when ``title`` is an automation title needing a rewrite.

        Empty titles and titles already in conventional commit format are
        never normalized.
        """
        if not title:
            return False

        if self._is_conventional_commit(title):
            return False

        return self._is_automation_pr(title, author)

    def normalize_commit_title(self, title: str, author: str) -> str:
        """Return ``title`` in conventional commit format.

        The title is returned unchanged when ``should_normalize`` rejects it.
        """
        if not self.should_normalize(title, author):
            return title

        # Preference detection reads files and runs git, so do it only once
        # and only when a title really needs rewriting.
        if not self._detected_preferences:
            self._detect_preferences()
            self._detected_preferences = True

        commit_type = self._determine_commit_type(title, author)
        normalized_title = self._clean_title(title)
        return self._format_conventional_commit(commit_type, normalized_title)

    def _is_conventional_commit(self, title: str) -> bool:
        """Check if title is already in conventional commit format."""
        pattern = r"^(feat|fix|docs|style|refactor|perf|test|build|ci|chore|revert)(\(.+?\))?\s*!?\s*:\s*.+"
        return bool(re.match(pattern, title, re.IGNORECASE))

    def _is_automation_pr(self, title: str, author: str) -> bool:
        """Check if this is an automation PR that should be normalized.

        A PR qualifies when its author is a known bot account or when its
        title matches one of the automation title patterns.
        """
        automation_authors = (
            "dependabot[bot]",
            "dependabot-preview[bot]",
            "pre-commit-ci[bot]",
            "renovate[bot]",
            "greenkeeper[bot]",
        )

        # Tolerate a missing author instead of failing on ``None.lower()``.
        if (author or "").lower().startswith(automation_authors):
            return True

        automation_patterns = [
            r"^bump\s+",
            r"^update\s+.*\s+from\s+.*\s+to\s+",
            r"^upgrade\s+.*\s+from\s+.*\s+to\s+",
            r"pre-commit.*autofix",
            r"pre-commit.*autoupdate",
        ]

        return any(re.search(pattern, title, re.IGNORECASE) for pattern in automation_patterns)

    def _detect_preferences(self) -> None:
        """Detect repository preferences for conventional commits.

        Sources are consulted in order, so a later source overrides what an
        earlier one set: pre-commit config, release-drafter config, then git
        history.
        """
        log.debug("Detecting conventional commit preferences for workspace: %s", self.workspace)

        self._check_precommit_config()
        self._check_release_drafter_config()
        self._analyze_git_history()

        log.info("Detected commit preferences: %s", self.preferences)

    def _check_precommit_config(self) -> None:
        """Read commit-message preferences from .pre-commit-config.yaml.

        Uses the ``autofix_commit_msg`` and ``autoupdate_commit_msg`` values
        of the ``ci`` section when present.  Detection is best-effort: any
        failure is logged at debug level and otherwise ignored.
        """
        config_file = self.workspace / ".pre-commit-config.yaml"
        if not config_file.exists():
            return

        try:
            with config_file.open("r", encoding="utf-8") as f:
                config = yaml.safe_load(f)

            # An empty file parses to None and a bare "ci:" key has a None
            # value; neither carries any preference.
            ci_config = config.get("ci") if isinstance(config, dict) else None
            if not isinstance(ci_config, dict):
                return

            # Autofix first, autoupdate second, so autoupdate wins when both
            # are conventional.
            for key in ("autofix_commit_msg", "autoupdate_commit_msg"):
                message = ci_config.get(key, "")
                if isinstance(message, str) and message:
                    self._extract_preferences_from_message(message)

        except Exception as e:
            log.debug("Failed to parse .pre-commit-config.yaml: %s", e)

    @staticmethod
    def _commit_type_from_title_pattern(title_pattern: object) -> str | None:
        """Return the commit type named by a release-drafter title pattern.

        Only case-insensitive regex literals (``/.../i``) containing a colon
        are considered, for example ``/feat:/i`` or ``/^fix:/i``.  Returns
        None when the pattern does not name a conventional commit type.
        """
        if not isinstance(title_pattern, str):
            return None
        if not (title_pattern.startswith("/") and title_pattern.endswith("/i")):
            return None

        pattern = title_pattern[1:-2]  # Strip the leading "/" and trailing "/i"
        if ":" not in pattern:
            return None

        # Drop a leading "^" anchor; the "/i" flag makes case irrelevant.
        candidate = pattern.split(":")[0].lstrip("^").strip().lower()
        return candidate if candidate in CONVENTIONAL_COMMIT_TYPES else None

    def _check_release_drafter_config(self) -> None:
        """Collect commit types from the release-drafter autolabeler rules.

        Looks at ``.github/release-drafter.yml`` and then ``.yaml``; the
        first file that parses is used.  A file that fails to parse is
        logged at debug level and the next candidate is tried.
        """
        config_paths = [
            self.workspace / ".github" / "release-drafter.yml",
            self.workspace / ".github" / "release-drafter.yaml",
        ]

        for config_file in config_paths:
            if not config_file.exists():
                continue

            try:
                with config_file.open("r", encoding="utf-8") as f:
                    config = yaml.safe_load(f)

                for rule in config.get("autolabeler") or []:
                    for title_pattern in rule.get("title") or []:
                        commit_type = self._commit_type_from_title_pattern(title_pattern)
                        if commit_type is not None:
                            # Store the formatted type, matching what the
                            # git-history analysis stores for the same key.
                            self.preferences.preferred_types[commit_type] = self._apply_capitalization(
                                commit_type
                            )

                break  # Use first found config

            except Exception as e:
                log.debug("Failed to parse release-drafter config: %s", e)

    def _tally_history(self, commit_messages: list[str]) -> tuple[dict[str, int], dict[str, int]]:
        """Count commit types and type capitalization styles in subjects.

        Only subjects already in conventional commit format are counted.

        Returns:
            ``(type_counts, style_counts)``: occurrences per lowercase
            commit type, and occurrences per capitalization style
            ("upper", "title" or "lower").
        """
        type_counts: dict[str, int] = {}
        style_counts: dict[str, int] = {}

        for message in commit_messages:
            if not self._is_conventional_commit(message):
                continue
            match = re.match(r"^([a-zA-Z]+)", message)
            if not match:
                continue

            raw_type = match.group(1)
            commit_type = raw_type.lower()
            type_counts[commit_type] = type_counts.get(commit_type, 0) + 1

            style = self._get_capitalization(raw_type)
            style_counts[style] = style_counts.get(style, 0) + 1

        return type_counts, style_counts

    def _analyze_git_history(self) -> None:
        """Derive preferences from the 50 most recent commit subjects.

        Sets the capitalization preference to the style used by most of the
        conventional commits found and records every commit type seen.
        Best-effort: a missing git binary, a failing ``git log`` or any
        other error leaves the preferences untouched.
        """
        try:
            git_cmd = shutil.which("git")
            if not git_cmd:
                log.debug("git command not found in PATH")
                return

            result = subprocess.run(  # noqa: S603
                [git_cmd, "log", "--pretty=format:%s", "-50"],
                cwd=self.workspace,
                capture_output=True,
                text=True,
                timeout=10,
                check=False,
            )

            if result.returncode != 0:
                log.debug("Failed to get git history")
                return

            type_counts, style_counts = self._tally_history(result.stdout.splitlines())
            if not type_counts:
                return

            # Most common style wins; on a tie max() keeps the style seen
            # first, i.e. the one used by the most recent commit.
            self.preferences.capitalization = max(style_counts, key=style_counts.__getitem__)

            for commit_type in type_counts:
                if commit_type in CONVENTIONAL_COMMIT_TYPES:
                    self.preferences.preferred_types[commit_type] = self._apply_capitalization(commit_type)

        except Exception as e:
            log.debug("Failed to analyze git history: %s", e)

    def _extract_preferences_from_message(self, message: str) -> None:
        """Adopt the type and capitalization of a conventional message.

        Messages that are not in conventional commit format are ignored.
        """
        if not self._is_conventional_commit(message):
            return

        match = re.match(r"^([a-zA-Z]+)", message)
        if match:
            commit_type = match.group(1)
            self.preferences.automation_type = commit_type.lower()
            self.preferences.capitalization = self._get_capitalization(commit_type)

    def _determine_commit_type(self, title: str, author: str) -> str:
        """Determine the appropriate conventional commit type.

        Dependabot authors or titles map to the dependency type, pre-commit
        authors or titles map to the automation type, and anything else is
        classified by the first matching entry of ``CHANGE_TYPE_PATTERNS``,
        falling back to the automation type.
        """
        title_lower = title.lower()
        author_lower = (author or "").lower()

        if "dependabot" in author_lower or any(re.search(pattern, title_lower) for pattern in DEPENDABOT_PATTERNS):
            return self.preferences.dependency_type

        if "pre-commit" in author_lower or "pre-commit" in title_lower:
            return self.preferences.automation_type

        for commit_type, patterns in CHANGE_TYPE_PATTERNS.items():
            for pattern in patterns:
                if re.search(pattern, title_lower):
                    return commit_type

        return self.preferences.automation_type

    @staticmethod
    def _strip_markdown(title: str) -> str:
        """Remove markdown links, a trailing ellipsis and emphasis markers.

        Underscores are removed only when they wrap a word or phrase as
        emphasis (``_text_`` / ``__text__``); underscores inside identifiers
        such as ``typing_extensions`` are kept.
        """
        # [text](url) -> text
        title = re.sub(r"\[([^\]]+)\]\([^)]+\)", r"\1", title)

        # Drop an ellipsis and anything following it
        title = re.sub(r"\s*[.]{3,}.*$", "", title)

        # _text_ / __text__ -> text; a marker must not touch a word
        # character on its outer side, which protects snake_case names.
        title = re.sub(r"(?<!\w)(_{1,2})(?=\S)([^_]+)(?<=\S)\1(?!\w)", r"\2", title)

        # Asterisks and backticks are always formatting
        return re.sub(r"[*`]", "", title)

    def _clean_title(self, title: str) -> str:
        """Clean and normalize the title text.

        Dependabot-style titles are reduced to
        ``bump <package> from <old> to <new>``.  For other titles a leading
        bump/update/upgrade verb is dropped and the first letter lowercased.
        """
        title = self._strip_markdown(title)

        for pattern in DEPENDABOT_PATTERNS:
            match = re.search(pattern, title, re.IGNORECASE)
            if match:
                package = match.group("package")
                old_version = match.group("old_version")
                new_version = match.group("new_version")
                return f"bump {package} from {old_version} to {new_version}"

        prefixes_to_remove = [
            r"^bump\s+",
            r"^update\s+",
            r"^upgrade\s+",
        ]

        for prefix in prefixes_to_remove:
            title = re.sub(prefix, "", title, flags=re.IGNORECASE).strip()

        # The subject starts lowercase; only the type carries the
        # repository's capitalization preference.
        if title and title[0].isupper():
            title = title[0].lower() + title[1:]

        return title.strip()

    def _format_conventional_commit(self, commit_type: str, title: str) -> str:
        """Format ``<type>[(scope)]: <title>`` using the preferences.

        The dependency scope is added only when scopes are enabled and
        ``commit_type`` equals the configured dependency type.
        """
        formatted_type = self._apply_capitalization(commit_type)

        scope = ""
        if commit_type == self.preferences.dependency_type and self.preferences.use_scope:
            scope = f"({self.preferences.dependency_scope})"

        return f"{formatted_type}{scope}: {title}"

    def _apply_capitalization(self, commit_type: str) -> str:
        """Apply the preferred capitalization to a commit type."""
        if self.preferences.capitalization == "upper":
            return commit_type.upper()
        elif self.preferences.capitalization == "title":
            return commit_type.title()
        else:
            return commit_type.lower()

    def _get_capitalization(self, text: str) -> str:
        """Return the capitalization style of text: "upper", "title" or "lower"."""
        if text.isupper():
            return "upper"
        elif text.istitle():
            return "title"
        else:
            return "lower"
|
440
|
+
|
441
|
+
|
442
|
+
def normalize_commit_title(title: str, author: str, workspace: Path | None = None) -> str:
    """
    Convert a commit title to conventional commit format.

    Convenience wrapper that builds a one-off ``CommitNormalizer`` for the
    given workspace and delegates to its ``normalize_commit_title`` method.

    Args:
        title: The original commit title
        author: The author of the commit/PR
        workspace: Path to the git repository workspace

    Returns:
        The title as returned by ``CommitNormalizer.normalize_commit_title``
    """
    return CommitNormalizer(workspace).normalize_commit_title(title, author)
|
456
|
+
|
457
|
+
|
458
|
+
def should_normalize_commit(title: str, author: str, workspace: Path | None = None) -> bool:
    """
    Report whether a commit title would be normalized.

    Convenience wrapper that builds a one-off ``CommitNormalizer`` for the
    given workspace and delegates to its ``should_normalize`` method.

    Args:
        title: The original commit title
        author: The author of the commit/PR
        workspace: Path to the git repository workspace

    Returns:
        True if the commit should be normalized
    """
    return CommitNormalizer(workspace).should_normalize(title, author)
|