github2gerrit 0.1.6__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,471 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # SPDX-FileCopyrightText: 2025 The Linux Foundation
3
+ #
4
+ # Commit Normalization Module
5
+ #
6
+ # This module provides functionality to normalize commit messages to follow
7
+ # conventional commit standards. It analyzes repository configuration and
8
+ # history to determine the preferred commit message format and applies
9
+ # appropriate transformations to automated PR titles.
10
+ #
11
+ # Key features:
12
+ # - Detects conventional commit preferences from .pre-commit-config.yaml
13
+ # - Analyzes .github/release-drafter.yml for commit type patterns
14
+ # - Examines git history for existing conventional commit patterns
15
+ # - Transforms dependabot/automation PR titles to conventional format
16
+ # - Respects repository-specific capitalization preferences
17
+
18
+ from __future__ import annotations
19
+
20
+ import logging
21
+ import re
22
+ import shutil
23
+ import subprocess
24
+ from dataclasses import dataclass
25
+ from dataclasses import field
26
+ from pathlib import Path
27
+
28
+ import yaml
29
+
30
+
31
# Module-level logger shared by everything in this file.
log = logging.getLogger("github2gerrit.commit_normalization")

# Recognised conventional commit types, most preferred first.
CONVENTIONAL_COMMIT_TYPES = [
    "feat", "fix", "docs", "style", "refactor", "perf",
    "test", "build", "ci", "chore", "revert",
]
47
+
48
# Patterns for detecting different types of changes.
#
# Each pattern is applied with ``re.search`` to the lower-cased title, and the
# first type (in dict order) with a matching pattern wins. Short tokens are
# therefore anchored with ``\b`` so they do not fire inside unrelated words:
# "fix" in "prefix", "patch" in "dispatch", "pip" in "pipeline" or "ci" in
# "pricing" would otherwise misclassify the change.
CHANGE_TYPE_PATTERNS = {
    "docs": [
        r"update.*documentation",
        r"add.*documentation",
        r"improve.*docs",
        r"update.*readme",
        r"add.*readme",
        r"add\s+new\s+documentation",
    ],
    "feat": [
        r"add\s+new(?!\s+documentation)",
        r"implement",
        r"introduce",
        r"create\s+new",
    ],
    "fix": [
        # Also accepts "hotfix"/"bugfix", but not "prefix"/"autofix".
        r"\b(?:hot|bug)?fix",
        r"\bresolve",
        r"\bcorrect",
        r"\brepair",
        r"\bpatch",
    ],
    "build": [
        r"update.*dependencies",
        r"upgrade.*dependencies",
        r"bump.*dependencies",
        r"update.*dependency",
        r"upgrade.*dependency",
        r"bump.*dependency",
        r"update.*gradle",
        r"update.*maven",
        r"update.*\bnpm\b",
        # Whole word only, so "update pipeline" falls through to "ci".
        r"update.*\bpip\b",
        r"update.*requirements",
    ],
    "ci": [
        r"update.*workflow",
        r"update.*action",
        r"update.*pipeline",
        r"update.*jenkins",
        r"update.*github.*action",
        r"update.*\bci\b",
    ],
    "chore": [
        r"bump",
        r"update",
        r"upgrade",
        r"maintain",
        r"housekeeping",
        r"cleanup",
        r"pre-commit.*autofix",
        r"pre-commit.*autoupdate",
    ],
}
103
+
104
# Title shapes produced by dependency bots. Every pattern exposes the named
# groups ``package``, ``old_version`` and ``new_version``.
DEPENDABOT_PATTERNS = [
    # "bump <package> from <old> to <new>"
    r"bump\s+(?P<package>[^\s]+)"
    r"\s+from\s+(?P<old_version>[^\s]+)"
    r"\s+to\s+(?P<new_version>[^\s]+)",
    # "update <package> requirement from <old> to <new>"
    r"update\s+(?P<package>[^\s]+)\s+requirement"
    r"\s+from\s+(?P<old_version>[^\s]+)"
    r"\s+to\s+(?P<new_version>[^\s]+)",
    # "upgrade <package> from <old> to <new>"
    r"upgrade\s+(?P<package>[^\s]+)"
    r"\s+from\s+(?P<old_version>[^\s]+)"
    r"\s+to\s+(?P<new_version>[^\s]+)",
]
110
+
111
+
112
@dataclass
class ConventionalCommitPreferences:
    """Repository preferences for conventional commits.

    Attributes:
        capitalization: How the commit type is rendered; the normalizer in
            this module sets and reads "lower", "title" or "upper".
        preferred_types: Commit types detected in the repository, keyed by
            type name, with a string recorded by the detector as the value.
        dependency_type: Commit type used for dependency updates.
        automation_type: Commit type used for automated fixes.
        use_scope: Whether a scope is added to commit messages.
        dependency_scope: Scope used for dependency updates.
    """

    capitalization: str = "lower"
    preferred_types: dict[str, str] = field(default_factory=dict)
    dependency_type: str = "chore"
    automation_type: str = "chore"
    use_scope: bool = False
    dependency_scope: str = "deps"
133
+
134
+
135
class CommitNormalizer:
    """Normalizes commit messages to conventional commit format.

    Only titles that look like automation output (known bot authors, or
    bump/update style subjects) and that are not already conventional commits
    are rewritten. Repository preferences are detected lazily, the first time
    a title actually needs normalizing.
    """

    # Author login prefixes of bots whose titles are always normalized.
    _AUTOMATION_AUTHORS = (
        "dependabot[bot]",
        "dependabot-preview[bot]",
        "pre-commit-ci[bot]",
        "renovate[bot]",
        "greenkeeper[bot]",
    )

    # Title shapes that mark a change as automated regardless of its author.
    _AUTOMATION_TITLE_PATTERNS = (
        r"^bump\s+",
        r"^update\s+.*\s+from\s+.*\s+to\s+",
        r"^upgrade\s+.*\s+from\s+.*\s+to\s+",
        r"pre-commit.*autofix",
        r"pre-commit.*autoupdate",
    )

    def __init__(self, workspace: Path | None = None):
        """Create a normalizer for *workspace* (defaults to the current directory)."""
        self.workspace = workspace or Path.cwd()
        self.preferences = ConventionalCommitPreferences()
        self._detected_preferences = False

    def should_normalize(self, title: str, author: str) -> bool:
        """Check if a commit title should be normalized.

        Returns False for empty titles and for titles that are already in
        conventional commit format; otherwise True only for automation PRs.
        """
        if not title:
            return False

        # Check if already in conventional commit format
        if self._is_conventional_commit(title):
            return False

        # Check for automation patterns
        return self._is_automation_pr(title, author)

    def normalize_commit_title(self, title: str, author: str) -> str:
        """Normalize a commit title to conventional commit format.

        Titles that should not be normalized are returned unchanged.
        """
        if not self.should_normalize(title, author):
            return title

        # Detect preferences if not already done
        if not self._detected_preferences:
            self._detect_preferences()
            self._detected_preferences = True

        # Determine the appropriate conventional commit type
        commit_type = self._determine_commit_type(title, author)

        # Clean and normalize the title
        normalized_title = self._clean_title(title)

        # Apply conventional commit format
        return self._format_conventional_commit(commit_type, normalized_title)

    def _is_conventional_commit(self, title: str) -> bool:
        """Check if title is already in conventional commit format."""
        pattern = r"^(feat|fix|docs|style|refactor|perf|test|build|ci|chore|revert)(\(.+?\))?\s*!?\s*:\s*.+"
        return bool(re.match(pattern, title, re.IGNORECASE))

    def _is_automation_pr(self, title: str, author: str) -> bool:
        """Check if this is an automation PR that should be normalized.

        A missing (None/empty) author is treated as "not a known bot", so the
        decision then rests on the title alone.
        """
        if (author or "").lower().startswith(self._AUTOMATION_AUTHORS):
            return True

        return any(re.search(pattern, title, re.IGNORECASE) for pattern in self._AUTOMATION_TITLE_PATTERNS)

    def _detect_preferences(self) -> None:
        """Detect repository preferences for conventional commits."""
        log.debug("Detecting conventional commit preferences for workspace: %s", self.workspace)

        # Check .pre-commit-config.yaml
        self._check_precommit_config()

        # Check .github/release-drafter.yml
        self._check_release_drafter_config()

        # Analyze git history
        self._analyze_git_history()

        # The capitalization preference may have changed after a type was
        # recorded, so re-render every recorded type with the final preference.
        self.preferences.preferred_types = {
            commit_type: self._apply_capitalization(commit_type) for commit_type in self.preferences.preferred_types
        }

        log.info("Detected commit preferences: %s", self.preferences)

    def _check_precommit_config(self) -> None:
        """Check .pre-commit-config.yaml for commit message patterns.

        Reads ``ci.autofix_commit_msg`` and ``ci.autoupdate_commit_msg``; a
        missing, unreadable, empty or malformed file is ignored.
        """
        config_file = self.workspace / ".pre-commit-config.yaml"
        if not config_file.exists():
            return

        try:
            with config_file.open("r", encoding="utf-8") as f:
                config = yaml.safe_load(f)
        except Exception as e:  # detection is best-effort and must never abort normalization
            log.debug("Failed to parse .pre-commit-config.yaml: %s", e)
            return

        # An empty file parses to None, and "ci:" with no value is None too.
        ci_config = config.get("ci") if isinstance(config, dict) else None
        if not isinstance(ci_config, dict):
            return

        for key in ("autofix_commit_msg", "autoupdate_commit_msg"):
            message = ci_config.get(key, "")
            if message and isinstance(message, str):
                self._extract_preferences_from_message(message)

    def _check_release_drafter_config(self) -> None:
        """Check .github/release-drafter.yml for commit type patterns.

        Every autolabeler ``title`` pattern that names a conventional commit
        type records that type in ``preferences.preferred_types``, rendered
        with the current capitalization preference. The first candidate file
        that parses to a mapping is used.
        """
        config_paths = [
            self.workspace / ".github" / "release-drafter.yml",
            self.workspace / ".github" / "release-drafter.yaml",
        ]

        for config_file in config_paths:
            if not config_file.exists():
                continue

            try:
                with config_file.open("r", encoding="utf-8") as f:
                    config = yaml.safe_load(f)
            except Exception as e:  # detection is best-effort and must never abort normalization
                log.debug("Failed to parse release-drafter config: %s", e)
                continue

            if not isinstance(config, dict):
                log.debug("Failed to parse release-drafter config: %s", "not a mapping")
                continue

            autolabeler = config.get("autolabeler")
            for rule in autolabeler if isinstance(autolabeler, list) else []:
                if not isinstance(rule, dict):
                    continue
                titles = rule.get("title") or []
                if isinstance(titles, str):
                    titles = [titles]
                if not isinstance(titles, list):
                    continue

                for title_pattern in titles:
                    commit_type = self._type_from_title_pattern(title_pattern)
                    if commit_type:
                        self.preferences.preferred_types[commit_type] = self._apply_capitalization(commit_type)

            break  # Use first found config

    @staticmethod
    def _type_from_title_pattern(title_pattern: object) -> str | None:
        """Return the conventional commit type named by a ``/regex/flags`` title pattern.

        Accepts a leading ``^`` anchor and a scope group, e.g. ``/feat:/i`` or
        ``/^feat(\\(.+\\))?:/i``. The type is matched case-insensitively and
        returned lower-cased. Returns None when the pattern is not a delimited
        regex, contains no ``:``, or does not start with a known type.
        """
        if not isinstance(title_pattern, str):
            return None

        delimited = re.match(r"^/(.+)/[a-z]*$", title_pattern)
        if not delimited or ":" not in delimited.group(1):
            return None

        leading = re.match(r"\^?\(?([A-Za-z]+)", delimited.group(1))
        if not leading:
            return None

        commit_type = leading.group(1).lower()
        return commit_type if commit_type in CONVENTIONAL_COMMIT_TYPES else None

    def _analyze_git_history(self) -> None:
        """Analyze recent git history for conventional commit patterns.

        Reads the subjects of the last 50 commits in the workspace; any
        failure to run git is logged at debug level and otherwise ignored.
        """
        git_cmd = shutil.which("git")
        if not git_cmd:
            log.debug("git command not found in PATH")
            return

        try:
            result = subprocess.run(  # noqa: S603
                [git_cmd, "log", "--pretty=format:%s", "-50"],
                cwd=self.workspace,
                capture_output=True,
                text=True,
                timeout=10,
                check=False,
            )
        except Exception as e:  # detection is best-effort and must never abort normalization
            log.debug("Failed to analyze git history: %s", e)
            return

        if result.returncode != 0:
            log.debug("Failed to get git history")
            return

        self._apply_history_preferences(result.stdout.splitlines())

    def _apply_history_preferences(self, subjects: list[str]) -> None:
        """Update preferences from a list of commit subjects.

        The capitalization preference becomes the style used by the most
        conventional commits in *subjects*; on a tie the style seen first in
        the list wins. Every conventional type seen is recorded in
        ``preferred_types``. Nothing changes when no subject is a
        conventional commit.
        """
        style_counts: dict[str, int] = {}
        seen_types: list[str] = []

        for subject in subjects:
            if not self._is_conventional_commit(subject):
                continue
            match = re.match(r"^([a-zA-Z]+)", subject)
            if not match:
                continue

            raw_type = match.group(1)
            style = self._get_capitalization(raw_type)
            style_counts[style] = style_counts.get(style, 0) + 1

            commit_type = raw_type.lower()
            if commit_type not in seen_types:
                seen_types.append(commit_type)

        if not style_counts:
            return

        # max() keeps the first key among equals, i.e. the style seen first.
        self.preferences.capitalization = max(style_counts, key=lambda name: style_counts[name])

        for commit_type in seen_types:
            if commit_type in CONVENTIONAL_COMMIT_TYPES:
                self.preferences.preferred_types[commit_type] = self._apply_capitalization(commit_type)

    def _extract_preferences_from_message(self, message: str) -> None:
        """Extract preferences from a commit message.

        When *message* is a conventional commit, its type becomes the
        automation type and its casing becomes the capitalization preference.
        """
        if not self._is_conventional_commit(message):
            return

        match = re.match(r"^([a-zA-Z]+)", message)
        if match:
            commit_type = match.group(1)
            self.preferences.automation_type = commit_type.lower()
            self.preferences.capitalization = self._get_capitalization(commit_type)

    def _determine_commit_type(self, title: str, author: str) -> str:
        """Determine the appropriate conventional commit type.

        Order of precedence: dependabot (by author or title shape), then
        pre-commit (by author or title), then the first matching entry of
        CHANGE_TYPE_PATTERNS, then the automation type as the fallback.
        """
        title_lower = title.lower()
        author_lower = (author or "").lower()

        # Check for dependabot patterns first
        if "dependabot" in author_lower or any(re.search(pattern, title_lower) for pattern in DEPENDABOT_PATTERNS):
            return self.preferences.dependency_type

        # Check for pre-commit.ci patterns
        if "pre-commit" in author_lower or "pre-commit" in title_lower:
            return self.preferences.automation_type

        # Pattern-based detection
        for commit_type, patterns in CHANGE_TYPE_PATTERNS.items():
            if any(re.search(pattern, title_lower) for pattern in patterns):
                return commit_type

        # Default to chore for unrecognized automation
        return self.preferences.automation_type

    def _clean_title(self, title: str) -> str:
        """Clean and normalize the title text.

        Strips markdown decoration, reduces dependabot-style titles to
        ``bump <package> from <old> to <new>``, and otherwise drops a leading
        bump/update/upgrade verb and lower-cases the first letter.
        """
        # Remove markdown links, keeping the link text
        title = re.sub(r"\[([^\]]+)\]\([^)]+\)", r"\1", title)

        # Remove trailing ellipsis (and anything that follows it)
        title = re.sub(r"\s*[.]{3,}.*$", "", title)

        # Remove markdown formatting. Underscores are removed only when they
        # wrap a word as emphasis (_x_ / __x__); underscores inside identifiers
        # such as "serde_json" are part of the package name and must survive.
        title = re.sub(r"[*`]", "", title)
        title = re.sub(r"(?<!\w)(_{1,2})(?=\S)(.+?)(?<=\S)\1(?!\w)", r"\2", title)

        # For dependabot titles, extract the essential information; anything
        # after the new version is dropped.
        for pattern in DEPENDABOT_PATTERNS:
            match = re.search(pattern, title, re.IGNORECASE)
            if match:
                package = match.group("package")
                old_version = match.group("old_version")
                new_version = match.group("new_version")
                return f"bump {package} from {old_version} to {new_version}"

        # Remove common prefixes if not already handled
        for prefix in (r"^bump\s+", r"^update\s+", r"^upgrade\s+"):
            title = re.sub(prefix, "", title, flags=re.IGNORECASE).strip()

        # Ensure first letter is lowercase (will be adjusted by capitalization later)
        if title and title[0].isupper():
            title = title[0].lower() + title[1:]

        return title.strip()

    def _format_conventional_commit(self, commit_type: str, title: str) -> str:
        """Format a conventional commit message as ``type[(scope)]: title``."""
        # Apply capitalization preference
        formatted_type = self._apply_capitalization(commit_type)

        # Add scope if preferred for dependency updates
        scope = ""
        if commit_type == self.preferences.dependency_type and self.preferences.use_scope:
            scope = f"({self.preferences.dependency_scope})"

        return f"{formatted_type}{scope}: {title}"

    def _apply_capitalization(self, commit_type: str) -> str:
        """Apply the preferred capitalization to a commit type."""
        if self.preferences.capitalization == "upper":
            return commit_type.upper()
        if self.preferences.capitalization == "title":
            return commit_type.title()
        return commit_type.lower()

    def _get_capitalization(self, text: str) -> str:
        """Determine the capitalization style of text ("upper", "title" or "lower")."""
        if text.isupper():
            return "upper"
        if text.istitle():
            return "title"
        return "lower"
440
+
441
+
442
def normalize_commit_title(title: str, author: str, workspace: Path | None = None) -> str:
    """
    Convenience wrapper: normalize one title with a throwaway CommitNormalizer.

    Args:
        title: The original commit title
        author: The author of the commit/PR
        workspace: Path to the git repository workspace

    Returns:
        The value of ``CommitNormalizer.normalize_commit_title`` for these
        arguments
    """
    return CommitNormalizer(workspace).normalize_commit_title(title, author)
456
+
457
+
458
def should_normalize_commit(title: str, author: str, workspace: Path | None = None) -> bool:
    """
    Convenience wrapper: ask a throwaway CommitNormalizer whether to normalize.

    Args:
        title: The original commit title
        author: The author of the commit/PR
        workspace: Path to the git repository workspace

    Returns:
        The value of ``CommitNormalizer.should_normalize`` for these arguments
    """
    return CommitNormalizer(workspace).should_normalize(title, author)